pose detection和landmark支持onnxruntime的cuda和tensorrt

2022-08-12 09:42:03 +08:00 · 2022-08-12 09:42:03 +08:00 · 008ed46ee0
commit 008ed46ee0
parent f3bf3ab3e3
13 changed files with 1485 additions and 1 deletions
--- a/mediapipe/examples/desktop/pose_tracking/BUILD
+++ b/mediapipe/examples/desktop/pose_tracking/BUILD
@ -24,6 +24,46 @@ cc_binary(
    ],
 )
 # Desktop pose tracking demo built from the CPU graph dependencies and linked
 # against the demo_run_graph_main_fps runner.
 # NOTE(review): the "_fps" runner presumably reports frames per second; its
 # source is not shown here -- confirm.
 cc_binary(
    name = "pose_tracking_cpu_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/pose_tracking:pose_tracking_cpu_deps",
    ],
 )
 # Desktop pose tracking demo built from the ONNX/CUDA graph dependencies and
 # linked against the standard demo_run_graph_main runner.
 cc_binary(
    name = "pose_tracking_onnx_cuda",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_cuda_deps",
    ],
 )
 # Same graph dependencies as pose_tracking_onnx_cuda, but linked against the
 # demo_run_graph_main_fps runner.
 # NOTE(review): the "_fps" runner presumably reports frames per second; its
 # source is not shown here -- confirm.
 cc_binary(
    name = "pose_tracking_onnx_cuda_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_cuda_deps",
    ],
 )
 # Desktop pose tracking demo built from the ONNX/TensorRT graph dependencies
 # and linked against the standard demo_run_graph_main runner.
 cc_binary(
    name = "pose_tracking_onnx_tensorrt",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_tensorrt_deps",
    ],
 )
 # Same graph dependencies as pose_tracking_onnx_tensorrt, but linked against
 # the demo_run_graph_main_fps runner.
 # NOTE(review): the "_fps" runner presumably reports frames per second; its
 # source is not shown here -- confirm.
 cc_binary(
    name = "pose_tracking_onnx_tensorrt_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_tensorrt_deps",
    ],
 )
 # Linux only
 cc_binary(
    name = "pose_tracking_gpu",
--- a/mediapipe/graphs/pose_tracking/BUILD
+++ b/mediapipe/graphs/pose_tracking/BUILD
@ -54,3 +54,37 @@ mediapipe_binary_graph(
    output_name = "pose_tracking_cpu.binarypb",
    deps = [":pose_tracking_cpu_deps"],
 )
 # Calculators and subgraphs referenced by pose_tracking_onnx_cuda.pbtxt:
 # the constant side packet and flow limiter calculators, the CPU pose renderer,
 # and the PoseLandmarkOnnxCUDA subgraph.
 cc_library(
    name = "pose_tracking_onnx_cuda_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
        "//mediapipe/modules/pose_landmark:pose_landmark_onnx_cuda",
    ],
 )
 # Produces pose_tracking_onnx_cuda.binarypb from the text graph
 # pose_tracking_onnx_cuda.pbtxt.
 mediapipe_binary_graph(
    name = "pose_tracking_onnx_cuda_binary_graph",
    graph = "pose_tracking_onnx_cuda.pbtxt",
    output_name = "pose_tracking_onnx_cuda.binarypb",
    deps = [":pose_tracking_onnx_cuda_deps"],
 )
 # Calculators and subgraphs referenced by pose_tracking_onnx_tensorrt.pbtxt:
 # the constant side packet and flow limiter calculators, the CPU pose renderer,
 # and the PoseLandmarkOnnxTensorRT subgraph.
 cc_library(
    name = "pose_tracking_onnx_tensorrt_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
        "//mediapipe/modules/pose_landmark:pose_landmark_onnx_tensorrt",
    ],
 )
 # Produces pose_tracking_onnx_tensorrt.binarypb from the text graph
 # pose_tracking_onnx_tensorrt.pbtxt. The graph's calculators and subgraphs come
 # from the ":pose_tracking_onnx_tensorrt_deps" library in this package, mirroring
 # how the CUDA binary graph depends on ":pose_tracking_onnx_cuda_deps".
 mediapipe_binary_graph(
    name = "pose_tracking_onnx_tensorrt_binary_graph",
    graph = "pose_tracking_onnx_tensorrt.pbtxt",
    output_name = "pose_tracking_onnx_tensorrt.binarypb",
    deps = [":pose_tracking_onnx_tensorrt_deps"],
 )
--- a/mediapipe/graphs/pose_tracking/pose_tracking_cpu.pbtxt
+++ b/mediapipe/graphs/pose_tracking/pose_tracking_cpu.pbtxt
@ -14,7 +14,7 @@ node {
  output_side_packet: "PACKET:enable_segmentation"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
-      packet { bool_value: true }
+      packet { bool_value: false }
    }
  }
 }
--- a/mediapipe/graphs/pose_tracking/pose_tracking_onnx_cuda.pbtxt
+++ b/mediapipe/graphs/pose_tracking/pose_tracking_onnx_cuda.pbtxt
@ -0,0 +1,63 @@
 # MediaPipe graph that performs pose tracking with onnxruntime on cuda.
 # CPU buffer. (ImageFrame)
 input_stream: "input_video"
 # Output image with rendered results. (ImageFrame)
 output_stream: "output_video"
 # Pose landmarks. (NormalizedLandmarkList)
 output_stream: "pose_landmarks"
 # Generates the side packet that controls segmentation (disabled in this graph).
 node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:enable_segmentation"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { bool_value: false }
    }
  }
 }
 # Throttles the images flowing downstream for flow control. It passes through
 # the very first incoming image unaltered, and waits for downstream nodes
 # (calculators and subgraphs) in the graph to finish their tasks before it
 # passes through another image. All images that come in while waiting are
 # dropped, limiting the number of in-flight images in most part of the graph to
 # 1. This prevents the downstream nodes from queuing up incoming images and data
 # excessively, which leads to increased latency and memory usage, unwanted in
 # real-time mobile applications. It also eliminates unnecessary computation,
 # e.g., the output produced by a node may get dropped downstream if the
 # subsequent nodes are still busy processing previous inputs.
 node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
 }
 # Subgraph that detects poses and corresponding landmarks.
 node {
  calculator: "PoseLandmarkOnnxCUDA"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
 }
 # Subgraph that renders pose-landmark annotation onto the input image.
 node {
  calculator: "PoseRendererCpu"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "LANDMARKS:pose_landmarks"
  input_stream: "SEGMENTATION_MASK:segmentation_mask"
  input_stream: "DETECTION:pose_detection"
  input_stream: "ROI:roi_from_landmarks"
  output_stream: "IMAGE:output_video"
 }
--- a/mediapipe/graphs/pose_tracking/pose_tracking_onnx_tensorrt.pbtxt
+++ b/mediapipe/graphs/pose_tracking/pose_tracking_onnx_tensorrt.pbtxt
@ -0,0 +1,63 @@
 # MediaPipe graph that performs pose tracking with onnxruntime on tensorrt.
 # CPU buffer. (ImageFrame)
 input_stream: "input_video"
 # Output image with rendered results. (ImageFrame)
 output_stream: "output_video"
 # Pose landmarks. (NormalizedLandmarkList)
 output_stream: "pose_landmarks"
 # Generates the side packet that controls segmentation (disabled in this graph).
 node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:enable_segmentation"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { bool_value: false }
    }
  }
 }
 # Throttles the images flowing downstream for flow control. It passes through
 # the very first incoming image unaltered, and waits for downstream nodes
 # (calculators and subgraphs) in the graph to finish their tasks before it
 # passes through another image. All images that come in while waiting are
 # dropped, limiting the number of in-flight images in most part of the graph to
 # 1. This prevents the downstream nodes from queuing up incoming images and data
 # excessively, which leads to increased latency and memory usage, unwanted in
 # real-time mobile applications. It also eliminates unnecessary computation,
 # e.g., the output produced by a node may get dropped downstream if the
 # subsequent nodes are still busy processing previous inputs.
 node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
 }
 # Subgraph that detects poses and corresponding landmarks.
 node {
  calculator: "PoseLandmarkOnnxTensorRT"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
 }
 # Subgraph that renders pose-landmark annotation onto the input image.
 node {
  calculator: "PoseRendererCpu"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "LANDMARKS:pose_landmarks"
  input_stream: "SEGMENTATION_MASK:segmentation_mask"
  input_stream: "DETECTION:pose_detection"
  input_stream: "ROI:roi_from_landmarks"
  output_stream: "IMAGE:output_video"
 }
--- a/mediapipe/modules/pose_detection/BUILD
+++ b/mediapipe/modules/pose_detection/BUILD
@ -35,6 +35,34 @@ mediapipe_simple_subgraph(
    ],
 )
 # Subgraph defined in pose_detection_onnx_cuda.pbtxt and registered under the
 # calculator name "PoseDetectionOnnxCUDA". Inference goes through the
 # inference_calculator_onnx_cuda target.
 mediapipe_simple_subgraph(
    name = "pose_detection_onnx_cuda",
    graph = "pose_detection_onnx_cuda.pbtxt",
    register_as = "PoseDetectionOnnxCUDA",
    deps = [
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
        "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
        "//mediapipe/calculators/util:non_max_suppression_calculator",
    ],
 )
 # Subgraph defined in pose_detection_onnx_tensorrt.pbtxt and registered under
 # the calculator name "PoseDetectionOnnxTensorRT". Inference goes through the
 # inference_calculator_onnx_tensorrt target.
 mediapipe_simple_subgraph(
    name = "pose_detection_onnx_tensorrt",
    graph = "pose_detection_onnx_tensorrt.pbtxt",
    register_as = "PoseDetectionOnnxTensorRT",
    deps = [
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
        "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
        "//mediapipe/calculators/util:non_max_suppression_calculator",
    ],
 )
 mediapipe_simple_subgraph(
    name = "pose_detection_gpu",
    graph = "pose_detection_gpu.pbtxt",
--- a/mediapipe/modules/pose_detection/pose_detection_onnx_cuda.pbtxt
+++ b/mediapipe/modules/pose_detection/pose_detection_onnx_cuda.pbtxt
@ -0,0 +1,157 @@
 # MediaPipe graph to detect poses. (CPU input, and inference is executed with onnxruntime on
 # cuda.)
 #
 # It is required that "pose_detection.onnx" is available at
 # "mediapipe/modules/pose_detection/pose_detection.onnx"
 # path during execution.
 #
 # EXAMPLE:
 #   node {
 #     calculator: "PoseDetectionOnnxCUDA"
 #     input_stream: "IMAGE:image"
 #     output_stream: "DETECTIONS:pose_detections"
 #   }
 type: "PoseDetectionOnnxCUDA"
 # CPU image. (ImageFrame)
 input_stream: "IMAGE:image"
 # Detected poses. (std::vector<Detection>)
 # Bounding box in each pose detection is currently set to the bounding box of
 # the detected face. However, 4 additional key points are available in each
 # detection, which are used to further calculate a (rotated) bounding box that
 # encloses the body region of interest. Among the 4 key points, the first two
 # are for identifying the full-body region, and the second two for upper body
 # only:
 #
 # Key point 0 - mid hip center
 # Key point 1 - point that encodes size & rotation (for full body)
 # Key point 2 - mid shoulder center
 # Key point 3 - point that encodes size & rotation (for upper body)
 #
 # NOTE: there will not be an output packet in the DETECTIONS stream for this
 # particular timestamp if no poses are detected. However, the MediaPipe
 # framework will internally inform the downstream calculators of the absence of
 # this packet so that they don't wait for it unnecessarily.
 output_stream: "DETECTIONS:detections"
 # Transforms the input image into a 224x224 one while keeping the aspect ratio
 # (what is expected by the corresponding model), resulting in potential
 # letterboxing in the transformed image.
 node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
      # If this calculator truly operates in the CPU, then gpu_origin is
      # ignored, but if some build switch insists on GPU inference, then we will
      # still need to set this.
      gpu_origin: TOP_LEFT
    }
  }
 }
 # Runs the ONNX pose detection model through InferenceCalculator with the cuda
 # delegate. Takes an image tensor and outputs a vector of tensors representing,
 # for instance, detection boxes/keypoints and scores.
 node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_detection/pose_detection.onnx"
      delegate { cuda {} }
    }
  }
 }
 # Generates a single side packet containing a vector of SSD anchors based on
 # the specification in the options.
 node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 5
      min_scale: 0.1484375
      max_scale: 0.75
      input_size_height: 224
      input_size_width: 224
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 8
      strides: 16
      strides: 32
      strides: 32
      strides: 32
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
 }
 # Decodes the detection tensors generated by the ONNX model, based on
 # the SSD anchors and the specification in the options, into a vector of
 # detections. Each detection describes a detected object.
 node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_boxes: 2254
      num_coords: 12
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 4
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
      x_scale: 224.0
      y_scale: 224.0
      h_scale: 224.0
      w_scale: 224.0
      min_score_thresh: 0.5
    }
  }
 }
 # Performs non-max suppression to remove excessive detections.
 node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      algorithm: WEIGHTED
    }
  }
 }
 # Adjusts detection locations (already normalized to [0.f, 1.f]) on the
 # letterboxed image (after image transformation with the FIT scale mode) to the
 # corresponding locations on the same image with the letterbox removed (the
 # input image to the graph before image transformation).
 node {
  calculator: "DetectionLetterboxRemovalCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "DETECTIONS:detections"
 }
--- a/mediapipe/modules/pose_detection/pose_detection_onnx_tensorrt.pbtxt
+++ b/mediapipe/modules/pose_detection/pose_detection_onnx_tensorrt.pbtxt
@ -0,0 +1,157 @@
 # MediaPipe graph to detect poses. (CPU input, and inference is executed with onnxruntime on
 # tensorrt.)
 #
 # It is required that "pose_detection.onnx" is available at
 # "mediapipe/modules/pose_detection/pose_detection.onnx"
 # path during execution.
 #
 # EXAMPLE:
 #   node {
 #     calculator: "PoseDetectionOnnxTensorRT"
 #     input_stream: "IMAGE:image"
 #     output_stream: "DETECTIONS:pose_detections"
 #   }
 type: "PoseDetectionOnnxTensorRT"
 # CPU image. (ImageFrame)
 input_stream: "IMAGE:image"
 # Detected poses. (std::vector<Detection>)
 # Bounding box in each pose detection is currently set to the bounding box of
 # the detected face. However, 4 additional key points are available in each
 # detection, which are used to further calculate a (rotated) bounding box that
 # encloses the body region of interest. Among the 4 key points, the first two
 # are for identifying the full-body region, and the second two for upper body
 # only:
 #
 # Key point 0 - mid hip center
 # Key point 1 - point that encodes size & rotation (for full body)
 # Key point 2 - mid shoulder center
 # Key point 3 - point that encodes size & rotation (for upper body)
 #
 # NOTE: there will not be an output packet in the DETECTIONS stream for this
 # particular timestamp if no poses are detected. However, the MediaPipe
 # framework will internally inform the downstream calculators of the absence of
 # this packet so that they don't wait for it unnecessarily.
 output_stream: "DETECTIONS:detections"
 # Transforms the input image into a 224x224 one while keeping the aspect ratio
 # (what is expected by the corresponding model), resulting in potential
 # letterboxing in the transformed image.
 node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
      # If this calculator truly operates in the CPU, then gpu_origin is
      # ignored, but if some build switch insists on GPU inference, then we will
      # still need to set this.
      gpu_origin: TOP_LEFT
    }
  }
 }
 # Runs the ONNX pose detection model through InferenceCalculator with the
 # tensorrt delegate. Takes an image tensor and outputs a vector of tensors
 # representing, for instance, detection boxes/keypoints and scores.
 node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_detection/pose_detection.onnx"
      delegate { tensorrt {} }
    }
  }
 }
 # Generates a single side packet containing a vector of SSD anchors based on
 # the specification in the options.
 node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 5
      min_scale: 0.1484375
      max_scale: 0.75
      input_size_height: 224
      input_size_width: 224
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 8
      strides: 16
      strides: 32
      strides: 32
      strides: 32
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
 }
 # Decodes the detection tensors generated by the ONNX model, based on
 # the SSD anchors and the specification in the options, into a vector of
 # detections. Each detection describes a detected object.
 node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_boxes: 2254
      num_coords: 12
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 4
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
      x_scale: 224.0
      y_scale: 224.0
      h_scale: 224.0
      w_scale: 224.0
      min_score_thresh: 0.5
    }
  }
 }
 # Performs non-max suppression to remove excessive detections.
 node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      algorithm: WEIGHTED
    }
  }
 }
 # Adjusts detection locations (already normalized to [0.f, 1.f]) on the
 # letterboxed image (after image transformation with the FIT scale mode) to the
 # corresponding locations on the same image with the letterbox removed (the
 # input image to the graph before image transformation).
 node {
  calculator: "DetectionLetterboxRemovalCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "DETECTIONS:detections"
 }
--- a/mediapipe/modules/pose_landmark/BUILD
+++ b/mediapipe/modules/pose_landmark/BUILD
@ -61,6 +61,35 @@ mediapipe_simple_subgraph(
    ],
 )
 # Subgraph defined in pose_landmark_by_roi_onnx_cuda.pbtxt and registered under
 # the calculator name "PoseLandmarkByRoiOnnxCUDA". Inference goes through the
 # inference_calculator_onnx_cuda target.
 # NOTE(review): pose_landmark_by_roi_onnx_cuda.pbtxt hard-codes the model path
 # and contains no model-loader node, so ":pose_landmark_model_loader" looks
 # unused here -- confirm before removing.
 mediapipe_simple_subgraph(
    name = "pose_landmark_by_roi_onnx_cuda",
    graph = "pose_landmark_by_roi_onnx_cuda.pbtxt",
    register_as = "PoseLandmarkByRoiOnnxCUDA",
    deps = [
        ":pose_landmark_model_loader",
        ":pose_landmarks_and_segmentation_inverse_projection",
        ":tensors_to_pose_landmarks_and_segmentation",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
    ],
 )
 # Subgraph defined in pose_landmark_by_roi_onnx_tensorrt.pbtxt and registered
 # under the calculator name "PoseLandmarkByRoiOnnxTensorRT". Inference goes
 # through the inference_calculator_onnx_tensorrt target.
 # NOTE(review): if the graph hard-codes its model path like the CUDA variant,
 # ":pose_landmark_model_loader" may be unnecessary -- confirm against the pbtxt.
 mediapipe_simple_subgraph(
    name = "pose_landmark_by_roi_onnx_tensorrt",
    graph = "pose_landmark_by_roi_onnx_tensorrt.pbtxt",
    register_as = "PoseLandmarkByRoiOnnxTensorRT",
    deps = [
        ":pose_landmark_model_loader",
        ":pose_landmarks_and_segmentation_inverse_projection",
        ":tensors_to_pose_landmarks_and_segmentation",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
    ],
 )
 mediapipe_simple_subgraph(
    name = "tensors_to_pose_landmarks_and_segmentation",
    graph = "tensors_to_pose_landmarks_and_segmentation.pbtxt",
@ -159,10 +188,57 @@ mediapipe_simple_subgraph(
    ],
 )
 # Subgraph defined in pose_landmark_onnx_cuda.pbtxt and registered under the
 # calculator name "PoseLandmarkOnnxCUDA". It pulls in the CUDA variants of the
 # pose detection and landmark-by-ROI subgraphs alongside the shared filtering
 # and ROI helper subgraphs.
 mediapipe_simple_subgraph(
    name = "pose_landmark_onnx_cuda",
    graph = "pose_landmark_onnx_cuda.pbtxt",
    register_as = "PoseLandmarkOnnxCUDA",
    deps = [
        ":pose_detection_to_roi",
        ":pose_landmark_by_roi_onnx_cuda",
        ":pose_landmark_filtering",
        ":pose_landmarks_to_roi",
        ":pose_segmentation_filtering",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:merge_calculator",
        "//mediapipe/calculators/core:packet_presence_calculator",
        "//mediapipe/calculators/core:previous_loopback_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:from_image_calculator",
        "//mediapipe/modules/pose_detection:pose_detection_onnx_cuda",
    ],
 )
 # Subgraph defined in pose_landmark_onnx_tensorrt.pbtxt and registered under
 # the calculator name "PoseLandmarkOnnxTensorRT". It pulls in the TensorRT
 # variants of the pose detection and landmark-by-ROI subgraphs alongside the
 # shared filtering and ROI helper subgraphs.
 mediapipe_simple_subgraph(
    name = "pose_landmark_onnx_tensorrt",
    graph = "pose_landmark_onnx_tensorrt.pbtxt",
    register_as = "PoseLandmarkOnnxTensorRT",
    deps = [
        ":pose_detection_to_roi",
        ":pose_landmark_by_roi_onnx_tensorrt",
        ":pose_landmark_filtering",
        ":pose_landmarks_to_roi",
        ":pose_segmentation_filtering",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:merge_calculator",
        "//mediapipe/calculators/core:packet_presence_calculator",
        "//mediapipe/calculators/core:previous_loopback_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:from_image_calculator",
        "//mediapipe/modules/pose_detection:pose_detection_onnx_tensorrt",
    ],
 )
 # Pose landmark model files (lite/full/heavy, each as .onnx and .tflite)
 # exported from this package so that other packages can reference them.
 exports_files(
    srcs = [
        "pose_landmark_full.onnx",
        "pose_landmark_full.tflite",
        "pose_landmark_heavy.onnx",
        "pose_landmark_heavy.tflite",
        "pose_landmark_lite.onnx",
        "pose_landmark_lite.tflite",
    ],
 )
--- a/mediapipe/modules/pose_landmark/pose_landmark_by_roi_onnx_cuda.pbtxt
+++ b/mediapipe/modules/pose_landmark/pose_landmark_by_roi_onnx_cuda.pbtxt
@ -0,0 +1,165 @@
 # MediaPipe graph to detect/predict pose landmarks and optionally segmentation
 # within an ROI. (CPU input, and inference is executed with onnxruntime on
 # cuda.)
 #
 # It is required that "pose_landmark_full.onnx" is available at
 # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
 # path during execution. The model path is fixed in the InferenceCalculator
 # options below; this graph does not take a MODEL_COMPLEXITY input side packet,
 # so the lite and heavy models are not selectable here.
 #
 # EXAMPLE:
 #   node {
 #     calculator: "PoseLandmarkByRoiOnnxCUDA"
 #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
 #     input_stream: "IMAGE:image"
 #     input_stream: "ROI:roi"
 #     output_stream: "LANDMARKS:landmarks"
 #     output_stream: "SEGMENTATION_MASK:segmentation_mask"
 #   }
 type: "PoseLandmarkByRoiOnnxCUDA"
 # CPU image. (ImageFrame)
 input_stream: "IMAGE:image"
 # ROI (region of interest) within the given image where a pose is located.
 # (NormalizedRect)
 input_stream: "ROI:roi"
 # Whether to predict the segmentation mask. If unspecified, functions as set to
 # false. (bool)
 input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
 # Pose landmarks within the given ROI. (NormalizedLandmarkList)
 # We have 33 landmarks (see pose_landmark_topology.svg) and there are other
 # auxiliary key points.
 # 0 - nose
 # 1 - left eye (inner)
 # 2 - left eye
 # 3 - left eye (outer)
 # 4 - right eye (inner)
 # 5 - right eye
 # 6 - right eye (outer)
 # 7 - left ear
 # 8 - right ear
 # 9 - mouth (left)
 # 10 - mouth (right)
 # 11 - left shoulder
 # 12 - right shoulder
 # 13 - left elbow
 # 14 - right elbow
 # 15 - left wrist
 # 16 - right wrist
 # 17 - left pinky
 # 18 - right pinky
 # 19 - left index
 # 20 - right index
 # 21 - left thumb
 # 22 - right thumb
 # 23 - left hip
 # 24 - right hip
 # 25 - left knee
 # 26 - right knee
 # 27 - left ankle
 # 28 - right ankle
 # 29 - left heel
 # 30 - right heel
 # 31 - left foot index
 # 32 - right foot index
 #
 # NOTE: If a pose is not present within the given ROI, for this particular
 # timestamp there will not be an output packet in the LANDMARKS stream. However,
 # the MediaPipe framework will internally inform the downstream calculators of
 # the absence of this packet so that they don't wait for it unnecessarily.
 output_stream: "LANDMARKS:landmarks"
 # Auxiliary landmarks for deriving the ROI in the subsequent image.
 # (NormalizedLandmarkList)
 output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
 # Pose world landmarks within the given ROI. (LandmarkList)
 # World landmarks are real-world 3D coordinates in meters with the origin at the
 # center between hips. WORLD_LANDMARKS shares the same landmark topology as
 # LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
 # projected onto the 2D image surface, while WORLD_LANDMARKS provides
 # coordinates (in meters) of the 3D object itself.
 output_stream: "WORLD_LANDMARKS:world_landmarks"
 # Segmentation mask on CPU in ImageFormat::VEC32F1. (Image)
 output_stream: "SEGMENTATION_MASK:segmentation_mask"
 # Retrieves the image size.
 node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "SIZE:image_size"
 }
 # Crops and transforms the specified ROI in the input image into an image patch
 # represented as a tensor of dimension expected by the corresponding ML model,
 # while maintaining the aspect ratio of the ROI (which can be different from
 # that of the image patch). Therefore, there can be letterboxing around the ROI
 # in the generated tensor representation.
 node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "MATRIX:transformation_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 256
      output_tensor_height: 256
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
 }
 # Runs the pose landmark ONNX model through InferenceCalculator with the cuda
 # delegate, turning the input tensors into the raw output tensors decoded by
 # the next node. The model path is hard-coded to the "full" variant.
 node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
      delegate { cuda {} }
    }
  }
 }
 # Decodes the tensors into the corresponding landmark and segmentation mask
 # representation.
 node {
  calculator: "TensorsToPoseLandmarksAndSegmentation"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "TENSORS:output_tensors"
  output_stream: "LANDMARKS:roi_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  output_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
 }
 # Projects the landmarks and segmentation mask in the local coordinates of the
 # (potentially letterboxed) ROI back to the global coordinates of the full input
 # image.
 node {
  calculator: "PoseLandmarksAndSegmentationInverseProjection"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_RECT:roi"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  input_stream: "MATRIX:transformation_matrix"
  input_stream: "LANDMARKS:roi_landmarks"
  input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  input_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
  output_stream: "LANDMARKS:landmarks"
  output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:world_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
 }
--- a/mediapipe/modules/pose_landmark/pose_landmark_by_roi_onnx_tensorrt.pbtxt
+++ b/mediapipe/modules/pose_landmark/pose_landmark_by_roi_onnx_tensorrt.pbtxt
@ -0,0 +1,165 @@
 # MediaPipe graph to detect/predict pose landmarks and optionally segmentation
 # within an ROI. (CPU input, and inference is executed through ONNX Runtime
 # with the TensorRT delegate.)
 #
 # It is required that "pose_landmark_full.onnx" is available at
 # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
 # path during execution.
 #
 # NOTE: the model path is hard-coded in the InferenceCalculator node of this
 # graph, so MODEL_COMPLEXITY does not currently switch between the
 # "pose_landmark_lite.onnx", "pose_landmark_full.onnx" and
 # "pose_landmark_heavy.onnx" variants.
 #
 # EXAMPLE:
 #   node {
 #     calculator: "PoseLandmarkByRoiOnnxTensorRT"
 #     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
 #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
 #     input_stream: "IMAGE:image"
 #     input_stream: "ROI:roi"
 #     output_stream: "LANDMARKS:landmarks"
 #     output_stream: "SEGMENTATION_MASK:segmentation_mask"
 #   }
 type: "PoseLandmarkByRoiOnnxTensorRT"
 # CPU image. (ImageFrame)
 input_stream: "IMAGE:image"
 # ROI (region of interest) within the given image where a pose is located.
 # (NormalizedRect)
 input_stream: "ROI:roi"
 # Complexity of the pose landmark model: 0, 1 or 2. (int)
 # Declared because nodes that use this graph pass MODEL_COMPLEXITY (see the
 # EXAMPLE above); a tag supplied by the using node has to exist in the subgraph
 # interface. The value is currently not consumed by any node in this graph:
 # the InferenceCalculator below always loads "pose_landmark_full.onnx".
 input_side_packet: "MODEL_COMPLEXITY:model_complexity"
 # Whether to predict the segmentation mask. If unspecified, functions as set to
 # false. (bool)
 input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
 # Pose landmarks within the given ROI. (NormalizedLandmarkList)
 # We have 33 landmarks (see pose_landmark_topology.svg) and there are other
 # auxiliary key points.
 # 0 - nose
 # 1 - left eye (inner)
 # 2 - left eye
 # 3 - left eye (outer)
 # 4 - right eye (inner)
 # 5 - right eye
 # 6 - right eye (outer)
 # 7 - left ear
 # 8 - right ear
 # 9 - mouth (left)
 # 10 - mouth (right)
 # 11 - left shoulder
 # 12 - right shoulder
 # 13 - left elbow
 # 14 - right elbow
 # 15 - left wrist
 # 16 - right wrist
 # 17 - left pinky
 # 18 - right pinky
 # 19 - left index
 # 20 - right index
 # 21 - left thumb
 # 22 - right thumb
 # 23 - left hip
 # 24 - right hip
 # 25 - left knee
 # 26 - right knee
 # 27 - left ankle
 # 28 - right ankle
 # 29 - left heel
 # 30 - right heel
 # 31 - left foot index
 # 32 - right foot index
 #
 # NOTE: If a pose is not present within the given ROI, for this particular
 # timestamp there will not be an output packet in the LANDMARKS stream. However,
 # the MediaPipe framework will internally inform the downstream calculators of
 # the absence of this packet so that they don't wait for it unnecessarily.
 output_stream: "LANDMARKS:landmarks"
 # Auxiliary landmarks for deriving the ROI in the subsequent image.
 # (NormalizedLandmarkList)
 output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
 # Pose world landmarks within the given ROI. (LandmarkList)
 # World landmarks are real-world 3D coordinates in meters with the origin at the
 # center between hips. WORLD_LANDMARKS shares the same landmark topology as
 # LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
 # projected onto the 2D image surface, while WORLD_LANDMARKS provides
 # coordinates (in meters) of the 3D object itself.
 output_stream: "WORLD_LANDMARKS:world_landmarks"
 # Segmentation mask on CPU in ImageFormat::VEC32F1. (Image)
 output_stream: "SEGMENTATION_MASK:segmentation_mask"
 # Extracts the size of the input image; consumed by the inverse projection
 # node at the end of this graph.
 node: {
  calculator: "ImagePropertiesCalculator"
  output_stream: "SIZE:image_size"
  input_stream: "IMAGE_CPU:image"
 }
 # Cuts the ROI out of the input image and converts it into a 256x256 tensor
 # with values in [0.0, 1.0] for the landmark model. The ROI's aspect ratio is
 # preserved (it may differ from that of the tensor), so the tensor can contain
 # letterboxing; the padding and the applied transformation matrix are emitted
 # alongside the tensor.
 node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "MATRIX:transformation_matrix"
  options {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      keep_aspect_ratio: true
      output_tensor_width: 256
      output_tensor_height: 256
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
 }
 # Runs the pose landmark ONNX model on "input_tensors" and emits the raw
 # "output_tensors". The tensorrt delegate is requested and the model path is
 # fixed to the "full" variant.
 node: {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
  options {
    [mediapipe.InferenceCalculatorOptions.ext] {
      delegate {
        tensorrt {}
      }
      model_path: "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
    }
  }
 }
 # Turns the raw output tensors into ROI-local landmarks (regular, auxiliary and
 # world) and an ROI-local segmentation mask. The ENABLE_SEGMENTATION side packet
 # is forwarded to the decoder.
 node: {
  calculator: "TensorsToPoseLandmarksAndSegmentation"
  input_stream: "TENSORS:output_tensors"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  output_stream: "LANDMARKS:roi_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  output_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
 }
 # Maps the ROI-local results (whose ROI crop may have been letterboxed) back
 # into the coordinate system of the full input image, using the image size,
 # the ROI, the letterbox padding and the transformation matrix.
 node: {
  calculator: "PoseLandmarksAndSegmentationInverseProjection"
  # Geometry needed to undo the crop/letterbox.
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_RECT:roi"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  input_stream: "MATRIX:transformation_matrix"
  # ROI-local results to be projected.
  input_stream: "LANDMARKS:roi_landmarks"
  input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  input_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
  # Results of this graph.
  output_stream: "LANDMARKS:landmarks"
  output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:world_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
 }
--- a/mediapipe/modules/pose_landmark/pose_landmark_onnx_cuda.pbtxt
+++ b/mediapipe/modules/pose_landmark/pose_landmark_onnx_cuda.pbtxt
@ -0,0 +1,268 @@
 # MediaPipe graph to detect/predict pose landmarks. (CPU input, and inference is
 # executed through ONNX Runtime with the CUDA delegate.) This graph tries to
 # skip pose detection as much as possible by using previously
 # detected/predicted landmarks for new images.
 #
 # It is required that "pose_detection.onnx" is available at
 # "mediapipe/modules/pose_detection/pose_detection.onnx"
 # path during execution.
 #
 # It is required that "pose_landmark_lite.onnx" or
 # "pose_landmark_full.onnx" or "pose_landmark_heavy.onnx" is available at
 # "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or
 # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or
 # "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx"
 # path respectively during execution, depending on the specification in the
 # MODEL_COMPLEXITY input side packet.
 #
 # EXAMPLE:
 #   node {
 #     calculator: "PoseLandmarkOnnxCUDA"
 #     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
 #     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
 #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
 #     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
 #     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
 #     input_stream: "IMAGE:image"
 #     output_stream: "LANDMARKS:pose_landmarks"
 #     output_stream: "SEGMENTATION_MASK:segmentation_mask"
 #   }
 type: "PoseLandmarkOnnxCUDA"
 # CPU image. (ImageFrame)
 input_stream: "IMAGE:image"
 # Whether to filter landmarks across different input images to reduce jitter.
 # If unspecified, functions as set to true. (bool)
 input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
 # Whether to predict the segmentation mask. If unspecified, functions as set to
 # false. (bool)
 input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
 # Whether to filter segmentation mask across different input images to reduce
 # jitter. If unspecified, functions as set to true. (bool)
 input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
 # Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
 # inference latency generally go up with the model complexity. If unspecified,
 # functions as set to 1. (int)
 input_side_packet: "MODEL_COMPLEXITY:model_complexity"
 # Whether landmarks on the previous image should be used to help localize
 # landmarks on the current image. (bool)
 input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
 # Pose landmarks. (NormalizedLandmarkList)
 # We have 33 landmarks (see pose_landmark_topology.svg), and there are other
 # auxiliary key points.
 # 0 - nose
 # 1 - left eye (inner)
 # 2 - left eye
 # 3 - left eye (outer)
 # 4 - right eye (inner)
 # 5 - right eye
 # 6 - right eye (outer)
 # 7 - left ear
 # 8 - right ear
 # 9 - mouth (left)
 # 10 - mouth (right)
 # 11 - left shoulder
 # 12 - right shoulder
 # 13 - left elbow
 # 14 - right elbow
 # 15 - left wrist
 # 16 - right wrist
 # 17 - left pinky
 # 18 - right pinky
 # 19 - left index
 # 20 - right index
 # 21 - left thumb
 # 22 - right thumb
 # 23 - left hip
 # 24 - right hip
 # 25 - left knee
 # 26 - right knee
 # 27 - left ankle
 # 28 - right ankle
 # 29 - left heel
 # 30 - right heel
 # 31 - left foot index
 # 32 - right foot index
 #
 # NOTE: if a pose is not present within the given ROI, for this particular
 # timestamp there will not be an output packet in the LANDMARKS stream. However,
 # the MediaPipe framework will internally inform the downstream calculators of
 # the absence of this packet so that they don't wait for it unnecessarily.
 output_stream: "LANDMARKS:pose_landmarks"
 # Pose world landmarks. (LandmarkList)
 # World landmarks are real-world 3D coordinates in meters with the origin at the
 # center between hips. WORLD_LANDMARKS shares the same landmark topology as
 # LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
 # projected onto the 2D image surface, while WORLD_LANDMARKS provides
 # coordinates (in meters) of the 3D object itself.
 output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
 # Segmentation mask. (ImageFrame in ImageFormat::VEC32F1)
 output_stream: "SEGMENTATION_MASK:segmentation_mask"
 # Extra outputs (for debugging, for instance).
 # Detected poses. (Detection)
 output_stream: "DETECTION:pose_detection"
 # Regions of interest calculated based on landmarks. (NormalizedRect)
 output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
 # Regions of interest calculated based on pose detections. (NormalizedRect)
 output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"
 # Lets the pose rect derived from the previous image through only when the
 # optional "use_prev_landmarks" side packet is true or absent (the option below
 # supplies the default), so that earlier landmarks can help localize the pose
 # on the current image.
 node: {
  calculator: "GateCalculator"
  input_side_packet: "ALLOW:use_prev_landmarks"
  input_stream: "prev_pose_rect_from_landmarks"
  output_stream: "gated_prev_pose_rect_from_landmarks"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      allow: true
    }
  }
 }
 # Reports whether a (gated) previous pose rect from landmarks is available for
 # the current image.
 node {
  calculator: "PacketPresenceCalculator"
  output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present"
  input_stream: "PACKET:gated_prev_pose_rect_from_landmarks"
 }
 # Extracts the size of the input image for the ROI and filtering nodes below.
 node: {
  calculator: "ImagePropertiesCalculator"
  output_stream: "SIZE:image_size"
  input_stream: "IMAGE_CPU:image"
 }
 # Blocks the image (and its size) when a pose rect from the previous image is
 # already available; otherwise forwards both so that a fresh round of pose
 # detection is triggered.
 node: {
  calculator: "GateCalculator"
  # The untagged inputs map positionally onto the untagged outputs.
  input_stream: "image"
  input_stream: "image_size"
  input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
  output_stream: "image_for_pose_detection"
  output_stream: "image_size_for_pose_detection"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      empty_packets_as_allow: true
    }
  }
 }
 # Runs pose detection (PoseDetectionOnnxCUDA) on the images that passed the
 # gate above.
 node: {
  calculator: "PoseDetectionOnnxCUDA"
  output_stream: "DETECTIONS:pose_detections"
  input_stream: "IMAGE:image_for_pose_detection"
 }
 # Picks element 0 of the "pose_detections" vector and emits it as a single
 # detection rather than as a one-element vector.
 node: {
  calculator: "SplitDetectionVectorCalculator"
  input_stream: "pose_detections"
  output_stream: "pose_detection"
  options {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      element_only: true
      ranges {
        begin: 0
        end: 1
      }
    }
  }
 }
 # Derives the region of interest for landmark detection from the selected pose
 # detection and the image size.
 node: {
  calculator: "PoseDetectionToRoi"
  input_stream: "IMAGE_SIZE:image_size_for_pose_detection"
  input_stream: "DETECTION:pose_detection"
  output_stream: "ROI:pose_rect_from_detection"
 }
 # Selects either pose rect (or ROI) calculated from detection or from previously
 # detected landmarks if available (in this case, calculation of pose rect from
 # detection is skipped).
 # The two inputs are untagged, so their order in this node is significant and
 # must be kept as is.
 node {
  calculator: "MergeCalculator"
  input_stream: "pose_rect_from_detection"
  input_stream: "gated_prev_pose_rect_from_landmarks"
  output_stream: "pose_rect"
 }
 # Predicts pose landmarks (and the segmentation mask) inside the selected
 # region of interest of the image.
 # NOTE(review): the by-ROI CUDA graph appears to load a fixed
 # "pose_landmark_full.onnx"; confirm that it declares MODEL_COMPLEXITY and
 # whether the value has any effect there.
 node: {
  calculator: "PoseLandmarkByRoiOnnxCUDA"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:image"
  input_stream: "ROI:pose_rect"
  output_stream: "LANDMARKS:unfiltered_pose_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
 }
 # Filters the landmarks (regular, auxiliary and world) to reduce jitter; the
 # "smooth_landmarks" side packet is passed as ENABLE.
 node: {
  calculator: "PoseLandmarkFiltering"
  input_side_packet: "ENABLE:smooth_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks"
  input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks"
  output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks"
  output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks"
 }
 # Derives, from the filtered auxiliary landmarks, the region of interest that
 # will be used for the subsequent image.
 node: {
  calculator: "PoseLandmarksToRoi"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "LANDMARKS:auxiliary_landmarks"
  output_stream: "ROI:pose_rect_from_landmarks"
 }
 # Feedback loop: stores the pose rect computed from landmarks and, when the
 # next input image arrives, re-emits it stamped with that image's timestamp,
 # i.e. as the "previous" pose rect. On the very first input image a timestamp
 # bound update occurs instead, which jump starts the loop.
 node: {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:pose_rect_from_landmarks"
  # LOOP is fed by a node further down the graph, hence the back edge.
  input_stream_info {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks"
 }
 # Filters the segmentation mask to reduce jitter; the "smooth_segmentation"
 # side packet is passed as ENABLE.
 node: {
  calculator: "PoseSegmentationFiltering"
  input_side_packet: "ENABLE:smooth_segmentation"
  output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
  input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
 }
 # Converts the filtered segmentation mask from an Image into the ImageFrame
 # that this graph exposes as SEGMENTATION_MASK.
 node {
  calculator: "FromImageCalculator"
  output_stream: "IMAGE_CPU:segmentation_mask"
  input_stream: "IMAGE:filtered_segmentation_mask"
 }
--- a/mediapipe/modules/pose_landmark/pose_landmark_onnx_tensorrt.pbtxt
+++ b/mediapipe/modules/pose_landmark/pose_landmark_onnx_tensorrt.pbtxt
@ -0,0 +1,268 @@
 # MediaPipe graph to detect/predict pose landmarks. (CPU input, and inference is
 # executed through ONNX Runtime with the TensorRT delegate.) This graph tries to
 # skip pose detection as much as possible by using previously
 # detected/predicted landmarks for new images.
 #
 # It is required that "pose_detection.onnx" is available at
 # "mediapipe/modules/pose_detection/pose_detection.onnx"
 # path during execution.
 #
 # It is required that "pose_landmark_lite.onnx" or
 # "pose_landmark_full.onnx" or "pose_landmark_heavy.onnx" is available at
 # "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or
 # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or
 # "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx"
 # path respectively during execution, depending on the specification in the
 # MODEL_COMPLEXITY input side packet.
 #
 # EXAMPLE:
 #   node {
 #     calculator: "PoseLandmarkOnnxTensorRT"
 #     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
 #     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
 #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
 #     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
 #     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
 #     input_stream: "IMAGE:image"
 #     output_stream: "LANDMARKS:pose_landmarks"
 #     output_stream: "SEGMENTATION_MASK:segmentation_mask"
 #   }
 type: "PoseLandmarkOnnxTensorRT"
 # CPU image. (ImageFrame)
 input_stream: "IMAGE:image"
 # Whether to filter landmarks across different input images to reduce jitter.
 # If unspecified, functions as set to true. (bool)
 input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
 # Whether to predict the segmentation mask. If unspecified, functions as set to
 # false. (bool)
 input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
 # Whether to filter segmentation mask across different input images to reduce
 # jitter. If unspecified, functions as set to true. (bool)
 input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
 # Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
 # inference latency generally go up with the model complexity. If unspecified,
 # functions as set to 1. (int)
 input_side_packet: "MODEL_COMPLEXITY:model_complexity"
 # Whether landmarks on the previous image should be used to help localize
 # landmarks on the current image. (bool)
 input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
 # Pose landmarks. (NormalizedLandmarkList)
 # We have 33 landmarks (see pose_landmark_topology.svg), and there are other
 # auxiliary key points.
 # 0 - nose
 # 1 - left eye (inner)
 # 2 - left eye
 # 3 - left eye (outer)
 # 4 - right eye (inner)
 # 5 - right eye
 # 6 - right eye (outer)
 # 7 - left ear
 # 8 - right ear
 # 9 - mouth (left)
 # 10 - mouth (right)
 # 11 - left shoulder
 # 12 - right shoulder
 # 13 - left elbow
 # 14 - right elbow
 # 15 - left wrist
 # 16 - right wrist
 # 17 - left pinky
 # 18 - right pinky
 # 19 - left index
 # 20 - right index
 # 21 - left thumb
 # 22 - right thumb
 # 23 - left hip
 # 24 - right hip
 # 25 - left knee
 # 26 - right knee
 # 27 - left ankle
 # 28 - right ankle
 # 29 - left heel
 # 30 - right heel
 # 31 - left foot index
 # 32 - right foot index
 #
 # NOTE: if a pose is not present within the given ROI, for this particular
 # timestamp there will not be an output packet in the LANDMARKS stream. However,
 # the MediaPipe framework will internally inform the downstream calculators of
 # the absence of this packet so that they don't wait for it unnecessarily.
 output_stream: "LANDMARKS:pose_landmarks"
 # Pose world landmarks. (LandmarkList)
 # World landmarks are real-world 3D coordinates in meters with the origin at the
 # center between hips. WORLD_LANDMARKS shares the same landmark topology as
 # LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
 # projected onto the 2D image surface, while WORLD_LANDMARKS provides
 # coordinates (in meters) of the 3D object itself.
 output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
 # Segmentation mask. (ImageFrame in ImageFormat::VEC32F1)
 output_stream: "SEGMENTATION_MASK:segmentation_mask"
 # Extra outputs (for debugging, for instance).
 # Detected poses. (Detection)
 output_stream: "DETECTION:pose_detection"
 # Regions of interest calculated based on landmarks. (NormalizedRect)
 output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
 # Regions of interest calculated based on pose detections. (NormalizedRect)
 output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"
 # Lets the pose rect derived from the previous image through only when the
 # optional "use_prev_landmarks" side packet is true or absent (the option below
 # supplies the default), so that earlier landmarks can help localize the pose
 # on the current image.
 node: {
  calculator: "GateCalculator"
  input_side_packet: "ALLOW:use_prev_landmarks"
  input_stream: "prev_pose_rect_from_landmarks"
  output_stream: "gated_prev_pose_rect_from_landmarks"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      allow: true
    }
  }
 }
 # Reports whether a (gated) previous pose rect from landmarks is available for
 # the current image.
 node {
  calculator: "PacketPresenceCalculator"
  output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present"
  input_stream: "PACKET:gated_prev_pose_rect_from_landmarks"
 }
 # Extracts the size of the input image for the ROI and filtering nodes below.
 node: {
  calculator: "ImagePropertiesCalculator"
  output_stream: "SIZE:image_size"
  input_stream: "IMAGE_CPU:image"
 }
 # Blocks the image (and its size) when a pose rect from the previous image is
 # already available; otherwise forwards both so that a fresh round of pose
 # detection is triggered.
 node: {
  calculator: "GateCalculator"
  # The untagged inputs map positionally onto the untagged outputs.
  input_stream: "image"
  input_stream: "image_size"
  input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
  output_stream: "image_for_pose_detection"
  output_stream: "image_size_for_pose_detection"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      empty_packets_as_allow: true
    }
  }
 }
 # Runs pose detection (PoseDetectionOnnxTensorRT) on the images that passed
 # the gate above.
 node: {
  calculator: "PoseDetectionOnnxTensorRT"
  output_stream: "DETECTIONS:pose_detections"
  input_stream: "IMAGE:image_for_pose_detection"
 }
 # Picks element 0 of the "pose_detections" vector and emits it as a single
 # detection rather than as a one-element vector.
 node: {
  calculator: "SplitDetectionVectorCalculator"
  input_stream: "pose_detections"
  output_stream: "pose_detection"
  options {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      element_only: true
      ranges {
        begin: 0
        end: 1
      }
    }
  }
 }
 # Derives the region of interest for landmark detection from the selected pose
 # detection and the image size.
 node: {
  calculator: "PoseDetectionToRoi"
  input_stream: "IMAGE_SIZE:image_size_for_pose_detection"
  input_stream: "DETECTION:pose_detection"
  output_stream: "ROI:pose_rect_from_detection"
 }
 # Selects either pose rect (or ROI) calculated from detection or from previously
 # detected landmarks if available (in this case, calculation of pose rect from
 # detection is skipped).
 # The two inputs are untagged, so their order in this node is significant and
 # must be kept as is.
 node {
  calculator: "MergeCalculator"
  input_stream: "pose_rect_from_detection"
  input_stream: "gated_prev_pose_rect_from_landmarks"
  output_stream: "pose_rect"
 }
 # Predicts pose landmarks (and the segmentation mask) inside the selected
 # region of interest of the image.
 # NOTE: PoseLandmarkByRoiOnnxTensorRT loads a fixed model path
 # ("pose_landmark_full.onnx"), so MODEL_COMPLEXITY currently has no effect on
 # which landmark model is used.
 node: {
  calculator: "PoseLandmarkByRoiOnnxTensorRT"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:image"
  input_stream: "ROI:pose_rect"
  output_stream: "LANDMARKS:unfiltered_pose_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
 }
 # Filters the landmarks (regular, auxiliary and world) to reduce jitter; the
 # "smooth_landmarks" side packet is passed as ENABLE.
 node: {
  calculator: "PoseLandmarkFiltering"
  input_side_packet: "ENABLE:smooth_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks"
  input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks"
  output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks"
  output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks"
 }
 # Derives, from the filtered auxiliary landmarks, the region of interest that
 # will be used for the subsequent image.
 node: {
  calculator: "PoseLandmarksToRoi"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "LANDMARKS:auxiliary_landmarks"
  output_stream: "ROI:pose_rect_from_landmarks"
 }
 # Feedback loop: stores the pose rect computed from landmarks and, when the
 # next input image arrives, re-emits it stamped with that image's timestamp,
 # i.e. as the "previous" pose rect. On the very first input image a timestamp
 # bound update occurs instead, which jump starts the loop.
 node: {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:pose_rect_from_landmarks"
  # LOOP is fed by a node further down the graph, hence the back edge.
  input_stream_info {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks"
 }
 # Filters the segmentation mask to reduce jitter; the "smooth_segmentation"
 # side packet is passed as ENABLE.
 node: {
  calculator: "PoseSegmentationFiltering"
  input_side_packet: "ENABLE:smooth_segmentation"
  output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
  input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
 }
 # Converts the filtered segmentation mask from an Image into the ImageFrame
 # that this graph exposes as SEGMENTATION_MASK.
 node {
  calculator: "FromImageCalculator"
  output_stream: "IMAGE_CPU:segmentation_mask"
  input_stream: "IMAGE:filtered_segmentation_mask"
 }