# Autoflip graph that renders the final cropped video and debugging videos.
# For use by developers who may be adding signals and adjusting weights.
max_queue_size: -1

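# Note: a max_queue_size of -1 removes the limit on input-stream queues,
# which suits this offline (non-realtime) graph: packets queue up instead of
# being throttled, at the cost of memory.
#
# The graph is typically driven by the desktop AutoFlip runner; the command
# below is an illustrative sketch (file paths and the 1:1 aspect ratio are
# placeholders, not part of this file):
#
#   bazel run -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
#     mediapipe/examples/desktop/autoflip:run_autoflip -- \
#     --calculator_graph_config_file=/path/to/this/graph.pbtxt \
#     --input_side_packets=input_video_path=in.mp4,output_video_path=out.mp4,aspect_ratio=1:1
#
# Because this development graph also renders debug videos, it additionally
# expects key_frame_crop_viz_frames_path and salient_point_viz_frames_path
# side packets (see the ENCODING(optional) nodes at the bottom).
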
# VIDEO_PREP: Decodes an input video file into images and a video header.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_video_path"
  output_stream: "VIDEO:video_raw"
  output_stream: "VIDEO_PRESTREAM:video_header"
  output_side_packet: "SAVED_AUDIO_PATH:audio_path"
}

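# SAVED_AUDIO_PATH points at a temporary copy of the input's audio track;
# OpenCV decodes video only, so the audio is set aside here and muxed back
# in by the encoder node near the end of the graph.
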
# VIDEO_PREP: Scale the input video before feature extraction.
node {
  calculator: "ScaleImageCalculator"
  input_stream: "FRAMES:video_raw"
  input_stream: "VIDEO_HEADER:video_header"
  output_stream: "FRAMES:video_frames_scaled"
  options: {
    [mediapipe.ScaleImageCalculatorOptions.ext]: {
      preserve_aspect_ratio: true
      output_format: SRGB
      target_width: 480
      algorithm: DEFAULT_WITHOUT_UPSCALE
    }
  }
}

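# Frames are converted to SRGB and scaled down so the width is at most
# 480 px (height follows from preserve_aspect_ratio); DEFAULT_WITHOUT_UPSCALE
# leaves inputs already narrower than 480 px untouched. Detection quality
# versus speed can be traded off by adjusting target_width.
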
# VIDEO_PREP: Create a low frame rate stream for feature extraction.
node {
  calculator: "PacketThinnerCalculator"
  input_stream: "video_frames_scaled"
  output_stream: "video_frames_scaled_downsampled"
  options: {
    [mediapipe.PacketThinnerCalculatorOptions.ext]: {
      thinner_type: ASYNC
      period: 200000
    }
  }
}

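# period is in microseconds: 200000 us passes at most one frame per 0.2 s
# (about 5 fps) to the detectors, keeping face and object detection cheap
# while cropping decisions are still rendered at the full input frame rate.
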
# DETECTION: Find borders around the video and the major background color.
node {
  calculator: "BorderDetectionCalculator"
  input_stream: "VIDEO:video_raw"
  output_stream: "DETECTED_BORDERS:borders"
}

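# Border detection runs on the full-resolution video_raw stream so that any
# detected letterboxing or pillarboxing lines up exactly with the frames the
# cropper will cut.
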
# DETECTION: Find shot/scene boundaries on the full frame rate stream.
node {
  calculator: "ShotBoundaryCalculator"
  input_stream: "VIDEO:video_frames_scaled"
  output_stream: "IS_SHOT_CHANGE:shot_change"
  options {
    [mediapipe.autoflip.ShotBoundaryCalculatorOptions.ext] {
      min_shot_span: 0.2
      min_motion: 0.3
      window_size: 15
      min_shot_measure: 10
      min_motion_with_shot_measure: 0.05
    }
  }
}

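# Shot changes partition the video into scenes that SceneCroppingCalculator
# handles independently, so the virtual camera can jump at a cut instead of
# panning across it.
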
# DETECTION: Find faces on the downsampled stream.
node {
  calculator: "AutoFlipFaceDetectionSubgraph"
  input_stream: "VIDEO:video_frames_scaled_downsampled"
  output_stream: "DETECTIONS:face_detections"
}
node {
  calculator: "FaceToRegionCalculator"
  input_stream: "VIDEO:video_frames_scaled_downsampled"
  input_stream: "FACES:face_detections"
  output_stream: "REGIONS:face_regions"
}

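# FaceToRegionCalculator turns raw face detections into salient regions,
# splitting them into full-face, core-landmark, and all-landmark signal
# types that SignalFusingCalculator weights separately below.
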
# DETECTION: Find objects on the downsampled stream.
node {
  calculator: "AutoFlipObjectDetectionSubgraph"
  input_stream: "VIDEO:video_frames_scaled_downsampled"
  output_stream: "DETECTIONS:object_detections"
}
node {
  calculator: "LocalizationToRegionCalculator"
  input_stream: "DETECTIONS:object_detections"
  output_stream: "REGIONS:object_regions"
  options {
    [mediapipe.autoflip.LocalizationToRegionCalculatorOptions.ext] {
      output_all_signals: true
    }
  }
}

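# With output_all_signals enabled, every detected object class is emitted as
# a region (HUMAN, PET, CAR, and the generic OBJECT fallback) rather than
# only a curated subset; the fusion stage below decides what each class is
# worth.
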
# SIGNAL FUSION: Combine detections (with weights) on each frame.
node {
  calculator: "SignalFusingCalculator"
  input_stream: "shot_change"
  input_stream: "face_regions"
  input_stream: "object_regions"
  output_stream: "salient_regions"
  options {
    [mediapipe.autoflip.SignalFusingCalculatorOptions.ext] {
      signal_settings {
        type: { standard: FACE_CORE_LANDMARKS }
        min_score: 0.85
        max_score: 0.9
        is_required: false
      }
      signal_settings {
        type: { standard: FACE_ALL_LANDMARKS }
        min_score: 0.8
        max_score: 0.85
        is_required: false
      }
      signal_settings {
        type: { standard: FACE_FULL }
        min_score: 0.8
        max_score: 0.85
        is_required: false
      }
      signal_settings {
        type: { standard: HUMAN }
        min_score: 0.75
        max_score: 0.8
        is_required: false
      }
      signal_settings {
        type: { standard: PET }
        min_score: 0.7
        max_score: 0.75
        is_required: false
      }
      signal_settings {
        type: { standard: CAR }
        min_score: 0.7
        max_score: 0.75
        is_required: false
      }
      signal_settings {
        type: { standard: OBJECT }
        min_score: 0.1
        max_score: 0.2
        is_required: false
      }
    }
  }
}

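# Scores from each detector are remapped into the [min_score, max_score]
# band for that signal type, so the non-overlapping bands above act as a
# priority ordering: core face landmarks outrank full faces and all
# landmarks, which outrank humans, then pets and cars, with generic objects
# weighted lowest. To re-rank signals for an experiment, swap their bands;
# e.g. giving PET the HUMAN band (and moving HUMAN down to 0.7-0.75) would
# favor pets:
#
#   signal_settings {
#     type: { standard: PET }
#     min_score: 0.75
#     max_score: 0.8
#     is_required: false
#   }
#
# Setting is_required: true instead tells the cropper it must keep that
# signal type inside the crop window whenever possible.
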
# CROPPING: Make decisions about how to crop each frame.
node {
  calculator: "SceneCroppingCalculator"
  input_side_packet: "EXTERNAL_ASPECT_RATIO:aspect_ratio"
  input_stream: "VIDEO_FRAMES:video_raw"
  input_stream: "KEY_FRAMES:video_frames_scaled_downsampled"
  input_stream: "DETECTION_FEATURES:salient_regions"
  input_stream: "STATIC_FEATURES:borders"
  input_stream: "SHOT_BOUNDARIES:shot_change"
  output_stream: "CROPPED_FRAMES:cropped_frames"
  output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames"
  output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames"
  options: {
    [mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
      max_scene_size: 600
      key_frame_crop_options: {
        score_aggregation_type: CONSTANT
      }
      scene_camera_motion_analyzer_options: {
        motion_stabilization_threshold_percent: 0.5
        salient_point_bound: 0.499
      }
      padding_parameters: {
        blur_cv_size: 200
        overlay_opacity: 0.6
      }
      target_size_type: MAXIMIZE_TARGET_DIMENSION
    }
  }
}

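# The target aspect ratio arrives via the external aspect_ratio side packet
# (a "width:height" string such as 1:1 or 9:16). max_scene_size appears to
# cap how many frames a single scene may buffer before a crop decision is
# forced, the motion analyzer options choose between a stationary and a
# tracking camera per scene, and padding_parameters configure the blurred,
# dimmed background used when content cannot fill the target frame. The two
# *_viz_frames streams exist only to feed the debug encoders below.
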
# ENCODING(required): Encode the video stream for the final cropped output.
node {
  calculator: "VideoPreStreamCalculator"
  # Fetch frame format and dimension from input frames.
  input_stream: "FRAME:cropped_frames"
  # Copy frame rate and duration from the original video.
  input_stream: "VIDEO_PRESTREAM:video_header"
  output_stream: "output_frames_video_header"
}

node {
  calculator: "OpenCvVideoEncoderCalculator"
  input_stream: "VIDEO:cropped_frames"
  input_stream: "VIDEO_PRESTREAM:output_frames_video_header"
  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
  input_side_packet: "AUDIO_FILE_PATH:audio_path"
  options: {
    [mediapipe.OpenCvVideoEncoderCalculatorOptions.ext]: {
      codec: "avc1"
      video_format: "mp4"
    }
  }
}

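# The encoder writes H.264 ("avc1") in an MP4 container and muxes back the
# audio saved by the decoder via AUDIO_FILE_PATH. Note that encoding here
# depends on the local OpenCV build supporting the requested codec.
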
# ENCODING(optional): Encode the video stream for the key_frame_crop_viz_frames
# output. Draws boxes around required and non-required objects.
node {
  calculator: "VideoPreStreamCalculator"
  # Fetch frame format and dimension from input frames.
  input_stream: "FRAME:key_frame_crop_viz_frames"
  # Copy frame rate and duration from the original video.
  input_stream: "VIDEO_PRESTREAM:video_header"
  output_stream: "key_frame_crop_viz_frames_header"
}

node {
  calculator: "OpenCvVideoEncoderCalculator"
  input_stream: "VIDEO:key_frame_crop_viz_frames"
  input_stream: "VIDEO_PRESTREAM:key_frame_crop_viz_frames_header"
  input_side_packet: "OUTPUT_FILE_PATH:key_frame_crop_viz_frames_path"
  options: {
    [mediapipe.OpenCvVideoEncoderCalculatorOptions.ext]: {
      codec: "avc1"
      video_format: "mp4"
    }
  }
}

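# This debug encoder and the one below mirror the main encoder; they differ
# only in which frames they encode and in the OUTPUT_FILE_PATH side packet
# they expect. Both (together with their VideoPreStreamCalculator nodes) can
# be removed when only the final cropped video is needed.
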
# ENCODING(optional): Encode the video stream for the salient_point_viz_frames
# output. Draws the focus points and the scene crop window (red).
node {
  calculator: "VideoPreStreamCalculator"
  # Fetch frame format and dimension from input frames.
  input_stream: "FRAME:salient_point_viz_frames"
  # Copy frame rate and duration from the original video.
  input_stream: "VIDEO_PRESTREAM:video_header"
  output_stream: "salient_point_viz_frames_header"
}

node {
  calculator: "OpenCvVideoEncoderCalculator"
  input_stream: "VIDEO:salient_point_viz_frames"
  input_stream: "VIDEO_PRESTREAM:salient_point_viz_frames_header"
  input_side_packet: "OUTPUT_FILE_PATH:salient_point_viz_frames_path"
  options: {
    [mediapipe.OpenCvVideoEncoderCalculatorOptions.ext]: {
      codec: "avc1"
      video_format: "mp4"
    }
  }
}