// mediapipe/mediapipe2/calculators/video/motion_analysis_calculator.proto

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/util/tracking/motion_analysis.proto";

// Options for the MotionAnalysisCalculator. The message registers itself as
// an extension of CalculatorOptions (see the `extend` block below).
//
// Next tag: 10
// NOTE(review): tags 2 and 3 are not used by any field in this message.
// Presumably they belonged to removed fields - confirm, and if so consider
// adding `reserved 2, 3;` so they cannot be reused by accident.
message MotionAnalysisCalculatorOptions {
  // Registers this message as an extension field of CalculatorOptions.
  extend CalculatorOptions {
    optional MotionAnalysisCalculatorOptions ext = 270698255;
  }

  // Motion analysis settings. The MotionAnalysisOptions type is presumably
  // declared in the imported mediapipe/util/tracking/motion_analysis.proto -
  // see that file for the meaning of the individual settings.
  optional mediapipe.MotionAnalysisOptions analysis_options = 1;

  // Determines how the optional input SELECTION (if present) is used to
  // compute the final camera motion.
  enum SelectionAnalysis {
    // Recomputes camera motion for selected frame neighbors.
    ANALYSIS_RECOMPUTE = 1;

    // Uses composited camera motion and region flow from the SELECTION input.
    // No tracking or re-computation is performed.
    // Note that in this case only the CAMERA, FLOW and VIDEO_OUT tags are
    // supported as output.
    NO_ANALYSIS_USE_SELECTION = 2;

    // Recomputes camera motion for selected frame neighbors using the
    // features supplied by the SELECTION input. No feature tracking is
    // performed.
    ANALYSIS_FROM_FEATURES = 3;

    // Recomputes camera motion for selected frame neighbors, but seeds the
    // initial transform with the camera motion from the SELECTION input.
    ANALYSIS_WITH_SEED = 4;
  }

  // Selection handling mode; see SelectionAnalysis for the available modes.
  // Defaults to ANALYSIS_WITH_SEED when unset.
  optional SelectionAnalysis selection_analysis = 4
      [default = ANALYSIS_WITH_SEED];

  // Only relevant when the SELECTION input is used. If set, replaces the
  // computed camera motion (for any of the ANALYSIS_* cases above) with the
  // one supplied by the frame selection, in case the frame selection one is
  // more stable.
  // For example, if the recomputed camera motion is unstable but the one from
  // the selection result is stable, the stable result is used instead.
  optional bool hybrid_selection_camera = 5 [default = false];

  // Determines how the optional input META is used to compute the final
  // camera motion.
  enum MetaAnalysis {
    // Uses the motions supplied by the metadata as is.
    META_ANALYSIS_USE_META = 1;

    // Seeds visual tracking from the metadata motions: estimates the visual
    // residual motion and combines it with the metadata.
    META_ANALYSIS_HYBRID = 2;
  }

  // Metadata handling mode; see MetaAnalysis for the available modes.
  // Defaults to META_ANALYSIS_USE_META when unset.
  optional MetaAnalysis meta_analysis = 8 [default = META_ANALYSIS_USE_META];

  // Determines the number of homography models per frame stored in the CSV
  // file or in the homography metadata in META.
  // For values > 1, MixtureHomographies are created.
  optional int32 meta_models_per_frame = 6 [default = 1];

  // Used for META_ANALYSIS_HYBRID. Rejects features whose flow deviates from
  // the ground truth metadata motion by more than
  // meta_outlier_domain_ratio * image diagonal size.
  optional float meta_outlier_domain_ratio = 9 [default = 0.0015];

  // If true, the MotionAnalysisCalculator will skip all processing and emit no
  // packets on any output. This is useful for quickly creating different
  // versions of a MediaPipe graph without changing its structure, assuming
  // that downstream calculators can handle missing input packets.
  // TODO: Remove this hack. See b/36485206 for more details.
  optional bool bypass_mode = 7 [default = false];
}

// Per-frame homography metadata.
//
// Copied from
// java/com/google/android/libraries/microvideo/proto/microvideo.proto to
// satisfy leakr requirements.
// NOTE(review): as a copy, field numbers and types presumably have to stay in
// sync with that proto - confirm before changing anything here.
// TODO: Remove this copy and use the proto referenced above instead.
message HomographyData {
  // For each frame, there are 12 homography matrices stored. Each matrix is
  // 3x3 (9 elements), so this field contains 12 x 3 x 3 = 108 float values.
  // Values are laid out matrix by matrix, and row by row within each matrix:
  // the first row of the first homography matrix is followed by its second
  // row, then its third row, then the first row of the second homography
  // matrix, etc.
  repeated float motion_homography_data = 1 [packed = true];

  // Vector containing histogram counts for individual patches in the frame.
  repeated uint32 histogram_count_data = 2 [packed = true];

  // The width of the frame at the time the metadata was sampled.
  optional int32 frame_width = 3;

  // The height of the frame at the time the metadata was sampled.
  optional int32 frame_height = 4;
}