Update python documentation.

PiperOrigin-RevId: 488482142
2022-11-14 15:16:36 -08:00 · 2022-11-14 15:16:36 -08:00 · e714e656fe
commit e714e656fe
parent c027373688
8 changed files with 213 additions and 8 deletions
--- a/mediapipe/tasks/python/audio/audio_classifier.py
+++ b/mediapipe/tasks/python/audio/audio_classifier.py
@ -86,7 +86,30 @@ class AudioClassifierOptions:
 class AudioClassifier(base_audio_task_api.BaseAudioTaskApi):
-  """Class that performs audio classification on audio data."""
+  """Class that performs audio classification on audio data.
  This API expects a TFLite model with mandatory TFLite Model Metadata that
  contains the mandatory AudioProperties of the solo input audio tensor and the
  optional (but recommended) category labels as AssociatedFiles with type
  TENSOR_AXIS_LABELS per output classification tensor.
  Input tensor:
    (kTfLiteFloat32)
    - input audio buffer of size `[batch * samples]`.
    - batch inference is not supported (`batch` is required to be 1).
    - for multi-channel models, the channels must be interleaved.
  At least one output tensor with:
    (kTfLiteFloat32)
    - `[1 x N]` array, where `N` represents the number of categories.
    - optional (but recommended) category labels as AssociatedFiles with type
      TENSOR_AXIS_LABELS, containing one label per line. The first such
      AssociatedFile (if any) is used to fill the `category_name` field of the
      results. The `display_name` field is filled from the AssociatedFile (if
      any) whose locale matches the `display_names_locale` field of the
      `AudioClassifierOptions` used at creation time ("en" by default, i.e.
      English). If none of these are available, only the `index` field of the
      results will be filled.
  """
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'AudioClassifier':
--- a/mediapipe/tasks/python/audio/audio_embedder.py
+++ b/mediapipe/tasks/python/audio/audio_embedder.py
@ -87,7 +87,24 @@ class AudioEmbedderOptions:
 class AudioEmbedder(base_audio_task_api.BaseAudioTaskApi):
-  """Class that performs embedding extraction on audio clips or audio stream."""
+  """Class that performs embedding extraction on audio clips or audio stream.
  This API expects a TFLite model with mandatory TFLite Model Metadata that
  contains the mandatory AudioProperties of the solo input audio tensor and the
  optional (but recommended) label items as AssociatedFiles with type
  TENSOR_AXIS_LABELS per output embedding tensor.
  Input tensor:
    (kTfLiteFloat32)
    - input audio buffer of size `[batch * samples]`.
    - batch inference is not supported (`batch` is required to be 1).
    - for multi-channel models, the channels must be interleaved.
  At least one output tensor with:
    (kTfLiteUInt8/kTfLiteFloat32)
    - `N` components corresponding to the `N` dimensions of the returned
      feature vector for this output layer.
    - Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
  """
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'AudioEmbedder':
--- a/mediapipe/tasks/python/text/text_classifier.py
+++ b/mediapipe/tasks/python/text/text_classifier.py
@ -62,7 +62,38 @@ class TextClassifierOptions:
 class TextClassifier(base_text_task_api.BaseTextTaskApi):
-  """Class that performs classification on text."""
+  """Class that performs classification on text.
  This API expects a TFLite model with (optional) TFLite Model Metadata that
  contains the mandatory (described below) input tensors, output tensor,
  and the optional (but recommended) category labels as AssociatedFiles with
  type TENSOR_AXIS_LABELS per output classification tensor. Metadata is
  required for models with int32 input tensors because it contains the input
  process unit for the model's Tokenizer. No metadata is required for models
  with string input tensors.
  Input tensors:
    (kTfLiteInt32)
    - 3 input tensors of size `[batch_size x bert_max_seq_len]` representing
      the input ids, segment ids, and mask ids
    - or 1 input tensor of size `[batch_size x max_seq_len]` representing the
      input ids
    or (kTfLiteString)
    - 1 input tensor that is shapeless or has shape [1] containing the input
      string
  At least one output tensor with:
    (kTfLiteFloat32/kTfLiteBool)
    - `[1 x N]` array, where `N` represents the number of categories.
    - optional (but recommended) category labels as AssociatedFiles with type
      TENSOR_AXIS_LABELS, containing one label per line. The first such
      AssociatedFile (if any) is used to fill the `category_name` field of the
      results. The `display_name` field is filled from the AssociatedFile (if
      any) whose locale matches the `display_names_locale` field of the
      `TextClassifierOptions` used at creation time ("en" by default, i.e.
      English). If none of these are available, only the `index` field of the
      results will be filled.
  """
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'TextClassifier':
--- a/mediapipe/tasks/python/text/text_embedder.py
+++ b/mediapipe/tasks/python/text/text_embedder.py
@ -63,7 +63,27 @@ class TextEmbedderOptions:
 class TextEmbedder(base_text_task_api.BaseTextTaskApi):
-  """Class that performs embedding extraction on text."""
+  """Class that performs embedding extraction on text.
  This API expects a TFLite model with TFLite Model Metadata that contains the
  mandatory (described below) input tensors and output tensors. Metadata should
  contain the input process unit for the model's Tokenizer as well as input /
  output tensor metadata.
  Input tensors:
    (kTfLiteInt32)
    - 3 input tensors of size `[batch_size x bert_max_seq_len]` with names
      "ids", "mask", and "segment_ids" representing the input ids, mask ids, and
      segment ids respectively.
    - or 1 input tensor of size `[batch_size x max_seq_len]` representing the
      input ids.
  At least one output tensor with:
    (kTfLiteFloat32)
    - `N` components corresponding to the `N` dimensions of the returned
      feature vector for this output layer.
    - Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
  """
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'TextEmbedder':
--- a/mediapipe/tasks/python/vision/image_classifier.py
+++ b/mediapipe/tasks/python/vision/image_classifier.py
@ -87,7 +87,40 @@ class ImageClassifierOptions:
 class ImageClassifier(base_vision_task_api.BaseVisionTaskApi):
-  """Class that performs image classification on images."""
+  """Class that performs image classification on images.
  The API expects a TFLite model with optional, but strongly recommended,
  TFLite Model Metadata.
  Input tensor:
    (kTfLiteUInt8/kTfLiteFloat32)
    - image input of size `[batch x height x width x channels]`.
    - batch inference is not supported (`batch` is required to be 1).
    - only RGB inputs are supported (`channels` is required to be 3).
    - if type is kTfLiteFloat32, NormalizationOptions are required to be
      attached to the metadata for input normalization.
  At least one output tensor with:
    (kTfLiteUInt8/kTfLiteFloat32)
    - `N` classes and either 2 or 4 dimensions, i.e. `[1 x N]` or
      `[1 x 1 x 1 x N]`.
    - optional (but recommended) label map(s) as AssociatedFiles with type
      TENSOR_AXIS_LABELS, containing one label per line. The first such
      AssociatedFile (if any) is used to fill the `class_name` field of the
      results. The `display_name` field is filled from the AssociatedFile (if
      any) whose locale matches the `display_names_locale` field of the
      `ImageClassifierOptions` used at creation time ("en" by default, i.e.
      English). If none of these are available, only the `index` field of the
      results will be filled.
    - optional score calibration can be attached using ScoreCalibrationOptions
      and an AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION. See
      metadata_schema.fbs [1] for more details.
  An example of such a model can be found at:
  https://tfhub.dev/bohemian-visual-recognition-alliance/lite-model/models/mushroom-identification_v1/1
  [1]:
  https://github.com/google/mediapipe/blob/6cdc6443b6a7ed662744e2a2ce2d58d9c83e6d6f/mediapipe/tasks/metadata/metadata_schema.fbs#L456
  """
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'ImageClassifier':
--- a/mediapipe/tasks/python/vision/image_embedder.py
+++ b/mediapipe/tasks/python/vision/image_embedder.py
@ -86,7 +86,24 @@ class ImageEmbedderOptions:
 class ImageEmbedder(base_vision_task_api.BaseVisionTaskApi):
-  """Class that performs embedding extraction on images."""
+  """Class that performs embedding extraction on images.
  The API expects a TFLite model with optional, but strongly recommended,
  TFLite Model Metadata.
  Input tensor:
    (kTfLiteUInt8/kTfLiteFloat32)
    - image input of size `[batch x height x width x channels]`.
    - batch inference is not supported (`batch` is required to be 1).
    - only RGB inputs are supported (`channels` is required to be 3).
    - if type is kTfLiteFloat32, NormalizationOptions are required to be
      attached to the metadata for input normalization.
  At least one output tensor with:
    (kTfLiteUInt8/kTfLiteFloat32)
    - `N` components corresponding to the `N` dimensions of the returned
      feature vector for this output layer.
    - Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
  """
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'ImageEmbedder':
--- a/mediapipe/tasks/python/vision/image_segmenter.py
+++ b/mediapipe/tasks/python/vision/image_segmenter.py
@ -93,7 +93,29 @@ class ImageSegmenterOptions:
 class ImageSegmenter(base_vision_task_api.BaseVisionTaskApi):
-  """Class that performs image segmentation on images."""
+  """Class that performs image segmentation on images.
  The API expects a TFLite model with mandatory TFLite Model Metadata.
  Input tensor:
    (kTfLiteUInt8/kTfLiteFloat32)
    - image input of size `[batch x height x width x channels]`.
    - batch inference is not supported (`batch` is required to be 1).
    - RGB and greyscale inputs are supported (`channels` is required to be
      1 or 3).
    - if type is kTfLiteFloat32, NormalizationOptions are required to be
      attached to the metadata for input normalization.
  Output tensors:
    (kTfLiteUInt8/kTfLiteFloat32)
    - list of segmented masks.
    - if `output_type` is CATEGORY_MASK, uint8 Image, Image vector of size 1.
    - if `output_type` is CONFIDENCE_MASK, float32 Image list of size
      `channels`.
    - batch is always 1.
  An example of such a model can be found at:
  https://tfhub.dev/tensorflow/lite-model/deeplabv3/1/metadata/2
  """
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'ImageSegmenter':
--- a/mediapipe/tasks/python/vision/object_detector.py
+++ b/mediapipe/tasks/python/vision/object_detector.py
@ -98,7 +98,49 @@ class ObjectDetectorOptions:
 class ObjectDetector(base_vision_task_api.BaseVisionTaskApi):
-  """Class that performs object detection on images."""
+  """Class that performs object detection on images.
  The API expects a TFLite model with mandatory TFLite Model Metadata.
  Input tensor:
    (kTfLiteUInt8/kTfLiteFloat32)
    - image input of size `[batch x height x width x channels]`.
    - batch inference is not supported (`batch` is required to be 1).
    - only RGB inputs are supported (`channels` is required to be 3).
    - if type is kTfLiteFloat32, NormalizationOptions are required to be
      attached to the metadata for input normalization.
  Output tensors must be the 4 outputs of a `DetectionPostProcess` op, i.e:
    (kTfLiteFloat32)
    - locations tensor of size `[num_results x 4]`, the inner array
      representing bounding boxes in the form [top, left, bottom, right].
    - BoundingBoxProperties are required to be attached to the metadata
      and must specify type=BOUNDARIES and coordinate_type=RATIO.
    (kTfLiteFloat32)
    - classes tensor of size `[num_results]`, each value representing the
      integer index of a class.
    - optional (but recommended) label map(s) can be attached as
      AssociatedFile-s with type TENSOR_VALUE_LABELS, containing one label per
      line. The first such AssociatedFile (if any) is used to fill the
      `class_name` field of the results. The `display_name` field is filled
      from the AssociatedFile (if any) whose locale matches the
      `display_names_locale` field of the `ObjectDetectorOptions` used at
      creation time ("en" by default, i.e. English). If none of these are
      available, only the `index` field of the results will be filled.
    (kTfLiteFloat32)
    - scores tensor of size `[num_results]`, each value representing the score
      of the detected object.
    - optional score calibration can be attached using ScoreCalibrationOptions
      and an AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION. See
      metadata_schema.fbs [1] for more details.
    (kTfLiteFloat32)
    - integer num_results as a tensor of size `[1]`
  An example of such a model can be found at:
  https://tfhub.dev/google/lite-model/object_detection/mobile_object_localizer_v1/1/metadata/1
  [1]:
  https://github.com/google/mediapipe/blob/6cdc6443b6a7ed662744e2a2ce2d58d9c83e6d6f/mediapipe/tasks/metadata/metadata_schema.fbs#L456
  """
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'ObjectDetector':