Update python documentation.

PiperOrigin-RevId: 488482142
This commit is contained in:
Jiuqiang Tang 2022-11-14 15:16:36 -08:00 committed by Copybara-Service
parent c027373688
commit e714e656fe
8 changed files with 213 additions and 8 deletions

View File

@ -86,7 +86,30 @@ class AudioClassifierOptions:
class AudioClassifier(base_audio_task_api.BaseAudioTaskApi):
"""Class that performs audio classification on audio data."""
"""Class that performs audio classification on audio data.
This API expects a TFLite model with mandatory TFLite Model Metadata that
contains the mandatory AudioProperties of the solo input audio tensor and the
optional (but recommended) category labels as AssociatedFiles with type
TENSOR_AXIS_LABELS per output classification tensor.
Input tensor:
(kTfLiteFloat32)
- input audio buffer of size `[batch * samples]`.
- batch inference is not supported (`batch` is required to be 1).
- for multi-channel models, the channels must be interleaved.
At least one output tensor with:
(kTfLiteFloat32)
- `[1 x N]` array with `N` represents the number of categories.
- optional (but recommended) category labels as AssociatedFiles with type
TENSOR_AXIS_LABELS, containing one label per line. The first such
AssociatedFile (if any) is used to fill the `category_name` field of the
results. The `display_name` field is filled from the AssociatedFile (if
any) whose locale matches the `display_names_locale` field of the
`AudioClassifierOptions` used at creation time ("en" by default, i.e.
English). If none of these are available, only the `index` field of the
results will be filled.
"""
@classmethod
def create_from_model_path(cls, model_path: str) -> 'AudioClassifier':

View File

@ -87,7 +87,24 @@ class AudioEmbedderOptions:
class AudioEmbedder(base_audio_task_api.BaseAudioTaskApi):
"""Class that performs embedding extraction on audio clips or audio stream."""
"""Class that performs embedding extraction on audio clips or audio stream.
This API expects a TFLite model with mandatory TFLite Model Metadata that
contains the mandatory AudioProperties of the solo input audio tensor and the
optional (but recommended) label items as AssociatedFiles with type
TENSOR_AXIS_LABELS per output embedding tensor.
Input tensor:
(kTfLiteFloat32)
- input audio buffer of size `[batch * samples]`.
- batch inference is not supported (`batch` is required to be 1).
- for multi-channel models, the channels must be interleaved.
At least one output tensor with:
(kTfLiteUInt8/kTfLiteFloat32)
- `N` components corresponding to the `N` dimensions of the returned
feature vector for this output layer.
- Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
"""
@classmethod
def create_from_model_path(cls, model_path: str) -> 'AudioEmbedder':

View File

@ -62,7 +62,38 @@ class TextClassifierOptions:
class TextClassifier(base_text_task_api.BaseTextTaskApi):
"""Class that performs classification on text."""
"""Class that performs classification on text.
This API expects a TFLite model with (optional) TFLite Model Metadata that
contains the mandatory (described below) input tensors, output tensor,
and the optional (but recommended) category labels as AssociatedFiles with
type
TENSOR_AXIS_LABELS per output classification tensor. Metadata is required for
models with int32 input tensors because it contains the input process unit
for the model's Tokenizer. No metadata is required for models with string
input tensors.
Input tensors:
(kTfLiteInt32)
- 3 input tensors of size `[batch_size x bert_max_seq_len]` representing
the input ids, segment ids, and mask ids
- or 1 input tensor of size `[batch_size x max_seq_len]` representing the
input ids
or (kTfLiteString)
- 1 input tensor that is shapeless or has shape [1] containing the input
string
At least one output tensor with:
(kTfLiteFloat32/kBool)
- `[1 x N]` array with `N` represents the number of categories.
- optional (but recommended) category labels as AssociatedFiles with type
TENSOR_AXIS_LABELS, containing one label per line. The first such
AssociatedFile (if any) is used to fill the `category_name` field of the
results. The `display_name` field is filled from the AssociatedFile (if
any) whose locale matches the `display_names_locale` field of the
`TextClassifierOptions` used at creation time ("en" by default, i.e.
English). If none of these are available, only the `index` field of the
results will be filled.
"""
@classmethod
def create_from_model_path(cls, model_path: str) -> 'TextClassifier':

View File

@ -63,7 +63,27 @@ class TextEmbedderOptions:
class TextEmbedder(base_text_task_api.BaseTextTaskApi):
"""Class that performs embedding extraction on text."""
"""Class that performs embedding extraction on text.
This API expects a TFLite model with TFLite Model Metadata that contains the
mandatory (described below) input tensors and output tensors. Metadata should
contain the input process unit for the model's Tokenizer as well as input /
output tensor metadata.
Input tensors:
(kTfLiteInt32)
- 3 input tensors of size `[batch_size x bert_max_seq_len]` with names
"ids", "mask", and "segment_ids" representing the input ids, mask ids, and
segment ids respectively.
- or 1 input tensor of size `[batch_size x max_seq_len]` representing the
input ids.
At least one output tensor with:
(kTfLiteFloat32)
- `N` components corresponding to the `N` dimensions of the returned
feature vector for this output layer.
- Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
"""
@classmethod
def create_from_model_path(cls, model_path: str) -> 'TextEmbedder':

View File

@ -87,7 +87,40 @@ class ImageClassifierOptions:
class ImageClassifier(base_vision_task_api.BaseVisionTaskApi):
"""Class that performs image classification on images."""
"""Class that performs image classification on images.
The API expects a TFLite model with optional, but strongly recommended,
TFLite Model Metadata.
Input tensor:
(kTfLiteUInt8/kTfLiteFloat32)
- image input of size `[batch x height x width x channels]`.
- batch inference is not supported (`batch` is required to be 1).
- only RGB inputs are supported (`channels` is required to be 3).
- if type is kTfLiteFloat32, NormalizationOptions are required to be
attached to the metadata for input normalization.
At least one output tensor with:
(kTfLiteUInt8/kTfLiteFloat32)
- `N` classes and either 2 or 4 dimensions, i.e. `[1 x N]` or
`[1 x 1 x 1 x N]`
- optional (but recommended) label map(s) as AssociatedFiles with type
TENSOR_AXIS_LABELS, containing one label per line. The first such
AssociatedFile (if any) is used to fill the `class_name` field of the
results. The `display_name` field is filled from the AssociatedFile (if
any) whose locale matches the `display_names_locale` field of the
`ImageClassifierOptions` used at creation time ("en" by default, i.e.
English). If none of these are available, only the `index` field of the
results will be filled.
- optional score calibration can be attached using ScoreCalibrationOptions
and an AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION. See
metadata_schema.fbs [1] for more details.
An example of such model can be found at:
https://tfhub.dev/bohemian-visual-recognition-alliance/lite-model/models/mushroom-identification_v1/1
[1]:
https://github.com/google/mediapipe/blob/6cdc6443b6a7ed662744e2a2ce2d58d9c83e6d6f/mediapipe/tasks/metadata/metadata_schema.fbs#L456
"""
@classmethod
def create_from_model_path(cls, model_path: str) -> 'ImageClassifier':

View File

@ -86,7 +86,24 @@ class ImageEmbedderOptions:
class ImageEmbedder(base_vision_task_api.BaseVisionTaskApi):
"""Class that performs embedding extraction on images."""
"""Class that performs embedding extraction on images.
The API expects a TFLite model with optional, but strongly recommended,
TFLite Model Metadata.
Input tensor:
(kTfLiteUInt8/kTfLiteFloat32)
- image input of size `[batch x height x width x channels]`.
- batch inference is not supported (`batch` is required to be 1).
- only RGB inputs are supported (`channels` is required to be 3).
- if type is kTfLiteFloat32, NormalizationOptions are required to be
attached to the metadata for input normalization.
At least one output tensor with:
(kTfLiteUInt8/kTfLiteFloat32)
- `N` components corresponding to the `N` dimensions of the returned
feature vector for this output layer.
- Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
"""
@classmethod
def create_from_model_path(cls, model_path: str) -> 'ImageEmbedder':

View File

@ -93,7 +93,29 @@ class ImageSegmenterOptions:
class ImageSegmenter(base_vision_task_api.BaseVisionTaskApi):
"""Class that performs image segmentation on images."""
"""Class that performs image segmentation on images.
The API expects a TFLite model with mandatory TFLite Model Metadata.
Input tensor:
(kTfLiteUInt8/kTfLiteFloat32)
- image input of size `[batch x height x width x channels]`.
- batch inference is not supported (`batch` is required to be 1).
- RGB and greyscale inputs are supported (`channels` is required to be
1 or 3).
- if type is kTfLiteFloat32, NormalizationOptions are required to be
attached to the metadata for input normalization.
Output tensors:
(kTfLiteUInt8/kTfLiteFloat32)
- list of segmented masks.
- if `output_type` is CATEGORY_MASK, uint8 Image, Image vector of size 1.
- if `output_type` is CONFIDENCE_MASK, float32 Image list of size
`channels`.
- batch is always 1
An example of such model can be found at:
https://tfhub.dev/tensorflow/lite-model/deeplabv3/1/metadata/2
"""
@classmethod
def create_from_model_path(cls, model_path: str) -> 'ImageSegmenter':

View File

@ -98,7 +98,49 @@ class ObjectDetectorOptions:
class ObjectDetector(base_vision_task_api.BaseVisionTaskApi):
"""Class that performs object detection on images."""
"""Class that performs object detection on images.
The API expects a TFLite model with mandatory TFLite Model Metadata.
Input tensor:
(kTfLiteUInt8/kTfLiteFloat32)
- image input of size `[batch x height x width x channels]`.
- batch inference is not supported (`batch` is required to be 1).
- only RGB inputs are supported (`channels` is required to be 3).
- if type is kTfLiteFloat32, NormalizationOptions are required to be
attached to the metadata for input normalization.
Output tensors must be the 4 outputs of a `DetectionPostProcess` op, i.e:
(kTfLiteFloat32)
- locations tensor of size `[num_results x 4]`, the inner array
representing bounding boxes in the form [top, left, right, bottom].
- BoundingBoxProperties are required to be attached to the metadata
and must specify type=BOUNDARIES and coordinate_type=RATIO.
(kTfLiteFloat32)
- classes tensor of size `[num_results]`, each value representing the
integer index of a class.
- optional (but recommended) label map(s) can be attached as
AssociatedFile-s with type TENSOR_VALUE_LABELS, containing one label per
line. The first such AssociatedFile (if any) is used to fill the
`class_name` field of the results. The `display_name` field is filled
from the AssociatedFile (if any) whose locale matches the
`display_names_locale` field of the `ObjectDetectorOptions` used at
creation time ("en" by default, i.e. English). If none of these are
available, only the `index` field of the results will be filled.
(kTfLiteFloat32)
- scores tensor of size `[num_results]`, each value representing the score
of the detected object.
- optional score calibration can be attached using ScoreCalibrationOptions
and an AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION. See
metadata_schema.fbs [1] for more details.
(kTfLiteFloat32)
- integer num_results as a tensor of size `[1]`
An example of such model can be found at:
https://tfhub.dev/google/lite-model/object_detection/mobile_object_localizer_v1/1/metadata/1
[1]:
https://github.com/google/mediapipe/blob/6cdc6443b6a7ed662744e2a2ce2d58d9c83e6d6f/mediapipe/tasks/metadata/metadata_schema.fbs#L456
"""
@classmethod
def create_from_model_path(cls, model_path: str) -> 'ObjectDetector':