Update python documentation.
PiperOrigin-RevId: 488482142
This commit is contained in:
parent
c027373688
commit
e714e656fe
|
@ -86,7 +86,30 @@ class AudioClassifierOptions:
|
|||
|
||||
|
||||
class AudioClassifier(base_audio_task_api.BaseAudioTaskApi):
|
||||
"""Class that performs audio classification on audio data."""
|
||||
"""Class that performs audio classification on audio data.
|
||||
|
||||
This API expects a TFLite model with mandatory TFLite Model Metadata that
|
||||
contains the mandatory AudioProperties of the solo input audio tensor and the
|
||||
optional (but recommended) category labels as AssociatedFiles with type
|
||||
TENSOR_AXIS_LABELS per output classification tensor.
|
||||
|
||||
Input tensor:
|
||||
(kTfLiteFloat32)
|
||||
- input audio buffer of size `[batch * samples]`.
|
||||
- batch inference is not supported (`batch` is required to be 1).
|
||||
- for multi-channel models, the channels must be interleaved.
|
||||
At least one output tensor with:
|
||||
(kTfLiteFloat32)
|
||||
- `[1 x N]` array with `N` represents the number of categories.
|
||||
- optional (but recommended) category labels as AssociatedFiles with type
|
||||
TENSOR_AXIS_LABELS, containing one label per line. The first such
|
||||
AssociatedFile (if any) is used to fill the `category_name` field of the
|
||||
results. The `display_name` field is filled from the AssociatedFile (if
|
||||
any) whose locale matches the `display_names_locale` field of the
|
||||
`AudioClassifierOptions` used at creation time ("en" by default, i.e.
|
||||
English). If none of these are available, only the `index` field of the
|
||||
results will be filled.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_from_model_path(cls, model_path: str) -> 'AudioClassifier':
|
||||
|
|
|
@ -87,7 +87,24 @@ class AudioEmbedderOptions:
|
|||
|
||||
|
||||
class AudioEmbedder(base_audio_task_api.BaseAudioTaskApi):
|
||||
"""Class that performs embedding extraction on audio clips or audio stream."""
|
||||
"""Class that performs embedding extraction on audio clips or audio stream.
|
||||
|
||||
This API expects a TFLite model with mandatory TFLite Model Metadata that
|
||||
contains the mandatory AudioProperties of the solo input audio tensor and the
|
||||
optional (but recommended) label items as AssociatedFiles with type
|
||||
TENSOR_AXIS_LABELS per output embedding tensor.
|
||||
|
||||
Input tensor:
|
||||
(kTfLiteFloat32)
|
||||
- input audio buffer of size `[batch * samples]`.
|
||||
- batch inference is not supported (`batch` is required to be 1).
|
||||
- for multi-channel models, the channels must be interleaved.
|
||||
At least one output tensor with:
|
||||
(kTfLiteUInt8/kTfLiteFloat32)
|
||||
- `N` components corresponding to the `N` dimensions of the returned
|
||||
feature vector for this output layer.
|
||||
- Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_from_model_path(cls, model_path: str) -> 'AudioEmbedder':
|
||||
|
|
|
@ -62,7 +62,38 @@ class TextClassifierOptions:
|
|||
|
||||
|
||||
class TextClassifier(base_text_task_api.BaseTextTaskApi):
|
||||
"""Class that performs classification on text."""
|
||||
"""Class that performs classification on text.
|
||||
|
||||
This API expects a TFLite model with (optional) TFLite Model Metadata that
|
||||
contains the mandatory (described below) input tensors, output tensor,
|
||||
and the optional (but recommended) category labels as AssociatedFiles with
|
||||
type
|
||||
TENSOR_AXIS_LABELS per output classification tensor. Metadata is required for
|
||||
models with int32 input tensors because it contains the input process unit
|
||||
for the model's Tokenizer. No metadata is required for models with string
|
||||
input tensors.
|
||||
|
||||
Input tensors:
|
||||
(kTfLiteInt32)
|
||||
- 3 input tensors of size `[batch_size x bert_max_seq_len]` representing
|
||||
the input ids, segment ids, and mask ids
|
||||
- or 1 input tensor of size `[batch_size x max_seq_len]` representing the
|
||||
input ids
|
||||
or (kTfLiteString)
|
||||
- 1 input tensor that is shapeless or has shape [1] containing the input
|
||||
string
|
||||
At least one output tensor with:
|
||||
(kTfLiteFloat32/kBool)
|
||||
- `[1 x N]` array with `N` represents the number of categories.
|
||||
- optional (but recommended) category labels as AssociatedFiles with type
|
||||
TENSOR_AXIS_LABELS, containing one label per line. The first such
|
||||
AssociatedFile (if any) is used to fill the `category_name` field of the
|
||||
results. The `display_name` field is filled from the AssociatedFile (if
|
||||
any) whose locale matches the `display_names_locale` field of the
|
||||
`TextClassifierOptions` used at creation time ("en" by default, i.e.
|
||||
English). If none of these are available, only the `index` field of the
|
||||
results will be filled.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_from_model_path(cls, model_path: str) -> 'TextClassifier':
|
||||
|
|
|
@ -63,7 +63,27 @@ class TextEmbedderOptions:
|
|||
|
||||
|
||||
class TextEmbedder(base_text_task_api.BaseTextTaskApi):
|
||||
"""Class that performs embedding extraction on text."""
|
||||
"""Class that performs embedding extraction on text.
|
||||
|
||||
This API expects a TFLite model with TFLite Model Metadata that contains the
|
||||
mandatory (described below) input tensors and output tensors. Metadata should
|
||||
contain the input process unit for the model's Tokenizer as well as input /
|
||||
output tensor metadata.
|
||||
|
||||
Input tensors:
|
||||
(kTfLiteInt32)
|
||||
- 3 input tensors of size `[batch_size x bert_max_seq_len]` with names
|
||||
"ids", "mask", and "segment_ids" representing the input ids, mask ids, and
|
||||
segment ids respectively.
|
||||
- or 1 input tensor of size `[batch_size x max_seq_len]` representing the
|
||||
input ids.
|
||||
|
||||
At least one output tensor with:
|
||||
(kTfLiteFloat32)
|
||||
- `N` components corresponding to the `N` dimensions of the returned
|
||||
feature vector for this output layer.
|
||||
- Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_from_model_path(cls, model_path: str) -> 'TextEmbedder':
|
||||
|
|
|
@ -87,7 +87,40 @@ class ImageClassifierOptions:
|
|||
|
||||
|
||||
class ImageClassifier(base_vision_task_api.BaseVisionTaskApi):
|
||||
"""Class that performs image classification on images."""
|
||||
"""Class that performs image classification on images.
|
||||
|
||||
The API expects a TFLite model with optional, but strongly recommended,
|
||||
TFLite Model Metadata.
|
||||
|
||||
Input tensor:
|
||||
(kTfLiteUInt8/kTfLiteFloat32)
|
||||
- image input of size `[batch x height x width x channels]`.
|
||||
- batch inference is not supported (`batch` is required to be 1).
|
||||
- only RGB inputs are supported (`channels` is required to be 3).
|
||||
- if type is kTfLiteFloat32, NormalizationOptions are required to be
|
||||
attached to the metadata for input normalization.
|
||||
At least one output tensor with:
|
||||
(kTfLiteUInt8/kTfLiteFloat32)
|
||||
- `N` classes and either 2 or 4 dimensions, i.e. `[1 x N]` or
|
||||
`[1 x 1 x 1 x N]`
|
||||
- optional (but recommended) label map(s) as AssociatedFiles with type
|
||||
TENSOR_AXIS_LABELS, containing one label per line. The first such
|
||||
AssociatedFile (if any) is used to fill the `class_name` field of the
|
||||
results. The `display_name` field is filled from the AssociatedFile (if
|
||||
any) whose locale matches the `display_names_locale` field of the
|
||||
`ImageClassifierOptions` used at creation time ("en" by default, i.e.
|
||||
English). If none of these are available, only the `index` field of the
|
||||
results will be filled.
|
||||
- optional score calibration can be attached using ScoreCalibrationOptions
|
||||
and an AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION. See
|
||||
metadata_schema.fbs [1] for more details.
|
||||
|
||||
An example of such model can be found at:
|
||||
https://tfhub.dev/bohemian-visual-recognition-alliance/lite-model/models/mushroom-identification_v1/1
|
||||
|
||||
[1]:
|
||||
https://github.com/google/mediapipe/blob/6cdc6443b6a7ed662744e2a2ce2d58d9c83e6d6f/mediapipe/tasks/metadata/metadata_schema.fbs#L456
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_from_model_path(cls, model_path: str) -> 'ImageClassifier':
|
||||
|
|
|
@ -86,7 +86,24 @@ class ImageEmbedderOptions:
|
|||
|
||||
|
||||
class ImageEmbedder(base_vision_task_api.BaseVisionTaskApi):
|
||||
"""Class that performs embedding extraction on images."""
|
||||
"""Class that performs embedding extraction on images.
|
||||
|
||||
The API expects a TFLite model with optional, but strongly recommended,
|
||||
TFLite Model Metadata.
|
||||
|
||||
Input tensor:
|
||||
(kTfLiteUInt8/kTfLiteFloat32)
|
||||
- image input of size `[batch x height x width x channels]`.
|
||||
- batch inference is not supported (`batch` is required to be 1).
|
||||
- only RGB inputs are supported (`channels` is required to be 3).
|
||||
- if type is kTfLiteFloat32, NormalizationOptions are required to be
|
||||
attached to the metadata for input normalization.
|
||||
At least one output tensor with:
|
||||
(kTfLiteUInt8/kTfLiteFloat32)
|
||||
- `N` components corresponding to the `N` dimensions of the returned
|
||||
feature vector for this output layer.
|
||||
- Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_from_model_path(cls, model_path: str) -> 'ImageEmbedder':
|
||||
|
|
|
@ -93,7 +93,29 @@ class ImageSegmenterOptions:
|
|||
|
||||
|
||||
class ImageSegmenter(base_vision_task_api.BaseVisionTaskApi):
|
||||
"""Class that performs image segmentation on images."""
|
||||
"""Class that performs image segmentation on images.
|
||||
|
||||
The API expects a TFLite model with mandatory TFLite Model Metadata.
|
||||
|
||||
Input tensor:
|
||||
(kTfLiteUInt8/kTfLiteFloat32)
|
||||
- image input of size `[batch x height x width x channels]`.
|
||||
- batch inference is not supported (`batch` is required to be 1).
|
||||
- RGB and greyscale inputs are supported (`channels` is required to be
|
||||
1 or 3).
|
||||
- if type is kTfLiteFloat32, NormalizationOptions are required to be
|
||||
attached to the metadata for input normalization.
|
||||
Output tensors:
|
||||
(kTfLiteUInt8/kTfLiteFloat32)
|
||||
- list of segmented masks.
|
||||
- if `output_type` is CATEGORY_MASK, uint8 Image, Image vector of size 1.
|
||||
- if `output_type` is CONFIDENCE_MASK, float32 Image list of size
|
||||
`channels`.
|
||||
- batch is always 1
|
||||
|
||||
An example of such model can be found at:
|
||||
https://tfhub.dev/tensorflow/lite-model/deeplabv3/1/metadata/2
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_from_model_path(cls, model_path: str) -> 'ImageSegmenter':
|
||||
|
|
|
@ -98,7 +98,49 @@ class ObjectDetectorOptions:
|
|||
|
||||
|
||||
class ObjectDetector(base_vision_task_api.BaseVisionTaskApi):
|
||||
"""Class that performs object detection on images."""
|
||||
"""Class that performs object detection on images.
|
||||
|
||||
The API expects a TFLite model with mandatory TFLite Model Metadata.
|
||||
|
||||
Input tensor:
|
||||
(kTfLiteUInt8/kTfLiteFloat32)
|
||||
- image input of size `[batch x height x width x channels]`.
|
||||
- batch inference is not supported (`batch` is required to be 1).
|
||||
- only RGB inputs are supported (`channels` is required to be 3).
|
||||
- if type is kTfLiteFloat32, NormalizationOptions are required to be
|
||||
attached to the metadata for input normalization.
|
||||
Output tensors must be the 4 outputs of a `DetectionPostProcess` op, i.e:
|
||||
(kTfLiteFloat32)
|
||||
- locations tensor of size `[num_results x 4]`, the inner array
|
||||
representing bounding boxes in the form [top, left, right, bottom].
|
||||
- BoundingBoxProperties are required to be attached to the metadata
|
||||
and must specify type=BOUNDARIES and coordinate_type=RATIO.
|
||||
(kTfLiteFloat32)
|
||||
- classes tensor of size `[num_results]`, each value representing the
|
||||
integer index of a class.
|
||||
- optional (but recommended) label map(s) can be attached as
|
||||
AssociatedFile-s with type TENSOR_VALUE_LABELS, containing one label per
|
||||
line. The first such AssociatedFile (if any) is used to fill the
|
||||
`class_name` field of the results. The `display_name` field is filled
|
||||
from the AssociatedFile (if any) whose locale matches the
|
||||
`display_names_locale` field of the `ObjectDetectorOptions` used at
|
||||
creation time ("en" by default, i.e. English). If none of these are
|
||||
available, only the `index` field of the results will be filled.
|
||||
(kTfLiteFloat32)
|
||||
- scores tensor of size `[num_results]`, each value representing the score
|
||||
of the detected object.
|
||||
- optional score calibration can be attached using ScoreCalibrationOptions
|
||||
and an AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION. See
|
||||
metadata_schema.fbs [1] for more details.
|
||||
(kTfLiteFloat32)
|
||||
- integer num_results as a tensor of size `[1]`
|
||||
|
||||
An example of such model can be found at:
|
||||
https://tfhub.dev/google/lite-model/object_detection/mobile_object_localizer_v1/1/metadata/1
|
||||
|
||||
[1]:
|
||||
https://github.com/google/mediapipe/blob/6cdc6443b6a7ed662744e2a2ce2d58d9c83e6d6f/mediapipe/tasks/metadata/metadata_schema.fbs#L456
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_from_model_path(cls, model_path: str) -> 'ObjectDetector':
|
||||
|
|
Loading…
Reference in New Issue
Block a user