diff --git a/mediapipe/tasks/python/metadata/metadata_writers/BUILD b/mediapipe/tasks/python/metadata/metadata_writers/BUILD index 69d952998..ce572283f 100644 --- a/mediapipe/tasks/python/metadata/metadata_writers/BUILD +++ b/mediapipe/tasks/python/metadata/metadata_writers/BUILD @@ -50,6 +50,12 @@ py_library( deps = [":metadata_writer"], ) +py_library( + name = "object_detector", + srcs = ["object_detector.py"], + deps = [":metadata_writer"], +) + py_library( name = "model_asset_bundle_utils", srcs = ["model_asset_bundle_utils.py"], diff --git a/mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py b/mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py index 9ddf818b6..6428b835f 100644 --- a/mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +++ b/mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py @@ -349,6 +349,21 @@ class SentencePieceTokenizerMd: return tokenizer +class ValueRangeMd: + """A container for value range metadata information.""" + + def __init__(self, min_value: int, max_value: int) -> None: + self.min_value = min_value + self.max_value = max_value + + def create_metadata(self) -> _metadata_fb.ValueRangeT: + """Creates the value range metadata based on the information.""" + value_range_metadata = _metadata_fb.ValueRangeT() + value_range_metadata.min = self.min_value + value_range_metadata.max = self.max_value + return value_range_metadata + + class TensorMd: """A container for common tensor metadata information. @@ -362,10 +377,12 @@ class TensorMd: tensor_name: name of the corresponding tensor [1] in the TFLite model. It is used to locate the corresponding tensor and decide the order of the tensor metadata [2] when populating model metadata. - [1]: + content_range_md: information of content range [3]. 
[1]: https://github.com/tensorflow/tensorflow/blob/cb67fef35567298b40ac166b0581cd8ad68e5a3a/tensorflow/lite/schema/schema.fbs#L1129-L1136 - [2]: + [2]: https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L623-L640 + [3]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L385 """ def __init__( @@ -376,7 +393,9 @@ class TensorMd: max_values: Optional[List[float]] = None, content_type: int = _metadata_fb.ContentProperties.FeatureProperties, associated_files: Optional[List[Type[AssociatedFileMd]]] = None, - tensor_name: Optional[str] = None) -> None: + tensor_name: Optional[str] = None, + content_range_md: Optional[ValueRangeMd] = None, + ) -> None: self.name = name self.description = description self.min_values = min_values @@ -384,6 +403,7 @@ class TensorMd: self.content_type = content_type self.associated_files = associated_files self.tensor_name = tensor_name + self.content_range_md = content_range_md def create_metadata(self) -> _metadata_fb.TensorMetadataT: """Creates the input tensor metadata based on the information. @@ -415,6 +435,8 @@ class TensorMd: content.contentPropertiesType = self.content_type tensor_metadata.content = content + if self.content_range_md: + tensor_metadata.content.range = self.content_range_md.create_metadata() # TODO: check if multiple label files have populated locale. # Create associated files @@ -737,7 +759,9 @@ class ClassificationTensorMd(TensorMd): tensor_type: Optional[int] = None, score_calibration_md: Optional[ScoreCalibrationMd] = None, tensor_name: Optional[str] = None, - score_thresholding_md: Optional[ScoreThresholdingMd] = None) -> None: + score_thresholding_md: Optional[ScoreThresholdingMd] = None, + content_range_md: Optional[ValueRangeMd] = None, + ) -> None: """Initializes the instance of ClassificationTensorMd. 
Args: @@ -753,16 +777,18 @@ class ClassificationTensorMd(TensorMd): order of the tensor metadata [4] when populating model metadata. score_thresholding_md: information of the score thresholding [5] in the classification tensor. - [1]: + content_range_md: information of content range [6]. [1]: https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L99 - [2]: + [2]: https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L456 - [3]: + [3]: https://github.com/tensorflow/tensorflow/blob/cb67fef35567298b40ac166b0581cd8ad68e5a3a/tensorflow/lite/schema/schema.fbs#L1129-L1136 - [4]: + [4]: https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L623-L640 - [5]: + [5]: https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L468 + [6]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L385 """ self.score_calibration_md = score_calibration_md self.score_thresholding_md = score_thresholding_md @@ -784,9 +810,16 @@ class ClassificationTensorMd(TensorMd): associated_files.append( score_calibration_md.create_score_calibration_file_md()) - super().__init__(name, description, min_values, max_values, - _metadata_fb.ContentProperties.FeatureProperties, - associated_files, tensor_name) + super().__init__( + name, + description, + min_values, + max_values, + _metadata_fb.ContentProperties.FeatureProperties, + associated_files, + tensor_name, + content_range_md, + ) def create_metadata(self) -> _metadata_fb.TensorMetadataT: """Creates the classification tensor metadata based on the information.""" @@ -804,3 +837,188 @@ class ClassificationTensorMd(TensorMd): self.score_thresholding_md.create_metadata() ] return tensor_metadata + 
+ +class LocationTensorMd(TensorMd): + """A container for the detection location tensor metadata information.""" + + # The default order is {left, top, right, bottom}. Denote the order to be + # {top, left, bottom, right}. + _BOUNDING_BOX_INDEX = (1, 0, 3, 2) + + def __init__( + self, + name: Optional[str] = None, + description: Optional[str] = None, + content_range_md: Optional[ValueRangeMd] = None, + ) -> None: + super().__init__( + name=name, description=description, content_range_md=content_range_md + ) + + def create_metadata(self) -> _metadata_fb.TensorMetadataT: + """Creates the detection location tensor metadata.""" + content = _metadata_fb.ContentT() + content.contentPropertiesType = ( + _metadata_fb.ContentProperties.BoundingBoxProperties + ) + properties = _metadata_fb.BoundingBoxPropertiesT() + properties.index = list(self._BOUNDING_BOX_INDEX) + properties.type = _metadata_fb.BoundingBoxType.BOUNDARIES + properties.coordinateType = _metadata_fb.CoordinateType.RATIO + content.contentProperties = properties + if self.content_range_md: + content.range = self.content_range_md.create_metadata() + location_metadata = super().create_metadata() + location_metadata.content = content + return location_metadata + + +class CategoryTensorMd(TensorMd): + """A container for the category tensor metadata information.""" + + def __init__( + self, + name: Optional[str] = None, + description: Optional[str] = None, + label_files: Optional[List[LabelFileMd]] = None, + content_range_md: Optional[ValueRangeMd] = None, + ): + """Initializes a CategoryTensorMd object. + + Args: + name: name of the tensor. + description: description of what the tensor is. + label_files: information of the label files [1] in the category tensor. + content_range_md: information of content range [2]. 
[1]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L116 + [2]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L385 + """ + # In category tensors, label files are in the type of TENSOR_VALUE_LABELS. + if label_files: + for file in label_files: + file.file_type = _metadata_fb.AssociatedFileType.TENSOR_VALUE_LABELS + + super().__init__( + name=name, + description=description, + associated_files=label_files, + content_range_md=content_range_md, + ) + + +class DetectionOutputTensorsMd: + """A container for the output tensor metadata of detection models.""" + + _LOCATION_NAME = "location" + _LOCATION_DESCRIPTION = "The locations of the detected boxes." + _CATRGORY_NAME = "category" + _CATEGORY_DESCRIPTION = "The categories of the detected boxes." + _SCORE_NAME = "score" + _SCORE_DESCRIPTION = "The scores of the detected boxes." + _NUMBER_NAME = "number of detections" + _NUMBER_DESCRIPTION = "The number of the detected boxes." + _CONTENT_VALUE_DIM = 2 + + def __init__( + self, + model_buffer: bytearray, + label_files: Optional[List[LabelFileMd]] = None, + score_calibration_md: Optional[ScoreCalibrationMd] = None, + ) -> None: + """Initializes the instance of DetectionOutputTensorsMd. + + Args: + model_buffer: A valid flatbuffer loaded from the TFLite model file. + label_files: information of the label files [1] in the classification + tensor. + score_calibration_md: information of the score calibration files operation + [2] in the classification tensor. 
+ https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L99 + [2]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L456 + """ + content_range_md = ValueRangeMd( + min_value=self._CONTENT_VALUE_DIM, max_value=self._CONTENT_VALUE_DIM + ) + location_md = LocationTensorMd( + name=self._LOCATION_NAME, + description=self._LOCATION_DESCRIPTION, + content_range_md=content_range_md, + ) + category_md = CategoryTensorMd( + name=self._CATRGORY_NAME, + description=self._CATEGORY_DESCRIPTION, + label_files=label_files, + content_range_md=content_range_md, + ) + score_md = ClassificationTensorMd( + name=self._SCORE_NAME, + description=self._SCORE_DESCRIPTION, + score_calibration_md=score_calibration_md, + content_range_md=content_range_md, + ) + number_md = TensorMd( + name=self._NUMBER_NAME, description=self._NUMBER_DESCRIPTION + ) + + # Get the tensor indices of tflite outputs and then gets the order of the + # output metadata by the value of tensor indices. The output tensor indices + # follow the order as [location, category, score, number of detections]. For + # instance, if the output indices are [601, 599, 598, 600], tensor names and + # indices are aligned as below: + # - (598, location) + # - (599, category) + # - (600, score) + # - (601, number of detections) + # because of the op's ports of TFLITE_DETECTION_POST_PROCESS + # (https://github.com/tensorflow/tensorflow/blob/a4fe268ea084e7d323133ed7b986e0ae259a2bc7/tensorflow/lite/kernels/detection_postprocess.cc#L47-L50). + # Thus, the metadata of tensors are paired with output tensor indices & name + # in this way. + + # Get the output tensor indices and names from the tflite model. + tensor_indices_and_names = zip( + writer_utils.get_output_tensor_indices(model_buffer), + writer_utils.get_output_tensor_names(model_buffer), + ) + # Sort by the output tensor indices. 
+ tensor_indices_and_names = sorted(tensor_indices_and_names) + + # Align tensor names with metadata. + self._output_mds = [location_md, category_md, score_md, number_md] + if len(self._output_mds) != len(tensor_indices_and_names): + raise ValueError( + "The size of TFLite output should be " + str(len(self._output_mds)) + ) + for i, output_md in enumerate(self._output_mds): + output_md.tensor_name = tensor_indices_and_names[i][1] + + @property + def output_mds(self) -> List[TensorMd]: + return self._output_mds + + +class TensorGroupMd: + """A container for a group of tensor metadata information.""" + + def __init__( + self, name: Optional[str] = None, tensor_names: Optional[List[str]] = None + ) -> None: + """Initializes a TensorGroupMd object. + + Args: + name: name of tensor group. + tensor_names: Names of the tensors to group together, corresponding to + TensorMetadata.name [1]. [1]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L564 + """ + self.name = name + self.tensor_names = tensor_names + + def create_metadata(self) -> _metadata_fb.TensorGroupT: + """Creates the tensor group metadata.""" + group = _metadata_fb.TensorGroupT() + group.name = self.name + group.tensorNames = self.tensor_names + return group diff --git a/mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py b/mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py index 06a45645e..240655a88 100644 --- a/mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +++ b/mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py @@ -34,6 +34,10 @@ _INPUT_REGEX_TEXT_DESCRIPTION = ('Embedding vectors representing the input ' 'text to be processed.') _OUTPUT_CLASSIFICATION_NAME = 'score' _OUTPUT_CLASSIFICATION_DESCRIPTION = 'Score of the labels respectively.' +# Detection tensor result to be grouped together. 
+_DETECTION_GROUP_NAME = 'detection_result' +# File name to export score calibration parameters. +_SCORE_CALIBATION_FILENAME = 'score_calibration.txt' @dataclasses.dataclass @@ -311,7 +315,8 @@ def _create_metadata_buffer( general_md: Optional[metadata_info.GeneralMd] = None, input_md: Optional[List[metadata_info.TensorMd]] = None, output_md: Optional[List[metadata_info.TensorMd]] = None, - input_process_units: Optional[List[metadata_fb.ProcessUnitT]] = None + input_process_units: Optional[List[metadata_fb.ProcessUnitT]] = None, + output_group_md: Optional[List[metadata_info.TensorGroupMd]] = None, ) -> bytearray: """Creates a buffer of the metadata. @@ -321,8 +326,11 @@ def _create_metadata_buffer( input_md: metadata information of the input tensors. output_md: metadata information of the output tensors. input_process_units: a lists of metadata of the input process units [1]. - [1]: + output_group_md: a list of metadata of output tensor groups [2]; [1]: https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L655 + [2]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L677 + Returns: A buffer of the metadata. @@ -359,6 +367,10 @@ def _create_metadata_buffer( subgraph_metadata.outputTensorMetadata = output_metadata if input_process_units: subgraph_metadata.inputProcessUnits = input_process_units + if output_group_md: + subgraph_metadata.outputTensorGroups = [ + m.create_metadata() for m in output_group_md + ] # Create the whole model metadata. if general_md is None: @@ -402,6 +414,7 @@ class MetadataWriter(object): self._input_mds = [] self._input_process_units = [] self._output_mds = [] + self._output_group_mds = [] self._associated_files = [] self._temp_folder = tempfile.TemporaryDirectory() @@ -583,27 +596,13 @@ class MetadataWriter(object): Returns: The current Writer instance to allow chained operation. 
""" - calibration_md = None - if score_calibration: - calibration_md = metadata_info.ScoreCalibrationMd( - score_transformation_type=score_calibration.transformation_type, - default_score=score_calibration.default_score, - file_path=self._export_calibration_file('score_calibration.txt', - score_calibration.parameters)) + calibration_md = self._create_score_calibration_md(score_calibration) score_thresholding_md = None if score_thresholding: score_thresholding_md = metadata_info.ScoreThresholdingMd( score_thresholding.global_score_threshold) - label_files = None - if labels: - label_files = [] - for item in labels.labels: - label_files.append( - metadata_info.LabelFileMd( - self._export_labels(item.filename, item.names), - locale=item.locale)) - + label_files = self._create_label_file_md(labels) output_md = metadata_info.ClassificationTensorMd( name=name, description=description, @@ -615,6 +614,41 @@ class MetadataWriter(object): self._output_mds.append(output_md) return self + def add_detection_output( + self, + labels: Optional[Labels] = None, + score_calibration: Optional[ScoreCalibration] = None, + group_name: str = _DETECTION_GROUP_NAME, + ) -> 'MetadataWriter': + """Adds a detection head metadata for detection output tensor. + + Args: + labels: an instance of Labels helper class. + score_calibration: an instance of ScoreCalibration helper class. + group_name: name of output tensor group. + + Returns: + The current Writer instance to allow chained operation. + """ + calibration_md = self._create_score_calibration_md(score_calibration) + label_files = self._create_label_file_md(labels) + detection_output_mds = metadata_info.DetectionOutputTensorsMd( + self._model_buffer, + label_files=label_files, + score_calibration_md=calibration_md, + ).output_mds + self._output_mds.extend(detection_output_mds) + # Outputs are location, category, score, number of detections. 
+ if len(detection_output_mds) != 4: + raise ValueError('The size of detections output should be 4.') + # The first 3 tensors (location, category, score) are grouped. + group_md = metadata_info.TensorGroupMd( + name=group_name, + tensor_names=[output_md.name for output_md in detection_output_mds[:3]], + ) + self._output_group_mds.append(group_md) + return self + def add_feature_output(self, name: Optional[str] = None, description: Optional[str] = None) -> 'MetadataWriter': @@ -639,7 +673,9 @@ class MetadataWriter(object): general_md=self._general_md, input_md=self._input_mds, output_md=self._output_mds, - input_process_units=self._input_process_units) + input_process_units=self._input_process_units, + output_group_md=self._output_group_mds, + ) populator.load_metadata_buffer(metadata_buffer) if self._associated_files: populator.load_associated_files(self._associated_files) @@ -683,6 +719,36 @@ class MetadataWriter(object): self._associated_files.append(filepath) return filepath + def _create_score_calibration_md( + self, score_calibration: ScoreCalibration + ) -> Optional[metadata_info.ScoreCalibrationMd]: + """Creates the ScoreCalibrationMd object.""" + if score_calibration is None: + return None + return metadata_info.ScoreCalibrationMd( + score_transformation_type=score_calibration.transformation_type, + default_score=score_calibration.default_score, + file_path=self._export_calibration_file( + _SCORE_CALIBATION_FILENAME, score_calibration.parameters + ), + ) + + def _create_label_file_md( + self, labels: Optional[Labels] = None + ) -> Optional[List[metadata_info.LabelFileMd]]: + """Creates a list of LabelFileMd objects.""" + label_files = None + if labels: + label_files = [] + for item in labels.labels: + label_files.append( + metadata_info.LabelFileMd( + self._export_labels(item.filename, item.names), + locale=item.locale, + ) + ) + return label_files + class MetadataWriterBase: """Base MetadataWriter class which contains the apis exposed to users. 
diff --git a/mediapipe/tasks/python/metadata/metadata_writers/object_detector.py b/mediapipe/tasks/python/metadata/metadata_writers/object_detector.py new file mode 100644 index 000000000..6fe78c1ca --- /dev/null +++ b/mediapipe/tasks/python/metadata/metadata_writers/object_detector.py @@ -0,0 +1,77 @@ +# Copyright 2023 The MediaPipe Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Writes metadata and label file to the Object Detector models.""" + +from typing import List, Optional + +from mediapipe.tasks.python.metadata.metadata_writers import metadata_writer + +_MODEL_NAME = "ObjectDetector" +_MODEL_DESCRIPTION = ( + "Identify which of a known set of objects might be present and provide " + "information about their positions within the given image or a video " + "stream." +) + + +class MetadataWriter(metadata_writer.MetadataWriterBase): + """MetadataWriter to write the metadata into the object detector.""" + + @classmethod + def create( + cls, + model_buffer: bytearray, + input_norm_mean: List[float], + input_norm_std: List[float], + labels: metadata_writer.Labels, + score_calibration: Optional[metadata_writer.ScoreCalibration] = None, + ) -> "MetadataWriter": + """Creates MetadataWriter to write the metadata for object detector. + + The parameters required in this method are mandatory when using MediaPipe + Tasks. 
+ + Example usage: + metadata_writer = object_detector.MetadataWriter.create(model_buffer, ...) + tflite_content, json_content = metadata_writer.populate() + + When calling `populate` function in this class, it returns TfLite content + and JSON content. Note that only the output TFLite is used for deployment. + The output JSON content is used to interpret the metadata content. + + Args: + model_buffer: A valid flatbuffer loaded from the TFLite model file. + input_norm_mean: the mean value used in the input tensor normalization + [1]. + input_norm_std: the std value used in the input tensor normalization [1]. + labels: an instance of Labels helper class used in the output + classification tensor [2]. + score_calibration: A container of the score calibration operation [3] in + the classification tensor. Optional if the model does not use score + calibration. [1]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L389 + [2]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L99 + [3]: + https://github.com/google/mediapipe/blob/f8af41b1eb49ff4bdad756ff19d1d36f486be614/mediapipe/tasks/metadata/metadata_schema.fbs#L456 + + Returns: + A MetadataWriter object. 
+ """ + writer = metadata_writer.MetadataWriter(model_buffer) + writer.add_general_info(_MODEL_NAME, _MODEL_DESCRIPTION) + writer.add_image_input(input_norm_mean, input_norm_std) + writer.add_detection_output(labels, score_calibration) + return cls(writer) diff --git a/mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py b/mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py index 0a054812b..13586f63c 100644 --- a/mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +++ b/mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py @@ -61,6 +61,12 @@ def get_output_tensor_types( return tensor_types +def get_output_tensor_indices(model_buffer: bytearray) -> List[int]: + """Gets a list of the output tensor indices.""" + subgraph = get_subgraph(model_buffer) + return subgraph.OutputsAsNumpy() + + def get_subgraph(model_buffer: bytearray) -> _schema_fb.SubGraph: """Gets the subgraph of the model. diff --git a/mediapipe/tasks/python/test/metadata/metadata_writers/BUILD b/mediapipe/tasks/python/test/metadata/metadata_writers/BUILD index 539b3903b..7088e341a 100644 --- a/mediapipe/tasks/python/test/metadata/metadata_writers/BUILD +++ b/mediapipe/tasks/python/test/metadata/metadata_writers/BUILD @@ -73,3 +73,19 @@ py_test( srcs = ["model_asset_bundle_utils_test.py"], deps = ["//mediapipe/tasks/python/metadata/metadata_writers:model_asset_bundle_utils"], ) + +py_test( + name = "object_detector_test", + srcs = ["object_detector_test.py"], + data = [ + "//mediapipe/tasks/testdata/metadata:data_files", + "//mediapipe/tasks/testdata/metadata:model_files", + ], + deps = [ + "//mediapipe/tasks/metadata:metadata_schema_py", + "//mediapipe/tasks/python/metadata", + "//mediapipe/tasks/python/metadata/metadata_writers:metadata_writer", + "//mediapipe/tasks/python/metadata/metadata_writers:object_detector", + "//mediapipe/tasks/python/test:test_utils", + ], +) diff --git 
a/mediapipe/tasks/python/test/metadata/metadata_writers/metadata_info_test.py b/mediapipe/tasks/python/test/metadata/metadata_writers/metadata_info_test.py index 57e5e3033..bcb384a34 100644 --- a/mediapipe/tasks/python/test/metadata/metadata_writers/metadata_info_test.py +++ b/mediapipe/tasks/python/test/metadata/metadata_writers/metadata_info_test.py @@ -403,6 +403,58 @@ class SentencePieceTokenizerMdTest(absltest.TestCase): self.assertEqual(metadata_json, expected_json) +class CategoryTensorMdTest(parameterized.TestCase, absltest.TestCase): + _NAME = "category" + _DESCRIPTION = "The category tensor." + _LABEL_FILE_EN = "labels.txt" + _LABEL_FILE_CN = "labels_cn.txt" # Locale label file in Chinese. + _EXPECTED_TENSOR_JSON = test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, "category_tensor_float_meta.json") + ) + + def test_create_metadata_should_succeed(self): + label_file_en = metadata_info.LabelFileMd( + file_path=self._LABEL_FILE_EN, locale="en" + ) + label_file_cn = metadata_info.LabelFileMd( + file_path=self._LABEL_FILE_CN, locale="cn" + ) + tensor_md = metadata_info.CategoryTensorMd( + name=self._NAME, + description=self._DESCRIPTION, + label_files=[label_file_en, label_file_cn], + ) + tensor_metadata = tensor_md.create_metadata() + + metadata_json = _metadata.convert_to_json( + _create_dummy_model_metadata_with_tensor(tensor_metadata) + ) + with open(self._EXPECTED_TENSOR_JSON, "r") as f: + expected_json = f.read() + self.assertEqual(metadata_json, expected_json) + + +class TensorGroupMdMdTest(absltest.TestCase): + _NAME = "detection_result" + _TENSOR_NAMES = ["location", "category", "score"] + _EXPECTED_JSON = test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, "tensor_group_meta.json") + ) + + def test_create_metadata_should_succeed(self): + tensor_group_md = metadata_info.TensorGroupMd( + name=self._NAME, tensor_names=self._TENSOR_NAMES + ) + tensor_group_metadata = tensor_group_md.create_metadata() + + metadata_json = 
_metadata.convert_to_json( + _create_dummy_model_metadata_with_tensor_group(tensor_group_metadata) + ) + with open(self._EXPECTED_JSON, "r") as f: + expected_json = f.read() + self.assertEqual(metadata_json, expected_json) + + def _create_dummy_model_metadata_with_tensor( tensor_metadata: _metadata_fb.TensorMetadataT) -> bytes: # Create a dummy model using the tensor metadata. @@ -427,6 +479,24 @@ def _create_dummy_model_metadata_with_process_uint( model_metadata = _metadata_fb.ModelMetadataT() model_metadata.subgraphMetadata = [subgraph_metadata] + # Create the Flatbuffers object and convert it to the json format. + builder = flatbuffers.Builder(0) + builder.Finish( + model_metadata.Pack(builder), + _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER, + ) + return bytes(builder.Output()) + + +def _create_dummy_model_metadata_with_tensor_group( + tensor_group: _metadata_fb.TensorGroupT, +) -> bytes: + # Creates a dummy model using the tensor group. + subgraph_metadata = _metadata_fb.SubGraphMetadataT() + subgraph_metadata.outputTensorGroups = [tensor_group] + model_metadata = _metadata_fb.ModelMetadataT() + model_metadata.subgraphMetadata = [subgraph_metadata] + # Create the Flatbuffers object and convert it to the json format. builder = flatbuffers.Builder(0) builder.Finish( diff --git a/mediapipe/tasks/python/test/metadata/metadata_writers/object_detector_test.py b/mediapipe/tasks/python/test/metadata/metadata_writers/object_detector_test.py new file mode 100644 index 000000000..195a9c8ec --- /dev/null +++ b/mediapipe/tasks/python/test/metadata/metadata_writers/object_detector_test.py @@ -0,0 +1,112 @@ +# Copyright 2023 The MediaPipe Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for metadata_writer.object_detector.""" + +import os + +from absl.testing import absltest +from absl.testing import parameterized + +from mediapipe.tasks.metadata import metadata_schema_py_generated as metadata_fb +from mediapipe.tasks.python.metadata import metadata +from mediapipe.tasks.python.metadata.metadata_writers import metadata_writer +from mediapipe.tasks.python.metadata.metadata_writers import object_detector +from mediapipe.tasks.python.test import test_utils + +_TEST_DATA_DIR = "mediapipe/tasks/testdata/metadata" +_LABEL_FILE = test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, "labelmap.txt") +) +_LABEL_FILE_NAME = "labels.txt" +_NORM_MEAN = 127.5 +_NORM_STD = 127.5 + +_MODEL_COCO = test_utils.get_test_data_path( + os.path.join( + _TEST_DATA_DIR, + "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29_no_metadata.tflite", + ) +) +_SCORE_CALIBRATION_FILE = test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, "score_calibration.csv") +) +_SCORE_CALIBRATION_FILENAME = "score_calibration.txt" +_SCORE_CALIBRATION_DEFAULT_SCORE = 0.2 +_JSON_FOR_SCORE_CALIBRATION = test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, "coco_ssd_mobilenet_v1_score_calibration.json") +) + + +class MetadataWriterTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters( + "ssd_mobilenet_v1_no_metadata", + "efficientdet_lite0_v1", + ) + def test_create_should_succeed(self, model_name): + model_path = test_utils.get_test_data_path( + 
os.path.join(_TEST_DATA_DIR, model_name + ".tflite") + ) + with open(model_path, "rb") as f: + model_buffer = f.read() + writer = object_detector.MetadataWriter.create( + model_buffer, + [_NORM_MEAN], + [_NORM_STD], + labels=metadata_writer.Labels().add_from_file(_LABEL_FILE), + ) + _, metadata_json = writer.populate() + expected_json_path = test_utils.get_test_data_path( + os.path.join(_TEST_DATA_DIR, model_name + ".json") + ) + with open(expected_json_path, "r") as f: + expected_json = f.read() + self.assertEqual(metadata_json, expected_json) + + def test_create_with_score_calibration_should_succeed(self): + with open(_MODEL_COCO, "rb") as f: + model_buffer = f.read() + writer = object_detector.MetadataWriter.create( + model_buffer, + [_NORM_MEAN], + [_NORM_STD], + labels=metadata_writer.Labels().add_from_file(_LABEL_FILE), + score_calibration=metadata_writer.ScoreCalibration.create_from_file( + metadata_fb.ScoreTransformationType.INVERSE_LOGISTIC, + _SCORE_CALIBRATION_FILE, + _SCORE_CALIBRATION_DEFAULT_SCORE, + ), + ) + tflite_content, metadata_json = writer.populate() + with open(_JSON_FOR_SCORE_CALIBRATION, "r") as f: + expected_json = f.read() + self.assertEqual(metadata_json, expected_json) + + displayer = metadata.MetadataDisplayer.with_model_buffer(tflite_content) + calibration_file_buffer = displayer.get_associated_file_buffer( + _SCORE_CALIBRATION_FILENAME + ) + with open(_SCORE_CALIBRATION_FILE, "rb") as f: + expected_calibration_file_buffer = f.read() + self.assertEqual(calibration_file_buffer, expected_calibration_file_buffer) + + label_file_buffer = displayer.get_associated_file_buffer(_LABEL_FILE_NAME) + with open(_LABEL_FILE, "rb") as f: + expected_labelfile_buffer = f.read() + self.assertEqual(label_file_buffer, expected_labelfile_buffer) + + +if __name__ == "__main__": + absltest.main() diff --git a/mediapipe/tasks/testdata/metadata/BUILD b/mediapipe/tasks/testdata/metadata/BUILD index 7905fbadb..7b2812260 100644 --- 
a/mediapipe/tasks/testdata/metadata/BUILD +++ b/mediapipe/tasks/testdata/metadata/BUILD @@ -25,6 +25,12 @@ package( mediapipe_files(srcs = [ "30k-clean.model", "bert_text_classifier_no_metadata.tflite", + "category_tensor_float_meta.json", + "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29_no_metadata.tflite", + "coco_ssd_mobilenet_v1_score_calibration.json", + "efficientdet_lite0_v1.json", + "efficientdet_lite0_v1.tflite", + "labelmap.txt", "mobile_ica_8bit-with-metadata.tflite", "mobile_ica_8bit-with-unsupported-metadata-version.tflite", "mobile_ica_8bit-without-model-metadata.tflite", @@ -35,6 +41,10 @@ mediapipe_files(srcs = [ "mobilenet_v2_1.0_224_quant_without_metadata.tflite", "mobilenet_v2_1.0_224_without_metadata.tflite", "movie_review.tflite", + "score_calibration.csv", + "ssd_mobilenet_v1_no_metadata.json", + "ssd_mobilenet_v1_no_metadata.tflite", + "tensor_group_meta.json", ]) exports_files([ @@ -74,6 +84,8 @@ filegroup( srcs = [ "30k-clean.model", "bert_text_classifier_no_metadata.tflite", + "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29_no_metadata.tflite", + "efficientdet_lite0_v1.tflite", "mobile_ica_8bit-with-metadata.tflite", "mobile_ica_8bit-with-unsupported-metadata-version.tflite", "mobile_ica_8bit-without-model-metadata.tflite", @@ -83,6 +95,7 @@ filegroup( "mobilenet_v2_1.0_224_quant_without_metadata.tflite", "mobilenet_v2_1.0_224_without_metadata.tflite", "movie_review.tflite", + "ssd_mobilenet_v1_no_metadata.tflite", ], ) @@ -94,9 +107,12 @@ filegroup( "bert_text_classifier_with_sentence_piece.json", "bert_tokenizer_meta.json", "bounding_box_tensor_meta.json", + "category_tensor_float_meta.json", "classification_tensor_float_meta.json", "classification_tensor_uint8_meta.json", "classification_tensor_unsupported_meta.json", + "coco_ssd_mobilenet_v1_score_calibration.json", + "efficientdet_lite0_v1.json", "external_file", "feature_tensor_meta.json", "general_meta.json", @@ -107,6 +123,7 @@ filegroup( "input_image_tensor_unsupported_meta.json", 
"input_text_tensor_default_meta.json", "input_text_tensor_meta.json", + "labelmap.txt", "labels.txt", "mobilebert_vocab.txt", "mobilenet_v2_1.0_224.json", @@ -114,10 +131,13 @@ filegroup( "movie_review.json", "movie_review_labels.txt", "regex_vocab.txt", + "score_calibration.csv", "score_calibration.txt", "score_calibration_file_meta.json", "score_calibration_tensor_meta.json", "score_thresholding_meta.json", "sentence_piece_tokenizer_meta.json", + "ssd_mobilenet_v1_no_metadata.json", + "tensor_group_meta.json", ], ) diff --git a/mediapipe/tasks/testdata/metadata/category_tensor_float_meta.json b/mediapipe/tasks/testdata/metadata/category_tensor_float_meta.json new file mode 100644 index 000000000..9ca058835 --- /dev/null +++ b/mediapipe/tasks/testdata/metadata/category_tensor_float_meta.json @@ -0,0 +1,33 @@ +{ + "subgraph_metadata": [ + { + "input_tensor_metadata": [ + { + "name": "category", + "description": "The category tensor.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + } + }, + "stats": { + }, + "associated_files": [ + { + "name": "labels.txt", + "description": "Labels for categories that the model can recognize.", + "type": "TENSOR_VALUE_LABELS", + "locale": "en" + }, + { + "name": "labels_cn.txt", + "description": "Labels for categories that the model can recognize.", + "type": "TENSOR_VALUE_LABELS", + "locale": "cn" + } + ] + } + ] + } + ] +} diff --git a/mediapipe/tasks/testdata/metadata/coco_ssd_mobilenet_v1_score_calibration.json b/mediapipe/tasks/testdata/metadata/coco_ssd_mobilenet_v1_score_calibration.json new file mode 100644 index 000000000..e24aa2e6b --- /dev/null +++ b/mediapipe/tasks/testdata/metadata/coco_ssd_mobilenet_v1_score_calibration.json @@ -0,0 +1,140 @@ +{ + "name": "ObjectDetector", + "description": "Identify which of a known set of objects might be present and provide information about their positions within the given image or a video stream.", + "subgraph_metadata": [ + { + 
"input_tensor_metadata": [ + { + "name": "image", + "description": "Input image to be processed.", + "content": { + "content_properties_type": "ImageProperties", + "content_properties": { + "color_space": "RGB" + } + }, + "process_units": [ + { + "options_type": "NormalizationOptions", + "options": { + "mean": [ + 127.5 + ], + "std": [ + 127.5 + ] + } + } + ], + "stats": { + "max": [ + 255.0 + ], + "min": [ + 0.0 + ] + } + } + ], + "output_tensor_metadata": [ + { + "name": "location", + "description": "The locations of the detected boxes.", + "content": { + "content_properties_type": "BoundingBoxProperties", + "content_properties": { + "index": [ + 1, + 0, + 3, + 2 + ], + "type": "BOUNDARIES" + }, + "range": { + "min": 2, + "max": 2 + } + }, + "stats": { + } + }, + { + "name": "category", + "description": "The categories of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + }, + "range": { + "min": 2, + "max": 2 + } + }, + "stats": { + }, + "associated_files": [ + { + "name": "labels.txt", + "description": "Labels for categories that the model can recognize.", + "type": "TENSOR_VALUE_LABELS" + } + ] + }, + { + "name": "score", + "description": "The scores of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + }, + "range": { + "min": 2, + "max": 2 + } + }, + "process_units": [ + { + "options_type": "ScoreCalibrationOptions", + "options": { + "score_transformation": "INVERSE_LOGISTIC", + "default_score": 0.2 + } + } + ], + "stats": { + }, + "associated_files": [ + { + "name": "score_calibration.txt", + "description": "Contains sigmoid-based score calibration parameters. 
The main purposes of score calibration is to make scores across classes comparable, so that a common threshold can be used for all output classes.", + "type": "TENSOR_AXIS_SCORE_CALIBRATION" + } + ] + }, + { + "name": "number of detections", + "description": "The number of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + } + }, + "stats": { + } + } + ], + "output_tensor_groups": [ + { + "name": "detection_result", + "tensor_names": [ + "location", + "category", + "score" + ] + } + ] + } + ], + "min_parser_version": "1.2.0" +} diff --git a/mediapipe/tasks/testdata/metadata/efficientdet_lite0_v1.json b/mediapipe/tasks/testdata/metadata/efficientdet_lite0_v1.json new file mode 100644 index 000000000..a3b98a626 --- /dev/null +++ b/mediapipe/tasks/testdata/metadata/efficientdet_lite0_v1.json @@ -0,0 +1,124 @@ +{ + "name": "ObjectDetector", + "description": "Identify which of a known set of objects might be present and provide information about their positions within the given image or a video stream.", + "subgraph_metadata": [ + { + "input_tensor_metadata": [ + { + "name": "image", + "description": "Input image to be processed.", + "content": { + "content_properties_type": "ImageProperties", + "content_properties": { + "color_space": "RGB" + } + }, + "process_units": [ + { + "options_type": "NormalizationOptions", + "options": { + "mean": [ + 127.5 + ], + "std": [ + 127.5 + ] + } + } + ], + "stats": { + "max": [ + 255.0 + ], + "min": [ + 0.0 + ] + } + } + ], + "output_tensor_metadata": [ + { + "name": "score", + "description": "The scores of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + }, + "range": { + "min": 2, + "max": 2 + } + }, + "stats": { + } + }, + { + "name": "location", + "description": "The locations of the detected boxes.", + "content": { + "content_properties_type": "BoundingBoxProperties", + "content_properties": { + 
"index": [ + 1, + 0, + 3, + 2 + ], + "type": "BOUNDARIES" + }, + "range": { + "min": 2, + "max": 2 + } + }, + "stats": { + } + }, + { + "name": "number of detections", + "description": "The number of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + } + }, + "stats": { + } + }, + { + "name": "category", + "description": "The categories of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + }, + "range": { + "min": 2, + "max": 2 + } + }, + "stats": { + }, + "associated_files": [ + { + "name": "labels.txt", + "description": "Labels for categories that the model can recognize.", + "type": "TENSOR_VALUE_LABELS" + } + ] + } + ], + "output_tensor_groups": [ + { + "name": "detection_result", + "tensor_names": [ + "location", + "category", + "score" + ] + } + ] + } + ], + "min_parser_version": "1.2.0" +} diff --git a/mediapipe/tasks/testdata/metadata/labelmap.txt b/mediapipe/tasks/testdata/metadata/labelmap.txt new file mode 100644 index 000000000..695772dcd --- /dev/null +++ b/mediapipe/tasks/testdata/metadata/labelmap.txt @@ -0,0 +1,90 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +??? +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +??? +backpack +umbrella +??? +??? +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +??? +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +??? +dining table +??? +??? +toilet +??? +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +??? 
+book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/mediapipe/tasks/testdata/metadata/score_calibration.csv b/mediapipe/tasks/testdata/metadata/score_calibration.csv new file mode 100644 index 000000000..1127d3d78 --- /dev/null +++ b/mediapipe/tasks/testdata/metadata/score_calibration.csv @@ -0,0 +1,89 @@ + +0.9876328110694885,0.36622241139411926,0.5352765321731567,0.71484375 +0.9584911465644836,1.0602262020111084,0.2777034342288971,0.019999999552965164 +0.9698624014854431,0.8795201778411865,0.539591908454895,0.00390625 +0.7486230731010437,1.1876736879348755,2.552982807159424,0.019999999552965164 +0.9745277166366577,0.3739396333694458,0.4621727764606476,0.19921875 +0.9683839678764343,0.6996201276779175,0.7690851092338562,0.019999999552965164 +0.6875,0.31044548749923706,1.0056899785995483,0.019999999552965164 +0.9849396347999573,0.8532888889312744,-0.2361421436071396,0.03125 +0.9878578186035156,1.0118975639343262,0.13313621282577515,0.359375 +0.9915205836296082,0.4434199929237366,1.0268371105194092,0.05078125 +0.9370332360267639,0.4586562216281891,-0.08101099729537964,0.019999999552965164 +0.9905818104743958,0.8670706152915955,0.012704282067716122,0.019999999552965164 +0.9080020189285278,0.8507471680641174,0.5081117749214172,0.019999999552965164 +0.985953152179718,0.9933826923370361,-0.8114940524101257,0.109375 +0.9819648861885071,1.12098228931427,-0.6330763697624207,0.01171875 +0.9025918245315552,0.7803755402565002,0.03275677561759949,0.08984375 +0.9863958954811096,0.11243592947721481,0.935604453086853,0.61328125 +0.9905291795730591,0.3710605800151825,0.708966851234436,0.359375 +0.9917052984237671,0.9596433043479919,0.19800108671188354,0.09765625 +0.8762937188148499,0.3449830114841461,0.5352474451065063,0.0078125 +0.9902125000953674,0.8918796181678772,-0.1306992471218109,0.26171875 + +0.9902340173721313,0.9177873134613037,-0.4322589933872223,0.019999999552965164 +0.9707600474357605,0.7028177976608276,0.9813734889030457,0.019999999552965164 
+0.9823090434074402,1.0499590635299683,0.12045472860336304,0.0078125 +0.990516185760498,0.9449402093887329,1.3773189783096313,0.019999999552965164 +0.9875434041023254,0.577914297580719,1.282518982887268,0.0390625 +0.9821421504020691,0.0967339277267456,0.8279788494110107,0.47265625 +0.9875047206878662,0.9038218259811401,2.1208062171936035,0.38671875 +0.9857864379882812,0.8627446889877319,0.18189261853694916,0.019999999552965164 +0.9647751450538635,1.0752476453781128,-0.018294010311365128,0.0234375 +0.9830358624458313,0.5638481378555298,0.8346489667892456,0.019999999552965164 +0.9904966354370117,1.0160938501358032,-0.0573287308216095,0.00390625 +0.8458405137062073,0.4868394434452057,0.6617084741592407,0.019999999552965164 +0.9847381711006165,0.5939620137214661,0.008616370148956776,0.00390625 +0.9375938773155212,0.723095178604126,0.6635608077049255,0.019999999552965164 +0.9334303140640259,0.5689108967781067,0.37019580602645874,0.019999999552965164 +0.9716793894767761,1.0037211179733276,0.5898993611335754,0.02734375 +0.9197732210159302,0.46794334053993225,0.7365336418151855,0.640625 +0.9857497811317444,0.7299028635025024,0.9195274114608765,0.0390625 +0.8758038282394409,1.200216293334961,0.02580185979604721,0.019999999552965164 +0.9841026067733765,0.8050475716590881,0.9698556661605835,0.0078125 +0.9908539652824402,0.7911490201950073,0.19351358711719513,0.12109375 +0.9179956316947937,0.023991893976926804,0.35193610191345215,0.04296875 +0.9903728365898132,0.7744967341423035,0.2686336636543274,0.359375 +0.906022846698761,0.5766159892082214,1.0600007772445679,0.04296875 +0.9885554909706116,0.99117511510849,0.5611960291862488,0.4140625 +0.9906331896781921,1.1376535892486572,1.45369291305542,0.019999999552965164 +0.9640991687774658,0.5387894511222839,1.1824018955230713,0.019999999552965164 +0.9932155609130859,0.4347895085811615,1.3938102722167969,0.0078125 +0.9884702563285828,0.885567843914032,0.1556047648191452,0.1484375 
+0.9891508221626282,0.04143073782324791,0.6111864447593689,0.0078125 +0.8935436010360718,0.2937895655632019,0.3215920031070709,0.00390625 +0.8327123522758484,0.8381986021995544,-0.026293788105249405,0.019999999552965164 +0.9839455485343933,0.9581400156021118,1.495324969291687,0.640625 +0.9904995560646057,0.9168422818183899,0.33293962478637695,0.015625 +0.9856975674629211,1.0433714389801025,0.5954801440238953,0.019999999552965164 +0.9942344427108765,0.7206616997718811,1.666426181793213,0.9609375 +0.8182767033576965,0.9546273946762085,0.5500107407569885,0.019999999552965164 +0.9631295800209045,0.6277880668640137,0.05952891707420349,0.05859375 +0.9819005727767944,1.0826934576034546,0.7444049715995789,0.30859375 +0.9884315133094788,1.0500890016555786,1.1161768436431885,0.019999999552965164 +0.9175815582275391,0.09232989698648453,1.596696138381958,0.47265625 +0.9868760108947754,0.903079628944397,-0.15774966776371002,0.8515625 +0.9866015911102295,0.7533788084983826,0.7489103078842163,0.03125 +0.8074312806129456,0.8615151643753052,0.40621864795684814,0.00390625 +0.9829285144805908,0.8954831957817078,0.4462486207485199,0.02734375 +0.9681841135025024,0.6257772445678711,0.43809664249420166,0.38671875 +0.9872947931289673,0.9947993159294128,0.9271130561828613,0.26171875 +0.7997345328330994,0.3995186686515808,-0.3755347430706024,0.019999999552965164 +0.9922754168510437,1.1357101202011108,-0.10267537832260132,0.5 +0.9861471652984619,0.8725204467773438,1.1657888889312744,0.019999999552965164 +0.9888646006584167,1.2098380327224731,-0.27832522988319397,0.05078125 +0.5641342997550964,1.0501892566680908,1.9519661664962769,0.019999999552965164 +0.9548168778419495,0.8971696496009827,1.378737449645996,0.00390625 +0.9875019788742065,0.8718118071556091,0.5476236939430237,0.0078125 +0.9725168347358704,0.6989551782608032,-1.3157455921173096,0.61328125 +0.9864014983177185,0.7576251029968262,-0.41650667786598206,0.00390625 
+0.960071861743927,0.13068856298923492,0.4819187819957733,0.019999999552965164 +0.9849705100059509,0.7724528908729553,0.3877875804901123,0.03125 +0.9703006744384766,0.8848260641098022,-1.1767181158065796,0.80078125 +0.9837008714675903,0.7015050053596497,0.18209102749824524,0.00390625 +0.9579976797103882,0.053806986659765244,2.7309608459472656,0.4000000059604645 +0.9896979928016663,0.41135814785957336,0.5738034844398499,0.019999999552965164 +0.9853873252868652,0.5438565611839294,0.20562179386615753,0.02734375 +0.9784129858016968,0.6330984830856323,-0.1789831817150116,0.015625 +0.9375,0.855596125125885,-0.1933964192867279,0.019999999552965164 +0.9524176716804504,0.08709807693958282,0.6299692988395691,0.33203125 diff --git a/mediapipe/tasks/testdata/metadata/ssd_mobilenet_v1_no_metadata.json b/mediapipe/tasks/testdata/metadata/ssd_mobilenet_v1_no_metadata.json new file mode 100644 index 000000000..95500f196 --- /dev/null +++ b/mediapipe/tasks/testdata/metadata/ssd_mobilenet_v1_no_metadata.json @@ -0,0 +1,124 @@ +{ + "name": "ObjectDetector", + "description": "Identify which of a known set of objects might be present and provide information about their positions within the given image or a video stream.", + "subgraph_metadata": [ + { + "input_tensor_metadata": [ + { + "name": "image", + "description": "Input image to be processed.", + "content": { + "content_properties_type": "ImageProperties", + "content_properties": { + "color_space": "RGB" + } + }, + "process_units": [ + { + "options_type": "NormalizationOptions", + "options": { + "mean": [ + 127.5 + ], + "std": [ + 127.5 + ] + } + } + ], + "stats": { + "max": [ + 255.0 + ], + "min": [ + 0.0 + ] + } + } + ], + "output_tensor_metadata": [ + { + "name": "location", + "description": "The locations of the detected boxes.", + "content": { + "content_properties_type": "BoundingBoxProperties", + "content_properties": { + "index": [ + 1, + 0, + 3, + 2 + ], + "type": "BOUNDARIES" + }, + "range": { + "min": 2, + "max": 2 + } 
+ }, + "stats": { + } + }, + { + "name": "category", + "description": "The categories of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + }, + "range": { + "min": 2, + "max": 2 + } + }, + "stats": { + }, + "associated_files": [ + { + "name": "labels.txt", + "description": "Labels for categories that the model can recognize.", + "type": "TENSOR_VALUE_LABELS" + } + ] + }, + { + "name": "score", + "description": "The scores of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + }, + "range": { + "min": 2, + "max": 2 + } + }, + "stats": { + } + }, + { + "name": "number of detections", + "description": "The number of the detected boxes.", + "content": { + "content_properties_type": "FeatureProperties", + "content_properties": { + } + }, + "stats": { + } + } + ], + "output_tensor_groups": [ + { + "name": "detection_result", + "tensor_names": [ + "location", + "category", + "score" + ] + } + ] + } + ], + "min_parser_version": "1.2.0" +} diff --git a/mediapipe/tasks/testdata/metadata/tensor_group_meta.json b/mediapipe/tasks/testdata/metadata/tensor_group_meta.json new file mode 100644 index 000000000..9b0cbc8d7 --- /dev/null +++ b/mediapipe/tasks/testdata/metadata/tensor_group_meta.json @@ -0,0 +1,16 @@ +{ + "subgraph_metadata": [ + { + "output_tensor_groups": [ + { + "name": "detection_result", + "tensor_names": [ + "location", + "category", + "score" + ] + } + ] + } + ] +} diff --git a/third_party/external_files.bzl b/third_party/external_files.bzl index f446b3728..7dd69f31e 100644 --- a/third_party/external_files.bzl +++ b/third_party/external_files.bzl @@ -94,6 +94,12 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/canned_gesture_classifier.tflite?generation=1668550473107417"], ) + http_file( + name = "com_google_mediapipe_category_tensor_float_meta_json", + sha256 = 
"d0cbe95a99ffc57046d7e66cf194600d12899216a4d3bf1a3851811648005e38", + urls = ["https://storage.googleapis.com/mediapipe-assets/category_tensor_float_meta.json?generation=1677522730922512"], + ) + http_file( name = "com_google_mediapipe_cat_jpg", sha256 = "2533197401eebe9410ea4d063f86c43fbd2666f3e8165a38aca155c0d09c21be", @@ -160,6 +166,12 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite?generation=1661875692679200"], ) + http_file( + name = "com_google_mediapipe_coco_ssd_mobilenet_v1_1_0_quant_2018_06_29_no_metadata_tflite", + sha256 = "e4b118e5e4531945de2e659742c7c590f7536f8d0ed26d135abcfe83b4779d13", + urls = ["https://storage.googleapis.com/mediapipe-assets/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29_no_metadata.tflite?generation=1677522735292070"], + ) + http_file( name = "com_google_mediapipe_coco_ssd_mobilenet_v1_1_0_quant_2018_06_29_tflite", sha256 = "61d598093ed03ed41aa47c3a39a28ac01e960d6a810a5419b9a5016a1e9c469b", @@ -172,6 +184,12 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29_with_dummy_score_calibration.tflite?generation=1662653237233967"], ) + http_file( + name = "com_google_mediapipe_coco_ssd_mobilenet_v1_score_calibration_json", + sha256 = "f377600be924c29697477f9d739db9db5d712aec4a644548526912858db6a082", + urls = ["https://storage.googleapis.com/mediapipe-assets/coco_ssd_mobilenet_v1_score_calibration.json?generation=1677522739770755"], + ) + http_file( name = "com_google_mediapipe_corrupted_mobilenet_v1_0_25_224_1_default_1_tflite", sha256 = "f0cbeb8061f4c693e20de779ce255af923508492e8a24f6db320845a52facb51", @@ -190,6 +208,18 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/dummy_gesture_recognizer.task?generation=1665707319890725"], ) + http_file( + name = "com_google_mediapipe_efficientdet_lite0_v1_json", + sha256 = 
"7a9e1fb625a6130a251e612637fc546cfc8cfabfadc7dbdade44c87f1d8996ca", + urls = ["https://storage.googleapis.com/mediapipe-assets/efficientdet_lite0_v1.json?generation=1677522746026682"], + ) + + http_file( + name = "com_google_mediapipe_efficientdet_lite0_v1_tflite", + sha256 = "f97efd21d6009a7b4b94b3e57baaeb77ec3225b42d32477f5003736a8084c081", + urls = ["https://storage.googleapis.com/mediapipe-assets/efficientdet_lite0_v1.tflite?generation=1677522750449279"], + ) + http_file( name = "com_google_mediapipe_empty_vocab_for_regex_tokenizer_txt", sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", @@ -238,6 +268,30 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/external_file.txt?generation=1661875736240688"], ) + http_file( + name = "com_google_mediapipe_face_blendshapes_generated_graph_pbtxt", + sha256 = "92e016a08940e1f81752e749e1931f9e551fa9483786b8fba0624257e9c41d3d", + urls = ["https://storage.googleapis.com/mediapipe-assets/face_blendshapes_generated_graph.pbtxt?generation=1677522753449135"], + ) + + http_file( + name = "com_google_mediapipe_face_blendshapes_in_landmarks_prototxt", + sha256 = "f6a10fa5825f2eee695371a449c605698403c146c270b2cb7574512f3f9e4af8", + urls = ["https://storage.googleapis.com/mediapipe-assets/face_blendshapes_in_landmarks.prototxt?generation=1677522757270549"], + ) + + http_file( + name = "com_google_mediapipe_face_blendshapes_out_prototxt", + sha256 = "ea7740add8b87c9bd375eaa40a05b509eaca04f025cb6bdc7ca486e9fb32dfba", + urls = ["https://storage.googleapis.com/mediapipe-assets/face_blendshapes_out.prototxt?generation=1677522761312644"], + ) + + http_file( + name = "com_google_mediapipe_face_blendshapes_tflite", + sha256 = "4f36dded049db18d76048567439b2a7f58f1daabc00d78bfe8f3ad396a2d2082", + urls = ["https://storage.googleapis.com/mediapipe-assets/face_blendshapes.tflite?generation=1677522764748685"], + ) + http_file( name = 
"com_google_mediapipe_face_detection_full_range_sparse_tflite", sha256 = "2c3728e6da56f21e21a320433396fb06d40d9088f2247c05e5635a688d45dfe1", @@ -448,6 +502,12 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/knift_labelmap.txt?generation=1661875792821628"], ) + http_file( + name = "com_google_mediapipe_labelmap_txt", + sha256 = "f8803ef7900160c629d570848dfda4175e21667bf7b71f73f8ece4938c9f2bf2", + urls = ["https://storage.googleapis.com/mediapipe-assets/labelmap.txt?generation=1677522772140291"], + ) + http_file( name = "com_google_mediapipe_labels_txt", sha256 = "536feacc519de3d418de26b2effb4d75694a8c4c0063e36499a46fa8061e2da9", @@ -726,8 +786,8 @@ def external_files(): http_file( name = "com_google_mediapipe_portrait_expected_face_landmarks_with_attention_pbtxt", - sha256 = "f2ccd889654b914996e4aab0d7831a3e73d3b63d6c14f6bac4bec5cd3415bce4", - urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_face_landmarks_with_attention.pbtxt?generation=1676415475626542"], + sha256 = "dae959456f001015278f3a1535bd03c9fa0990a3df951135645ce23293be0613", + urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_face_landmarks_with_attention.pbtxt?generation=1677522777298874"], ) http_file( @@ -790,6 +850,12 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/right_hands_rotated.jpg?generation=1666037076873345"], ) + http_file( + name = "com_google_mediapipe_score_calibration_csv", + sha256 = "3ff4962162387ab8851940d2f063ce2b3a4734a8893c007a3c92d11170b020c3", + urls = ["https://storage.googleapis.com/mediapipe-assets/score_calibration.csv?generation=1677522780749449"], + ) + http_file( name = "com_google_mediapipe_score_calibration_file_meta_json", sha256 = "6a3c305620371f662419a496f75be5a10caebca7803b1e99d8d5d22ba51cda94", @@ -892,12 +958,30 @@ def external_files(): urls = 
["https://storage.googleapis.com/mediapipe-assets/ssdlite_object_detection.tflite?generation=1661875944118759"], ) + http_file( + name = "com_google_mediapipe_ssd_mobilenet_v1_no_metadata_json", + sha256 = "89157590b736cf3f3247aa9c8be3570c2856f4981a1e9476117e7c629e7c4825", + urls = ["https://storage.googleapis.com/mediapipe-assets/ssd_mobilenet_v1_no_metadata.json?generation=1677522786336455"], + ) + + http_file( + name = "com_google_mediapipe_ssd_mobilenet_v1_no_metadata_tflite", + sha256 = "e4b118e5e4531945de2e659742c7c590f7536f8d0ed26d135abcfe83b4779d13", + urls = ["https://storage.googleapis.com/mediapipe-assets/ssd_mobilenet_v1_no_metadata.tflite?generation=1677522790838583"], + ) + http_file( name = "com_google_mediapipe_ssd_mobilenet_v1_tflite", sha256 = "cbdecd08b44c5dea3821f77c5468e2936ecfbf43cde0795a2729fdb43401e58b", urls = ["https://storage.googleapis.com/mediapipe-assets/ssd_mobilenet_v1.tflite?generation=1661875947436302"], ) + http_file( + name = "com_google_mediapipe_tensor_group_meta_json", + sha256 = "eea454ae15b0c4f7e1f84aad9700bc936627fe22a085d335a40269740bc33c69", + urls = ["https://storage.googleapis.com/mediapipe-assets/tensor_group_meta.json?generation=1677522794324300"], + ) + http_file( name = "com_google_mediapipe_test_jpg", sha256 = "798a12a466933842528d8438f553320eebe5137f02650f12dd68706a2f94fb4f",