From cb806071ba30ba66cfa1ec26d80b9b5c0b3e2501 Mon Sep 17 00:00:00 2001
From: kinaryml <kinar.ravishankar@ymedialabs.com>
Date: Fri, 7 Oct 2022 22:26:49 -0700
Subject: [PATCH] Added more tests and updated the APIs to use a new constant

---
 .../test/vision/image_classifier_test.py      | 338 ++++++++++++++++--
 .../tasks/python/vision/image_classifier.py   |  45 ++-
 2 files changed, 355 insertions(+), 28 deletions(-)

diff --git a/mediapipe/tasks/python/test/vision/image_classifier_test.py b/mediapipe/tasks/python/test/vision/image_classifier_test.py
index 5143a28db..073674c3f 100644
--- a/mediapipe/tasks/python/test/vision/image_classifier_test.py
+++ b/mediapipe/tasks/python/test/vision/image_classifier_test.py
@@ -14,6 +14,7 @@
 """Tests for image classifier."""
 
 import enum
+from unittest import mock
 
 import numpy as np
 from absl.testing import absltest
@@ -41,33 +42,46 @@ _RUNNING_MODE = running_mode_module.VisionTaskRunningMode
 
 _MODEL_FILE = 'mobilenet_v2_1.0_224.tflite'
 _IMAGE_FILE = 'burger.jpg'
+_EXPECTED_CATEGORIES = [
+    _Category(
+      index=934,
+      score=0.7939587831497192,
+      display_name='',
+      category_name='cheeseburger'),
+    _Category(
+      index=932,
+      score=0.02739289402961731,
+      display_name='',
+      category_name='bagel'),
+    _Category(
+      index=925,
+      score=0.01934075355529785,
+      display_name='',
+      category_name='guacamole'),
+    _Category(
+      index=963,
+      score=0.006327860057353973,
+      display_name='',
+      category_name='meat loaf')
+]
 _EXPECTED_CLASSIFICATION_RESULT = _ClassificationResult(
   classifications=[
     _Classifications(
       entries=[
         _ClassificationEntry(
-          categories=[
-            _Category(
-              index=934,
-              score=0.7939587831497192,
-              display_name='',
-              category_name='cheeseburger'),
-            _Category(
-              index=932,
-              score=0.02739289402961731,
-              display_name='',
-              category_name='bagel'),
-            _Category(
-              index=925,
-              score=0.01934075355529785,
-              display_name='',
-              category_name='guacamole'),
-            _Category(
-              index=963,
-              score=0.006327860057353973,
-              display_name='',
-              category_name='meat loaf')
-          ],
+          categories=_EXPECTED_CATEGORIES,
+          timestamp_ms=0
+        )
+      ],
+      head_index=0,
+      head_name='probability')
+  ])
+_EMPTY_CLASSIFICATION_RESULT = _ClassificationResult(
+  classifications=[
+    _Classifications(
+      entries=[
+        _ClassificationEntry(
+          categories=[],
           timestamp_ms=0
         )
       ],
@@ -93,6 +107,36 @@ class ImageClassifierTest(parameterized.TestCase):
         test_utils.get_test_data_path(_IMAGE_FILE))
     self.model_path = test_utils.get_test_data_path(_MODEL_FILE)
 
+  def test_create_from_file_succeeds_with_valid_model_path(self):
+    # Creates with default option and valid model file successfully.
+    with _ImageClassifier.create_from_model_path(self.model_path) as classifier:
+      self.assertIsInstance(classifier, _ImageClassifier)
+
+  def test_create_from_options_succeeds_with_valid_model_path(self):
+    # Creates with options containing model file successfully.
+    base_options = _BaseOptions(model_asset_path=self.model_path)
+    options = _ImageClassifierOptions(base_options=base_options)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      self.assertIsInstance(classifier, _ImageClassifier)
+
+  def test_create_from_options_fails_with_invalid_model_path(self):
+    # Invalid empty model path.
+    with self.assertRaisesRegex(
+        ValueError,
+        r"ExternalFile must specify at least one of 'file_content', "
+        r"'file_name' or 'file_descriptor_meta'."):
+      base_options = _BaseOptions(model_asset_path='')
+      options = _ImageClassifierOptions(base_options=base_options)
+      _ImageClassifier.create_from_options(options)
+
+  def test_create_from_options_succeeds_with_valid_model_content(self):
+    # Builds the classifier from in-memory model bytes and closes it on exit.
+    with open(self.model_path, 'rb') as f:
+      base_options = _BaseOptions(model_asset_buffer=f.read())
+      options = _ImageClassifierOptions(base_options=base_options)
+      with _ImageClassifier.create_from_options(options) as classifier:
+        self.assertIsInstance(classifier, _ImageClassifier)
+
   @parameterized.parameters(
       (ModelFileType.FILE_NAME, 4, _EXPECTED_CLASSIFICATION_RESULT),
       (ModelFileType.FILE_CONTENT, 4, _EXPECTED_CLASSIFICATION_RESULT))
@@ -122,6 +166,183 @@ class ImageClassifierTest(parameterized.TestCase):
     # a context.
     classifier.close()
 
+  @parameterized.parameters(
+    (ModelFileType.FILE_NAME, 4, _EXPECTED_CLASSIFICATION_RESULT),
+    (ModelFileType.FILE_CONTENT, 4, _EXPECTED_CLASSIFICATION_RESULT))
+  def test_classify_in_context(self, model_file_type, max_results,
+                               expected_classification_result):
+    if model_file_type is ModelFileType.FILE_NAME:
+      base_options = _BaseOptions(model_asset_path=self.model_path)
+    elif model_file_type is ModelFileType.FILE_CONTENT:
+      with open(self.model_path, 'rb') as f:
+        model_content = f.read()
+      base_options = _BaseOptions(model_asset_buffer=model_content)
+    else:
+      # Should never happen
+      raise ValueError('model_file_type is invalid.')
+
+    classifier_options = _ClassifierOptions(max_results=max_results)
+    options = _ImageClassifierOptions(
+      base_options=base_options, classifier_options=classifier_options)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      # Performs image classification on the input.
+      image_result = classifier.classify(self.test_image)
+      # Comparing results.
+      self.assertEqual(image_result, expected_classification_result)
+
+  def test_score_threshold_option(self):
+    classifier_options = _ClassifierOptions(score_threshold=_SCORE_THRESHOLD)
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      classifier_options=classifier_options)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      # Performs image classification on the input.
+      image_result = classifier.classify(self.test_image)
+
+      # Every returned category must clear the threshold, not just the first.
+      for classification in image_result.classifications:
+        for entry in classification.entries:
+          for category in entry.categories:
+            self.assertGreaterEqual(
+              category.score, _SCORE_THRESHOLD,
+              'Classification with score lower than threshold found. '
+              f'{classification}')
+
+  def test_max_results_option(self):
+    classifier_options = _ClassifierOptions(max_results=_MAX_RESULTS)
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      classifier_options=classifier_options)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      # With max_results set, the classifier must not return more categories.
+      image_result = classifier.classify(self.test_image)
+      categories = image_result.classifications[0].entries[0].categories
+
+      self.assertLessEqual(
+        len(categories), _MAX_RESULTS, 'Too many results returned.')
+
+  def test_allow_list_option(self):
+    classifier_options = _ClassifierOptions(category_allowlist=_ALLOW_LIST)
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      classifier_options=classifier_options)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      # Performs image classification on the input.
+      image_result = classifier.classify(self.test_image)
+
+      # Check every returned category, not only the first one in each entry.
+      for classification in image_result.classifications:
+        for entry in classification.entries:
+          for label in (c.category_name for c in entry.categories):
+            self.assertIn(label, _ALLOW_LIST,
+                          f'Label {label} found but not in label allow list')
+
+  def test_deny_list_option(self):
+    classifier_options = _ClassifierOptions(category_denylist=_DENY_LIST)
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      classifier_options=classifier_options)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      # Performs image classification on the input.
+      image_result = classifier.classify(self.test_image)
+
+      # Check every returned category, not only the first one in each entry.
+      for classification in image_result.classifications:
+        for entry in classification.entries:
+          for label in (c.category_name for c in entry.categories):
+            self.assertNotIn(label, _DENY_LIST,
+                             f'Label {label} found but in deny list.')
+
+  def test_combined_allowlist_and_denylist(self):
+    # Fails with combined allowlist and denylist
+    with self.assertRaisesRegex(
+        ValueError,
+        r'`category_allowlist` and `category_denylist` are mutually '
+        r'exclusive options.'):
+      classifier_options = _ClassifierOptions(category_allowlist=['foo'],
+                                              category_denylist=['bar'])
+      options = _ImageClassifierOptions(
+        base_options=_BaseOptions(model_asset_path=self.model_path),
+        classifier_options=classifier_options)
+      with _ImageClassifier.create_from_options(options) as unused_classifier:
+        pass
+
+  def test_empty_classification_outputs(self):
+    classifier_options = _ClassifierOptions(score_threshold=1)
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      classifier_options=classifier_options)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      # Performs image classification on the input.
+      image_result = classifier.classify(self.test_image)
+      self.assertEmpty(image_result.classifications[0].entries[0].categories)
+
+  def test_missing_result_callback(self):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.LIVE_STREAM)
+    with self.assertRaisesRegex(ValueError,
+                                r'result callback must be provided'):
+      with _ImageClassifier.create_from_options(options) as unused_classifier:
+        pass
+
+  @parameterized.parameters((_RUNNING_MODE.IMAGE), (_RUNNING_MODE.VIDEO))
+  def test_illegal_result_callback(self, running_mode):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=running_mode,
+      result_callback=mock.MagicMock())
+    with self.assertRaisesRegex(ValueError,
+                                r'result callback should not be provided'):
+      with _ImageClassifier.create_from_options(options) as unused_classifier:
+        pass
+
+  def test_calling_classify_for_video_in_image_mode(self):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.IMAGE)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      with self.assertRaisesRegex(ValueError,
+                                  r'not initialized with the video mode'):
+        classifier.classify_for_video(self.test_image, 0)
+
+  def test_calling_classify_async_in_image_mode(self):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.IMAGE)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      with self.assertRaisesRegex(ValueError,
+                                  r'not initialized with the live stream mode'):
+        classifier.classify_async(self.test_image, 0)
+
+  def test_calling_classify_in_video_mode(self):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.VIDEO)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      with self.assertRaisesRegex(ValueError,
+                                  r'not initialized with the image mode'):
+        classifier.classify(self.test_image)
+
+  def test_calling_classify_async_in_video_mode(self):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.VIDEO)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      with self.assertRaisesRegex(ValueError,
+                                  r'not initialized with the live stream mode'):
+        classifier.classify_async(self.test_image, 0)
+
+  def test_classify_for_video_with_out_of_order_timestamp(self):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.VIDEO)
+    with _ImageClassifier.create_from_options(options) as classifier:
+      unused_result = classifier.classify_for_video(self.test_image, 1)
+      with self.assertRaisesRegex(
+          ValueError, r'Input timestamp must be monotonically increasing'):
+        classifier.classify_for_video(self.test_image, 0)
+
   def test_classify_for_video(self):
     classifier_options = _ClassifierOptions(max_results=4)
     options = _ImageClassifierOptions(
@@ -132,7 +353,78 @@ class ImageClassifierTest(parameterized.TestCase):
       for timestamp in range(0, 300, 30):
         classification_result = classifier.classify_for_video(
             self.test_image, timestamp)
-        self.assertEqual(classification_result, _EXPECTED_CLASSIFICATION_RESULT)
+        expected_classification_result = _ClassificationResult(
+          classifications=[
+            _Classifications(
+              entries=[
+                _ClassificationEntry(
+                  categories=_EXPECTED_CATEGORIES, timestamp_ms=timestamp)
+              ],
+              head_index=0, head_name='probability')
+          ])
+        self.assertEqual(classification_result, expected_classification_result)
+
+  def test_calling_classify_in_live_stream_mode(self):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.LIVE_STREAM,
+      result_callback=mock.MagicMock())
+    with _ImageClassifier.create_from_options(options) as classifier:
+      with self.assertRaisesRegex(ValueError,
+                                  r'not initialized with the image mode'):
+        classifier.classify(self.test_image)
+
+  def test_calling_classify_for_video_in_live_stream_mode(self):
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.LIVE_STREAM,
+      result_callback=mock.MagicMock())
+    with _ImageClassifier.create_from_options(options) as classifier:
+      with self.assertRaisesRegex(ValueError,
+                                  r'not initialized with the video mode'):
+        classifier.classify_for_video(self.test_image, 0)
+
+  def test_classify_async_calls_with_illegal_timestamp(self):
+    classifier_options = _ClassifierOptions(max_results=4)
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.LIVE_STREAM,
+      classifier_options=classifier_options,
+      result_callback=mock.MagicMock())
+    with _ImageClassifier.create_from_options(options) as classifier:
+      classifier.classify_async(self.test_image, 100)
+      with self.assertRaisesRegex(
+          ValueError, r'Input timestamp must be monotonically increasing'):
+        classifier.classify_async(self.test_image, 0)
+
+  # TODO: Fix the "packet is empty" issue and re-enable this test.
+  """
+  @parameterized.parameters((0, _EXPECTED_CLASSIFICATION_RESULT),
+                            (1, _EMPTY_CLASSIFICATION_RESULT))
+  def test_classify_async_calls(self, threshold, expected_result):
+    self.observed_timestamp_ms = -1
+
+    def check_result(result: _ClassificationResult, output_image: _Image,
+                     timestamp_ms: int):
+      self.assertEqual(result, expected_result)
+      self.assertTrue(
+        np.array_equal(output_image.numpy_view(),
+                       self.test_image.numpy_view()))
+      self.assertLess(self.observed_timestamp_ms, timestamp_ms)
+      self.observed_timestamp_ms = timestamp_ms
+
+    classifier_options = _ClassifierOptions(
+      max_results=4, score_threshold=threshold)
+    options = _ImageClassifierOptions(
+      base_options=_BaseOptions(model_asset_path=self.model_path),
+      running_mode=_RUNNING_MODE.LIVE_STREAM,
+      classifier_options=classifier_options,
+      result_callback=check_result)
+    classifier = _ImageClassifier.create_from_options(options)
+    for timestamp in range(0, 300, 30):
+      classifier.classify_async(self.test_image, timestamp)
+    classifier.close()
+  """
 
 
 if __name__ == '__main__':
diff --git a/mediapipe/tasks/python/vision/image_classifier.py b/mediapipe/tasks/python/vision/image_classifier.py
index b3bafa113..36c5561c4 100644
--- a/mediapipe/tasks/python/vision/image_classifier.py
+++ b/mediapipe/tasks/python/vision/image_classifier.py
@@ -43,6 +43,7 @@ _IMAGE_IN_STREAM_NAME = 'image_in'
 _IMAGE_OUT_STREAM_NAME = 'image_out'
 _IMAGE_TAG = 'IMAGE'
 _TASK_GRAPH_NAME = 'mediapipe.tasks.vision.image_classifier.ImageClassifierGraph'
+_MICRO_SECONDS_PER_MILLISECOND = 1000
 
 
 @dataclasses.dataclass
@@ -91,7 +92,7 @@ class ImageClassifier(base_vision_task_api.BaseVisionTaskApi):
     """Creates an `ImageClassifier` object from a TensorFlow Lite model and the default `ImageClassifierOptions`.
 
     Note that the created `ImageClassifier` instance is in image mode, for
-    detecting objects on single image inputs.
+    classifying objects on single image inputs.
 
     Args:
       model_path: Path to the model.
@@ -137,7 +138,8 @@ class ImageClassifier(base_vision_task_api.BaseVisionTaskApi):
       ])
       image = packet_getter.get_image(output_packets[_IMAGE_OUT_STREAM_NAME])
       timestamp = output_packets[_IMAGE_OUT_STREAM_NAME].timestamp
-      options.result_callback(classification_result, image, timestamp)
+      options.result_callback(classification_result, image,
+                              timestamp.value // _MICRO_SECONDS_PER_MILLISECOND)
 
     task_info = _TaskInfo(
         task_graph=_TASK_GRAPH_NAME,
@@ -156,7 +158,6 @@ class ImageClassifier(base_vision_task_api.BaseVisionTaskApi):
             _RunningMode.LIVE_STREAM), options.running_mode,
         packets_callback if options.result_callback else None)
 
-  # TODO: Create an Image class for MediaPipe Tasks.
   def classify(
       self,
       image: image_module.Image,
@@ -206,8 +207,9 @@ class ImageClassifier(base_vision_task_api.BaseVisionTaskApi):
       RuntimeError: If image classification failed to run.
     """
     output_packets = self._process_video_data({
-      _IMAGE_IN_STREAM_NAME:
-        packet_creator.create_image(image).at(timestamp_ms)
+        _IMAGE_IN_STREAM_NAME:
+            packet_creator.create_image(image).at(
+                timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
     })
     classification_result_proto = packet_getter.get_proto(
       output_packets[_CLASSIFICATION_RESULT_OUT_STREAM_NAME])
@@ -216,3 +218,36 @@ class ImageClassifier(base_vision_task_api.BaseVisionTaskApi):
         classifications_module.Classifications.create_from_pb2(classification)
         for classification in classification_result_proto.classifications
     ])
+
+  def classify_async(self, image: image_module.Image, timestamp_ms: int) -> None:
+    """Sends live image data to perform image classification asynchronously.
+
+    Each call supplies an Image together with a unique timestamp. Only use this
+    method when the ImageClassifier is created with the live stream running
+    mode. The input timestamps should be monotonically increasing for adjacent
+    calls of this method. This method will return immediately after the input
+    image is accepted. The results will be available via the `result_callback`
+    provided in the `ImageClassifierOptions`. The `classify_async` method is
+    designed to process live stream data such as camera input. To lower the
+    overall latency, image classifier may drop the input images if needed. In
+    other words, it's not guaranteed to have output per input image.
+
+    The `result_callback` provides:
+      - A classification result object that contains a list of classifications.
+      - The input image that the image classifier runs on.
+      - The input timestamp in milliseconds.
+
+    Args:
+      image: MediaPipe Image.
+      timestamp_ms: The timestamp of the input image in milliseconds; it is
+        multiplied by `_MICRO_SECONDS_PER_MILLISECOND` for the input packet.
+
+    Raises:
+      ValueError: If the image classifier was not created with the live stream
+        running mode, or if the input timestamp is not monotonically increasing.
+    """
+    self._send_live_stream_data({
+        _IMAGE_IN_STREAM_NAME:
+            packet_creator.create_image(image).at(
+                timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
+    })