Revised API implementation and added more tests for segment_for_video and segment_async

2022-10-18 04:24:12 -07:00 · 2022-10-18 04:24:12 -07:00 · f84e0bc1c6
commit f84e0bc1c6
parent 36ac0689d7
3 changed files with 277 additions and 40 deletions
--- a/mediapipe/tasks/python/test/vision/BUILD
+++ b/mediapipe/tasks/python/test/vision/BUILD
@ -47,7 +47,7 @@ py_test(
    deps = [
        "//mediapipe/python:_framework_bindings",
        "//mediapipe/tasks/python/core:base_options",
-        "//mediapipe/tasks/python/test:test_util",
+        "//mediapipe/tasks/python/test:test_utils",
        "//mediapipe/tasks/python/components/proto:segmenter_options",
        "//mediapipe/tasks/python/vision:image_segmenter",
        "//mediapipe/tasks/python/vision/core:vision_task_running_mode",
--- a/mediapipe/tasks/python/test/vision/image_segmenter_test.py
+++ b/mediapipe/tasks/python/test/vision/image_segmenter_test.py
@ -16,6 +16,8 @@
 import enum
 import numpy as np
 import cv2
 from typing import List
 from unittest import mock
 from absl.testing import absltest
 from absl.testing import parameterized
@ -24,7 +26,7 @@ from mediapipe.python._framework_bindings import image as image_module
 from mediapipe.python._framework_bindings import image_frame as image_frame_module
 from mediapipe.tasks.python.components.proto import segmenter_options
 from mediapipe.tasks.python.core import base_options as base_options_module
-from mediapipe.tasks.python.test import test_util
+from mediapipe.tasks.python.test import test_utils
 from mediapipe.tasks.python.vision import image_segmenter
 from mediapipe.tasks.python.vision.core import vision_task_running_mode as running_mode_module
@ -42,7 +44,22 @@ _MODEL_FILE = 'deeplabv3.tflite'
 _IMAGE_FILE = 'segmentation_input_rotation0.jpg'
 _SEGMENTATION_FILE = 'segmentation_golden_rotation0.png'
 _MASK_MAGNIFICATION_FACTOR = 10
-_MATCH_PIXELS_THRESHOLD = 0.01
+_MASK_SIMILARITY_THRESHOLD = 0.98
 def _similar_to_uint8_mask(actual_mask, expected_mask):
  """Checks whether a category mask matches the ground truth closely enough.

  Each pixel of `actual_mask` is scaled by `_MASK_MAGNIFICATION_FACTOR` and
  compared with the corresponding pixel of `expected_mask`.

  Args:
    actual_mask: Object exposing `numpy_view()`; the mask produced by the
      segmenter.
    expected_mask: Object exposing `numpy_view()`; the ground truth mask.

  Returns:
    True if the fraction of matching pixels is at least
    `_MASK_SIMILARITY_THRESHOLD`. False otherwise, including when the two
    masks hold a different number of pixels or are empty.
  """
  actual_pixels = actual_mask.numpy_view().flatten()
  expected_pixels = expected_mask.numpy_view().flatten()
  num_pixels = expected_pixels.size
  # Masks of different sizes cannot be compared pixel by pixel, and an empty
  # mask has no meaningful similarity ratio.
  if num_pixels == 0 or actual_pixels.size != num_pixels:
    return False
  # Widen before scaling so the multiplication can never wrap around in uint8.
  scaled_pixels = actual_pixels.astype(np.int64) * _MASK_MAGNIFICATION_FACTOR
  consistent_pixels = np.count_nonzero(scaled_pixels == expected_pixels)
  return consistent_pixels / num_pixels >= _MASK_SIMILARITY_THRESHOLD
 class ModelFileType(enum.Enum):
@ -54,10 +71,14 @@ class ImageSegmenterTest(parameterized.TestCase):
  def setUp(self):
    super().setUp()
-    self.test_image = test_util.read_test_image(
+    # Load the test input image.
-        test_util.get_test_data_path(_IMAGE_FILE))
+    self.test_image = _Image.create_from_file(
-    self.test_seg_path = test_util.get_test_data_path(_SEGMENTATION_FILE)
+        test_utils.get_test_data_path(_IMAGE_FILE))
-    self.model_path = test_util.get_test_data_path(_MODEL_FILE)
+    # Loads ground truth segmentation file.
    gt_segmentation_data = cv2.imread(
      test_utils.get_test_data_path(_SEGMENTATION_FILE), cv2.IMREAD_GRAYSCALE)
    self.test_seg_image = _Image(_ImageFormat.GRAY8, gt_segmentation_data)
    self.model_path = test_utils.get_test_data_path(_MODEL_FILE)
  def test_create_from_file_succeeds_with_valid_model_path(self):
    # Creates with default option and valid model file successfully.
@ -76,7 +97,7 @@ class ImageSegmenterTest(parameterized.TestCase):
    with self.assertRaisesRegex(
        ValueError,
        r"ExternalFile must specify at least one of 'file_content', "
-        r"'file_name' or 'file_descriptor_meta'."):
+        r"'file_name', 'file_pointer_meta' or 'file_descriptor_meta'."):
      base_options = _BaseOptions(model_asset_path='')
      options = _ImageSegmenterOptions(base_options=base_options)
      _ImageSegmenter.create_from_options(options)
@ -112,34 +133,16 @@ class ImageSegmenterTest(parameterized.TestCase):
    # Performs image segmentation on the input.
    category_masks = segmenter.segment(self.test_image)
    self.assertEqual(len(category_masks), 1)
-    result_pixels = category_masks[0].numpy_view().flatten()
+    category_mask = category_masks[0]
    result_pixels = category_mask.numpy_view().flatten()
-    # Check if data type of `category_masks` is correct.
+    # Check if data type of `category_mask` is correct.
    self.assertEqual(result_pixels.dtype, np.uint8)
-    # Loads ground truth segmentation file.
+    self.assertTrue(
-    image_data = cv2.imread(self.test_seg_path, cv2.IMREAD_GRAYSCALE)
+      _similar_to_uint8_mask(category_masks[0], self.test_seg_image),
    gt_segmentation = _Image(_ImageFormat.GRAY8, image_data)
    gt_segmentation_array = gt_segmentation.numpy_view()
    gt_segmentation_shape = gt_segmentation_array.shape
    num_pixels = gt_segmentation_shape[0] * gt_segmentation_shape[1]
    ground_truth_pixels = gt_segmentation_array.flatten()
    self.assertEqual(
      len(result_pixels), len(ground_truth_pixels),
      'Segmentation mask size does not match the ground truth mask size.')
    inconsistent_pixels = 0
    for index in range(num_pixels):
      inconsistent_pixels += (
          result_pixels[index] * _MASK_MAGNIFICATION_FACTOR !=
          ground_truth_pixels[index])
    self.assertLessEqual(
      inconsistent_pixels / num_pixels, _MATCH_PIXELS_THRESHOLD,
      f'Number of pixels in the candidate mask differing from that of the '
-      f'ground truth mask exceeds {_MATCH_PIXELS_THRESHOLD}.')
+      f'ground truth mask exceeds {_MASK_SIMILARITY_THRESHOLD}.')
    # Closes the segmenter explicitly when the segmenter is not used in
    # a context.
@ -188,6 +191,174 @@ class ImageSegmenterTest(parameterized.TestCase):
    # a context.
    segmenter.close()
  @parameterized.parameters(
      (ModelFileType.FILE_NAME,),
      (ModelFileType.FILE_CONTENT,))
  def test_segment_in_context(self, model_file_type):
    # Builds the base options from either the model path or the raw model
    # bytes, depending on the parameterized model file type.
    if model_file_type is ModelFileType.FILE_NAME:
      base_options = _BaseOptions(model_asset_path=self.model_path)
    elif model_file_type is ModelFileType.FILE_CONTENT:
      with open(self.model_path, 'rb') as f:
        model_contents = f.read()
      base_options = _BaseOptions(model_asset_buffer=model_contents)
    else:
      # Should never happen
      raise ValueError('model_file_type is invalid.')

    segmenter_options = _SegmenterOptions(output_type=_OutputType.CATEGORY_MASK)
    options = _ImageSegmenterOptions(base_options=base_options,
                                     segmenter_options=segmenter_options)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      # Performs image segmentation on the input.
      category_masks = segmenter.segment(self.test_image)
      self.assertEqual(len(category_masks), 1)
      # The threshold is a minimum fraction of matching pixels, so the failure
      # message reports a similarity that fell below it.
      self.assertTrue(
          _similar_to_uint8_mask(category_masks[0], self.test_seg_image),
          f'Fraction of candidate mask pixels matching the ground truth mask '
          f'is below {_MASK_SIMILARITY_THRESHOLD}.')
  def test_missing_result_callback(self):
    # Live stream mode is requested without a result callback; creating the
    # segmenter is expected to raise a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.LIVE_STREAM)
    expected_error = r'result callback must be provided'
    with self.assertRaisesRegex(ValueError, expected_error):
      with _ImageSegmenter.create_from_options(options) as unused_segmenter:
        pass
  @parameterized.parameters(
      (_RUNNING_MODE.IMAGE,),
      (_RUNNING_MODE.VIDEO,))
  def test_illegal_result_callback(self, running_mode):
    # A result callback is supplied together with a non-live-stream running
    # mode; creating the segmenter is expected to raise a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=running_mode,
        result_callback=mock.MagicMock())
    expected_error = r'result callback should not be provided'
    with self.assertRaisesRegex(ValueError, expected_error):
      with _ImageSegmenter.create_from_options(options) as unused_segmenter:
        pass
  def test_calling_segment_for_video_in_image_mode(self):
    # Calling the video API on a segmenter created in image mode is expected
    # to raise a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.IMAGE)
    expected_error = r'not initialized with the video mode'
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError, expected_error):
        segmenter.segment_for_video(self.test_image, 0)
  def test_calling_segment_async_in_image_mode(self):
    # Calling the live stream API on a segmenter created in image mode is
    # expected to raise a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.IMAGE)
    expected_error = r'not initialized with the live stream mode'
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError, expected_error):
        segmenter.segment_async(self.test_image, 0)
  def test_calling_segment_in_video_mode(self):
    # Calling the single-image API on a segmenter created in video mode is
    # expected to raise a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.VIDEO)
    expected_error = r'not initialized with the image mode'
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError, expected_error):
        segmenter.segment(self.test_image)
  def test_calling_segment_async_in_video_mode(self):
    # Calling the live stream API on a segmenter created in video mode is
    # expected to raise a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.VIDEO)
    expected_error = r'not initialized with the live stream mode'
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError, expected_error):
        segmenter.segment_async(self.test_image, 0)
  def test_detect_for_video_with_out_of_order_timestamp(self):
    # NOTE(review): the name says "detect" but the body exercises
    # `segment_for_video`; consider renaming in a follow-up.
    # A frame at timestamp 1 is processed first, then a frame at timestamp 0
    # is expected to be rejected with a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.VIDEO)
    expected_error = r'Input timestamp must be monotonically increasing'
    with _ImageSegmenter.create_from_options(options) as segmenter:
      unused_result = segmenter.segment_for_video(self.test_image, 1)
      with self.assertRaisesRegex(ValueError, expected_error):
        segmenter.segment_for_video(self.test_image, 0)
  def test_segment_for_video(self):
    # Runs the same test image through the video API at ten increasing
    # timestamps and checks every returned category mask against the ground
    # truth.
    segmenter_options = _SegmenterOptions(output_type=_OutputType.CATEGORY_MASK)
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        segmenter_options=segmenter_options,
        running_mode=_RUNNING_MODE.VIDEO)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      for timestamp in range(0, 300, 30):
        category_masks = segmenter.segment_for_video(self.test_image, timestamp)
        self.assertEqual(len(category_masks), 1)
        # The threshold is a minimum fraction of matching pixels, so the
        # failure message reports a similarity that fell below it.
        self.assertTrue(
            _similar_to_uint8_mask(category_masks[0], self.test_seg_image),
            f'Fraction of candidate mask pixels matching the ground truth mask '
            f'is below {_MASK_SIMILARITY_THRESHOLD}.')
  def test_calling_segment_in_live_stream_mode(self):
    # Calling the single-image API on a segmenter created in live stream mode
    # is expected to raise a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.LIVE_STREAM,
        result_callback=mock.MagicMock())
    expected_error = r'not initialized with the image mode'
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError, expected_error):
        segmenter.segment(self.test_image)
  def test_calling_segment_for_video_in_live_stream_mode(self):
    # Calling the video API on a segmenter created in live stream mode is
    # expected to raise a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.LIVE_STREAM,
        result_callback=mock.MagicMock())
    expected_error = r'not initialized with the video mode'
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError, expected_error):
        segmenter.segment_for_video(self.test_image, 0)
  def test_segment_async_calls_with_illegal_timestamp(self):
    # An image at timestamp 100 is sent first, then an image at timestamp 0 is
    # expected to be rejected with a ValueError.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(
        base_options=base_options,
        running_mode=_RUNNING_MODE.LIVE_STREAM,
        result_callback=mock.MagicMock())
    expected_error = r'Input timestamp must be monotonically increasing'
    with _ImageSegmenter.create_from_options(options) as segmenter:
      segmenter.segment_async(self.test_image, 100)
      with self.assertRaisesRegex(ValueError, expected_error):
        segmenter.segment_async(self.test_image, 0)
  def test_segment_async_calls(self):
    # The last observed timestamp lives on the test instance so that the
    # callback compares each result against the previous callback's timestamp
    # instead of against a constant.
    self.observed_timestamp_ms = -1

    def check_result(result: List[image_module.Image],
                     output_image: _Image,
                     timestamp_ms: int):
      # Get the output category mask.
      category_mask = result[0]
      self.assertEqual(output_image.width, self.test_image.width)
      self.assertEqual(output_image.height, self.test_image.height)
      self.assertEqual(output_image.width, self.test_seg_image.width)
      self.assertEqual(output_image.height, self.test_seg_image.height)
      # The threshold is a minimum fraction of matching pixels, so the failure
      # message reports a similarity that fell below it.
      self.assertTrue(
          _similar_to_uint8_mask(category_mask, self.test_seg_image),
          f'Fraction of candidate mask pixels matching the ground truth mask '
          f'is below {_MASK_SIMILARITY_THRESHOLD}.')
      # Timestamps delivered to the callback must be strictly increasing.
      self.assertLess(self.observed_timestamp_ms, timestamp_ms)
      self.observed_timestamp_ms = timestamp_ms

    segmenter_options = _SegmenterOptions(output_type=_OutputType.CATEGORY_MASK)
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        segmenter_options=segmenter_options,
        running_mode=_RUNNING_MODE.LIVE_STREAM,
        result_callback=check_result)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      for timestamp in range(0, 300, 30):
        segmenter.segment_async(self.test_image, timestamp)
 if __name__ == '__main__':
  absltest.main()
--- a/mediapipe/tasks/python/vision/image_segmenter.py
+++ b/mediapipe/tasks/python/vision/image_segmenter.py
@ -42,6 +42,7 @@ _IMAGE_IN_STREAM_NAME = 'image_in'
 _IMAGE_OUT_STREAM_NAME = 'image_out'
 _IMAGE_TAG = 'IMAGE'
 _TASK_GRAPH_NAME = 'mediapipe.tasks.vision.ImageSegmenterGraph'
 _MICRO_SECONDS_PER_MILLISECOND = 1000
@dataclasses.dataclass
@ -52,9 +53,9 @@ class ImageSegmenterOptions:
    base_options: Base options for the image segmenter task.
    running_mode: The running mode of the task. Default to the image mode.
      Image segmenter task has three running modes:
-      1) The image mode for detecting objects on single image inputs.
+      1) The image mode for segmenting objects on single image inputs.
-      2) The video mode for detecting objects on the decoded frames of a video.
+      2) The video mode for segmenting objects on the decoded frames of a video.
-      3) The live stream mode for detecting objects on a live stream of input
+      3) The live stream mode for segmenting objects on a live stream of input
         data, such as from camera.
    segmenter_options: Options for the image segmenter task.
    result_callback: The user-defined result callback for processing live stream
@ -86,7 +87,8 @@ class ImageSegmenter(base_vision_task_api.BaseVisionTaskApi):
  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'ImageSegmenter':
-    """Creates an `ImageSegmenter` object from a TensorFlow Lite model and the default `ImageSegmenterOptions`.
+    """Creates an `ImageSegmenter` object from a TensorFlow Lite model and the
    default `ImageSegmenterOptions`.
    Note that the created `ImageSegmenter` instance is in image mode, for
    performing image segmentation on single image inputs.
@ -131,8 +133,9 @@ class ImageSegmenter(base_vision_task_api.BaseVisionTaskApi):
      segmentation_result = packet_getter.get_image_list(
          output_packets[_SEGMENTATION_OUT_STREAM_NAME])
      image = packet_getter.get_image(output_packets[_IMAGE_OUT_STREAM_NAME])
-      timestamp = output_packets[_IMAGE_OUT_STREAM_NAME].timestamp
+      timestamp = output_packets[_SEGMENTATION_OUT_STREAM_NAME].timestamp
-      options.result_callback(segmentation_result, image, timestamp)
+      options.result_callback(segmentation_result, image,
                              timestamp.value // _MICRO_SECONDS_PER_MILLISECOND)
    task_info = _TaskInfo(
        task_graph=_TASK_GRAPH_NAME,
@ -148,7 +151,6 @@ class ImageSegmenter(base_vision_task_api.BaseVisionTaskApi):
            _RunningMode.LIVE_STREAM), options.running_mode,
        packets_callback if options.result_callback else None)
  # TODO: Create an Image class for MediaPipe Tasks.
  def segment(self,
              image: image_module.Image) -> List[image_module.Image]:
    """Performs the actual segmentation task on the provided MediaPipe Image.
@ -162,10 +164,74 @@ class ImageSegmenter(base_vision_task_api.BaseVisionTaskApi):
    Raises:
      ValueError: If any of the input arguments is invalid.
-      RuntimeError: If object detection failed to run.
+      RuntimeError: If image segmentation failed to run.
    """
    output_packets = self._process_image_data(
        {_IMAGE_IN_STREAM_NAME: packet_creator.create_image(image)})
    segmentation_result = packet_getter.get_image_list(
        output_packets[_SEGMENTATION_OUT_STREAM_NAME])
    return segmentation_result
  def segment_for_video(self, image: image_module.Image,
                        timestamp_ms: int) -> List[image_module.Image]:
    """Performs segmentation on the provided video frame.

    Only use this method when the ImageSegmenter is created with the video
    running mode. The frame's timestamp (in milliseconds) must accompany the
    frame, and timestamps should be monotonically increasing across adjacent
    calls of this method.

    Args:
      image: MediaPipe Image.
      timestamp_ms: The timestamp of the input video frame in milliseconds.

    Returns:
      A segmentation result object that contains a list of segmentation masks
      as images.

    Raises:
      ValueError: If any of the input arguments is invalid.
      RuntimeError: If image segmentation failed to run.
    """
    # The packet timestamp is given in microseconds.
    timestamp_us = timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND
    frame_packet = packet_creator.create_image(image).at(timestamp_us)
    output_packets = self._process_video_data(
        {_IMAGE_IN_STREAM_NAME: frame_packet})
    return packet_getter.get_image_list(
        output_packets[_SEGMENTATION_OUT_STREAM_NAME])
  def segment_async(self, image: image_module.Image, timestamp_ms: int) -> None:
    """Sends a timestamped live image to perform image segmentation.

    Only use this method when the ImageSegmenter is created with the live
    stream running mode. The input timestamps should be monotonically
    increasing for adjacent calls of this method. This method returns
    immediately after the input image is accepted; the results are delivered
    through the `result_callback` provided in the `ImageSegmenterOptions`.

    The `segment_async` method is designed to process live stream data such as
    camera input. To lower the overall latency, the image segmenter may drop
    input images if needed. In other words, an output is not guaranteed for
    every input image.

    The `result_callback` provides:
      - A segmentation result object that contains a list of segmentation
        masks as images.
      - The input image that the image segmenter runs on.
      - The input timestamp in milliseconds.

    Args:
      image: MediaPipe Image.
      timestamp_ms: The timestamp of the input image in milliseconds.

    Raises:
      ValueError: If the current input timestamp is smaller than what the image
        segmenter has already processed.
    """
    # The packet timestamp is given in microseconds.
    timestamp_us = timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND
    live_packet = packet_creator.create_image(image).at(timestamp_us)
    self._send_live_stream_data({_IMAGE_IN_STREAM_NAME: live_packet})