Implement Image.create_from_file and update the object_detector_test.py file accordingly.

PiperOrigin-RevId: 477682930
2022-09-29 03:42:07 -07:00 · 2022-09-29 03:42:07 -07:00 · 554e2a9d69
commit 554e2a9d69
parent 80fd47b820
7 changed files with 96 additions and 28 deletions
--- a/mediapipe/python/BUILD
+++ b/mediapipe/python/BUILD
@ -48,6 +48,8 @@ pybind_extension(
        "//mediapipe/python/pybind:timestamp",
        "//mediapipe/python/pybind:validated_graph_config",
        "//mediapipe/tasks/python/core/pybind:task_runner",
        "@com_google_absl//absl/strings:str_format",
        "@stblib//:stb_image",
        # Type registration.
        "//mediapipe/framework:basic_types_registration",
        "//mediapipe/framework/formats:classification_registration",
--- a/mediapipe/python/image_test.py
+++ b/mediapipe/python/image_test.py
@ -15,6 +15,7 @@
 """Tests for mediapipe.python._framework_bindings.image."""
 import gc
 import os
 import random
 import sys
@ -23,6 +24,7 @@ import cv2
 import numpy as np
 import PIL.Image
 # resources dependency
 from mediapipe.python._framework_bindings import image
 from mediapipe.python._framework_bindings import image_frame
@ -185,6 +187,16 @@ class ImageTest(absltest.TestCase):
    gc.collect()
    self.assertEqual(sys.getrefcount(rgb_image), initial_ref_count)
  def test_image_create_from_file(self):
    """Loads the cat.jpg test asset from disk and checks its metadata."""
    # The asset location is resolved relative to the runfiles directory.
    runfiles_dir = resources.GetRunfilesDir()
    cat_jpg_path = os.path.join(
        runfiles_dir, 'mediapipe/tasks/testdata/vision/cat.jpg')
    cat_image = Image.create_from_file(cat_jpg_path)

    # The decoded image is expected to be 600x400 with three SRGB channels.
    self.assertEqual(cat_image.width, 600)
    self.assertEqual(cat_image.height, 400)
    self.assertEqual(cat_image.channels, 3)
    self.assertEqual(cat_image.image_format, ImageFormat.SRGB)
 if __name__ == '__main__':
  absltest.main()
--- a/mediapipe/python/pybind/BUILD
+++ b/mediapipe/python/pybind/BUILD
@ -45,6 +45,8 @@ pybind_library(
        ":util",
        "//mediapipe/framework:type_map",
        "//mediapipe/framework/formats:image",
        "@com_google_absl//absl/strings:str_format",
        "@stblib//:stb_image",
    ],
 )
--- a/mediapipe/python/pybind/image.cc
+++ b/mediapipe/python/pybind/image.cc
@ -16,9 +16,11 @@
 #include <memory>
 #include "absl/strings/str_format.h"
 #include "mediapipe/python/pybind/image_frame_util.h"
 #include "mediapipe/python/pybind/util.h"
 #include "pybind11/stl.h"
 #include "stb_image.h"
 namespace mediapipe {
 namespace python {
@ -225,6 +227,62 @@ void ImageSubmodule(pybind11::module* module) {
    image.is_aligned(16)
 )doc");
  // Static factory exposed to Python as `Image.create_from_file(file_name)`.
  // Decodes the file with stb_image and wraps the decoded pixels in an Image.
  // Raises RuntimeError (via RaisePyError) when decoding fails or when the
  // decoded channel count is not 1, 3 or 4.
  image.def_static(
      "create_from_file",
      [](const std::string& file_name) {
        int width;
        int height;
        int channels;
        // desired_channels=0 asks stb_image to keep the channel count stored
        // in the file; that count is reported back through `channels`.
        auto* image_data =
            stbi_load(file_name.c_str(), &width, &height, &channels,
                      /*desired_channels=*/0);
        if (image_data == nullptr) {
          throw RaisePyError(PyExc_RuntimeError,
                             absl::StrFormat("Image decoding failed (%s): %s",
                                             stbi_failure_reason(), file_name)
                                 .c_str());
        }
        // In each supported case `image_data` is handed to the ImageFrame
        // together with stbi_image_free as its deleter; the width-step
        // argument is channels * width.
        ImageFrameSharedPtr image_frame;
        switch (channels) {
          case 1:
            image_frame = std::make_shared<ImageFrame>(
                ImageFormat::GRAY8, width, height, width, image_data,
                stbi_image_free);
            break;
          case 3:
            image_frame = std::make_shared<ImageFrame>(
                ImageFormat::SRGB, width, height, 3 * width, image_data,
                stbi_image_free);
            break;
          case 4:
            image_frame = std::make_shared<ImageFrame>(
                ImageFormat::SRGBA, width, height, 4 * width, image_data,
                stbi_image_free);
            break;
          default:
            // No ImageFrame has taken the buffer on this path, so release it
            // here before throwing; otherwise the decoded pixels would leak.
            stbi_image_free(image_data);
            throw RaisePyError(
                PyExc_RuntimeError,
                absl::StrFormat(
                    "Expected image with 1 (grayscale), 3 (RGB) or 4 "
                    "(RGBA) channels, found %d channels.",
                    channels)
                    .c_str());
        }
        return Image(std::move(image_frame));
      },
      R"doc(Creates `Image` object from the image file.
 Args:
  file_name: Image file name.
 Returns:
  `Image` object.
 Raises:
  RuntimeError if the image file can't be decoded.
  )doc",
      py::arg("file_name"));
  image.def_property_readonly("width", &Image::width)
      .def_property_readonly("height", &Image::height)
      .def_property_readonly("channels", &Image::channels)
--- a/mediapipe/tasks/python/test/test_util.py
+++ b/mediapipe/tasks/python/test/test_util.py
@ -16,7 +16,6 @@
 import os
 from absl import flags
 import cv2
 from mediapipe.python._framework_bindings import image as image_module
 from mediapipe.python._framework_bindings import image_frame as image_frame_module
@ -44,12 +43,3 @@ def get_test_data_path(file_or_dirname: str) -> str:
      if f.endswith(file_or_dirname):
        return os.path.join(directory, f)
  raise ValueError("No %s in test directory" % file_or_dirname)
 # TODO: Implement image util module to read image data from file.
 def read_test_image(image_file: str) -> _Image:
  """Reads a MediaPipe Image from the image file.

  Args:
    image_file: Path of the image file to read.

  Returns:
    An SRGB `_Image` built from the decoded pixels after converting them from
    OpenCV's BGR channel order to RGB.

  Raises:
    ValueError: If the file cannot be read or decoded, or if the decoded image
      does not have `_RGB_CHANNELS` channels.
  """
  image_data = cv2.imread(image_file)
  # cv2.imread reports failure by returning None rather than raising; without
  # this check the failure would surface as an AttributeError on `.shape`.
  if image_data is None:
    raise ValueError("Unable to read image file: %s" % image_file)
  if image_data.shape[2] != _RGB_CHANNELS:
    raise ValueError("Input image must contain three channel rgb data.")
  return _Image(_ImageFormat.SRGB, cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB))
--- a/mediapipe/tasks/python/test/vision/object_detector_test.py
+++ b/mediapipe/tasks/python/test/vision/object_detector_test.py
@ -44,7 +44,7 @@ _IMAGE_FILE = 'cats_and_dogs.jpg'
 _EXPECTED_DETECTION_RESULT = _DetectionResult(detections=[
    _Detection(
        bounding_box=_BoundingBox(
-            origin_x=608, origin_y=164, width=381, height=432),
+            origin_x=608, origin_y=161, width=381, height=439),
        categories=[
            _Category(
                index=None,
@ -64,7 +64,7 @@ _EXPECTED_DETECTION_RESULT = _DetectionResult(detections=[
        ]),
    _Detection(
        bounding_box=_BoundingBox(
-            origin_x=257, origin_y=394, width=173, height=202),
+            origin_x=256, origin_y=395, width=173, height=202),
        categories=[
            _Category(
                index=None,
@ -74,7 +74,7 @@ _EXPECTED_DETECTION_RESULT = _DetectionResult(detections=[
        ]),
    _Detection(
        bounding_box=_BoundingBox(
-            origin_x=362, origin_y=195, width=325, height=412),
+            origin_x=362, origin_y=191, width=325, height=419),
        categories=[
            _Category(
                index=None,
@ -98,7 +98,7 @@ class ObjectDetectorTest(parameterized.TestCase):
  def setUp(self):
    super().setUp()
-    self.test_image = test_util.read_test_image(
+    self.test_image = _Image.create_from_file(
        test_util.get_test_data_path(_IMAGE_FILE))
    self.model_path = test_util.get_test_data_path(_MODEL_FILE)
@ -153,9 +153,9 @@ class ObjectDetectorTest(parameterized.TestCase):
    detector = _ObjectDetector.create_from_options(options)
    # Performs object detection on the input.
-    image_result = detector.detect(self.test_image)
+    detection_result = detector.detect(self.test_image)
    # Comparing results.
-    self.assertEqual(image_result, expected_detection_result)
+    self.assertEqual(detection_result, expected_detection_result)
    # Closes the detector explicitly when the detector is not used in
    # a context.
    detector.close()
@ -179,9 +179,9 @@ class ObjectDetectorTest(parameterized.TestCase):
        base_options=base_options, max_results=max_results)
    with _ObjectDetector.create_from_options(options) as detector:
      # Performs object detection on the input.
-      image_result = detector.detect(self.test_image)
+      detection_result = detector.detect(self.test_image)
      # Comparing results.
-      self.assertEqual(image_result, expected_detection_result)
+      self.assertEqual(detection_result, expected_detection_result)
  def test_score_threshold_option(self):
    options = _ObjectDetectorOptions(
@ -189,8 +189,8 @@ class ObjectDetectorTest(parameterized.TestCase):
        score_threshold=_SCORE_THRESHOLD)
    with _ObjectDetector.create_from_options(options) as detector:
      # Performs object detection on the input.
-      image_result = detector.detect(self.test_image)
+      detection_result = detector.detect(self.test_image)
-      detections = image_result.detections
+      detections = detection_result.detections
      for detection in detections:
        score = detection.categories[0].score
@ -204,8 +204,8 @@ class ObjectDetectorTest(parameterized.TestCase):
        max_results=_MAX_RESULTS)
    with _ObjectDetector.create_from_options(options) as detector:
      # Performs object detection on the input.
-      image_result = detector.detect(self.test_image)
+      detection_result = detector.detect(self.test_image)
-      detections = image_result.detections
+      detections = detection_result.detections
      self.assertLessEqual(
          len(detections), _MAX_RESULTS, 'Too many results returned.')
@ -216,8 +216,8 @@ class ObjectDetectorTest(parameterized.TestCase):
        category_allowlist=_ALLOW_LIST)
    with _ObjectDetector.create_from_options(options) as detector:
      # Performs object detection on the input.
-      image_result = detector.detect(self.test_image)
+      detection_result = detector.detect(self.test_image)
-      detections = image_result.detections
+      detections = detection_result.detections
      for detection in detections:
        label = detection.categories[0].category_name
@ -230,8 +230,8 @@ class ObjectDetectorTest(parameterized.TestCase):
        category_denylist=_DENY_LIST)
    with _ObjectDetector.create_from_options(options) as detector:
      # Performs object detection on the input.
-      image_result = detector.detect(self.test_image)
+      detection_result = detector.detect(self.test_image)
-      detections = image_result.detections
+      detections = detection_result.detections
      for detection in detections:
        label = detection.categories[0].category_name
@ -257,8 +257,8 @@ class ObjectDetectorTest(parameterized.TestCase):
        score_threshold=1)
    with _ObjectDetector.create_from_options(options) as detector:
      # Performs object detection on the input.
-      image_result = detector.detect(self.test_image)
+      detection_result = detector.detect(self.test_image)
-      self.assertEmpty(image_result.detections)
+      self.assertEmpty(detection_result.detections)
  def test_missing_result_callback(self):
    options = _ObjectDetectorOptions(
--- a/mediapipe/tasks/testdata/vision/BUILD
+++ b/mediapipe/tasks/testdata/vision/BUILD
@ -85,6 +85,10 @@ filegroup(
        "selfie_segm_128_128_3_expected_mask.jpg",
        "selfie_segm_144_256_3_expected_mask.jpg",
    ],
    visibility = [
        "//mediapipe/python:__subpackages__",
        "//mediapipe/tasks:internal",
    ],
 )
 # TODO: Create individual filegroups for the models required by each Task.