Internal change

PiperOrigin-RevId: 526235882
2023-04-22 00:26:56 -07:00 · 2023-04-22 00:26:56 -07:00 · a6c1bb6324
commit a6c1bb6324
parent 58dcbc9833
3 changed files with 403 additions and 4 deletions
--- a/mediapipe/model_maker/python/core/utils/BUILD
+++ b/mediapipe/model_maker/python/core/utils/BUILD
@ -67,11 +67,18 @@ py_library(
    name = "loss_functions",
    srcs = ["loss_functions.py"],
    srcs_version = "PY3",
    deps = [
        ":file_util",
        ":model_util",
    ],
 )
 py_test(
    name = "loss_functions_test",
    srcs = ["loss_functions_test.py"],
    tags = [
        # NOTE(review): presumably required because the VGG perceptual loss
        # test builds VGGPerceptualLoss, which fetches its feature extractor
        # from a remote URL - confirm.
        "requires-net:external",
    ],
    deps = [":loss_functions"],
 )
--- a/mediapipe/model_maker/python/core/utils/loss_functions.py
+++ b/mediapipe/model_maker/python/core/utils/loss_functions.py
@ -13,10 +13,21 @@
 # limitations under the License.
 """Loss function utility library."""
-from typing import Optional, Sequence
+import abc
 from typing import Mapping, Sequence
 import dataclasses
 from typing import Optional
 import numpy as np
 import tensorflow as tf
 from mediapipe.model_maker.python.core.utils import file_util
 from mediapipe.model_maker.python.core.utils import model_util
 from official.modeling import tf_utils
 # Location of the VGG feature extractor archive that VGGPerceptualLoss hands to
 # file_util.DownloadedFiles before loading it as a Keras model.
 _VGG_IMAGENET_PERCEPTUAL_MODEL_URL = 'https://storage.googleapis.com/mediapipe-assets/vgg_feature_extractor.tar.gz'
 class FocalLoss(tf.keras.losses.Loss):
  """Implementation of focal loss (https://arxiv.org/pdf/1708.02002.pdf).
@ -45,7 +56,6 @@ class FocalLoss(tf.keras.losses.Loss):
  ```python
  model.compile(optimizer='sgd', loss=FocalLoss(gamma))
  ```
  """
  def __init__(self, gamma, class_weight: Optional[Sequence[float]] = None):
@ -103,3 +113,206 @@ class FocalLoss(tf.keras.losses.Loss):
    # By default, this function uses "sum_over_batch_size" reduction for the
    # loss per batch.
    return tf.reduce_sum(losses) / batch_size
@dataclasses.dataclass
class PerceptualLossWeight:
  """Relative multipliers applied to the individual perceptual loss terms.

  Every multiplier defaults to 1.0, so a default-constructed instance weighs
  all terms equally.

  Attributes:
    l1: Multiplier for the L1 loss term.
    content: Multiplier for the content loss term.
    style: Multiplier for the style loss term.
  """

  l1: float = 1.0
  content: float = 1.0
  style: float = 1.0
 class PerceptualLoss(tf.keras.Model, metaclass=abc.ABCMeta):
  """Base class for perceptual loss models.

  Subclasses provide `_compute_features`, which turns an image batch into a
  sequence of feature maps. Calling the instance with two image batches returns
  a weighted style loss and a weighted content loss computed from those
  features. Only the `style` and `content` fields of the loss weight are read
  here; the `l1` field is not used by this class.
  """

  def __init__(
      self,
      feature_weight: Optional[Sequence[float]] = None,
      loss_weight: Optional[PerceptualLossWeight] = None,
  ):
    """Instantiates perceptual loss.

    Args:
      feature_weight: The weight coefficients of the multiple model-extracted
        features used for calculating the perceptual loss. When None, every
        feature is weighted 1.0 on the first call.
      loss_weight: The weight coefficients between `style_loss` and
        `content_loss`. When None, a default `PerceptualLossWeight` is created
        on the first call.
    """
    super().__init__()
    # Mean absolute difference between two tensors.
    self._loss_op = lambda x, y: tf.math.reduce_mean(tf.abs(x - y))
    # Reported unchanged for a term whose weight is not positive.
    self._loss_style = tf.constant(0.0)
    self._loss_content = tf.constant(0.0)
    self._feature_weight = feature_weight
    self._loss_weight = loss_weight

  def __call__(
      self,
      img1: tf.Tensor,
      img2: tf.Tensor,
  ) -> Mapping[str, tf.Tensor]:
    """Computes perceptual loss between two images.

    Args:
      img1: First batch of images. The pixel values should be normalized to [-1,
        1].
      img2: Second batch of images. The pixel values should be normalized to
        [-1, 1].

    Returns:
      A mapping with the keys 'style_loss' and 'content_loss'. A term whose
      weight is not greater than 0 is not recomputed.

    Raises:
      ValueError: If fewer feature weights than extracted features are
        available.
    """
    x_features = self._compute_features(img1)
    y_features = self._compute_features(img2)

    if self._loss_weight is None:
      self._loss_weight = PerceptualLossWeight()

    # Without explicit weights, every feature scale counts equally.
    if self._feature_weight is None:
      self._feature_weight = [1.0] * len(x_features)

    # Too few weights is an error. Surplus weights are tolerated: the loss
    # helpers zip weights against features, so only the first len(x_features)
    # weights take part.
    if len(self._feature_weight) < len(x_features):
      raise ValueError(
          f'Input feature weight length {len(self._feature_weight)} is smaller'
          f' than feature length {len(x_features)}'
      )

    if self._loss_weight.style > 0.0:
      self._loss_style = tf_utils.safe_mean(
          self._loss_weight.style
          * self._get_style_loss(x_feats=x_features, y_feats=y_features)
      )

    if self._loss_weight.content > 0.0:
      self._loss_content = tf_utils.safe_mean(
          self._loss_weight.content
          * self._get_content_loss(x_feats=x_features, y_feats=y_features)
      )

    return {'style_loss': self._loss_style, 'content_loss': self._loss_content}

  @abc.abstractmethod
  def _compute_features(self, img: tf.Tensor) -> Sequence[tf.Tensor]:
    """Computes features from the given image tensor.

    Args:
      img: Image tensor.

    Returns:
      A list of multi-scale feature maps.
    """

  def _get_content_loss(
      self, x_feats: Sequence[tf.Tensor], y_feats: Sequence[tf.Tensor]
  ) -> tf.Tensor:
    """Gets weighted multi-scale content loss.

    Args:
      x_feats: Feature maps extracted from the first image batch.
      y_feats: Feature maps extracted from the second image batch.

    Returns:
      A scalar tensor: the sum over scales of the weighted feature distance.
    """
    content_losses = [
        self._loss_op(x_feat, y_feat) * coef
        for coef, x_feat, y_feat in zip(self._feature_weight, x_feats, y_feats)
    ]
    return tf.math.reduce_sum(content_losses)

  def _get_style_loss(
      self, x_feats: Sequence[tf.Tensor], y_feats: Sequence[tf.Tensor]
  ) -> tf.Tensor:
    """Gets weighted multi-scale style loss.

    Args:
      x_feats: Feature maps extracted from the first image batch.
      y_feats: Feature maps extracted from the second image batch.

    Returns:
      A scalar tensor: the sum over scales of the weighted distance between the
      Gram matrices of the paired feature maps.
    """
    style_losses = []
    for coef, x_feat, y_feat in zip(self._feature_weight, x_feats, y_feats):
      x_feat_g = _compute_gram_matrix(x_feat)
      y_feat_g = _compute_gram_matrix(y_feat)
      style_losses.append(self._loss_op(x_feat_g, y_feat_g) * coef)
    # Reduce over the collected per-scale losses. Reducing the loop variable
    # instead would keep only the final scale and drop all the others.
    return tf.math.reduce_sum(style_losses)
 class VGGPerceptualLoss(PerceptualLoss):
  """Perceptual loss based on VGG19 pretrained on the ImageNet dataset.

  Perceptual loss measures high-level perceptual and semantic differences
  between images.

  Reference:
  - [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](
      https://arxiv.org/abs/1603.08155) (ECCV 2016)
  """

  def __init__(
      self,
      loss_weight: Optional[PerceptualLossWeight] = None,
  ):
    """Sets up the feature weights, input normalization and VGG extractor.

    Args:
      loss_weight: Loss weight coefficients.
    """
    # One weight per feature map returned by the extractor; the first two
    # maps are down-weighted.
    per_scale_weight = np.array([0.1, 0.1, 1.0, 1.0, 1.0])
    super().__init__(feature_weight=per_scale_weight, loss_weight=loss_weight)

    # Per-channel statistics reshaped so they broadcast over a 4-D image
    # batch whose last axis holds the 3 color channels.
    broadcast_shape = (1, 1, 1, 3)
    self._rgb_mean = tf.reshape(
        tf.constant([0.485, 0.456, 0.406]), broadcast_shape
    )
    self._rgb_std = tf.reshape(
        tf.constant([0.229, 0.224, 0.225]), broadcast_shape
    )

    # Obtain the extractor files and load them as a Keras model.
    downloaded = file_util.DownloadedFiles(
        'vgg_feature_extractor',
        _VGG_IMAGENET_PERCEPTUAL_MODEL_URL,
        is_folder=True,
    )
    self._vgg19 = model_util.load_keras_model(downloaded.get_path())

  def _compute_features(self, img: tf.Tensor) -> Sequence[tf.Tensor]:
    """Computes VGG19 features for an image batch with values in [-1, 1]."""
    # Shift [-1, 1] pixels to [0, 1], then standardize each channel.
    unit_range_img = (img + 1) / 2.0
    standardized_img = (unit_range_img - self._rgb_mean) / self._rgb_std
    # The model is meant to act as a frozen feature extractor.
    # NOTE(review): nothing here stops gradients or freezes the weights -
    # confirm the loaded model is non-trainable.
    return self._vgg19(standardized_img)
 def _compute_gram_matrix(feature: tf.Tensor) -> tf.Tensor:
  """Builds the normalized Gram matrix of a feature map.

  The spatial axes are flattened and the channel-by-channel inner products are
  divided by `C * H * W`. H, W and C are read from the static shape, so they
  must be known when this is called.

  Args:
    feature: [B, H, W, C] feature map.

  Returns:
    [B, C, C] gram matrix.
  """
  _, height, width, channels = feature.shape.as_list()
  # [B, H * W, C]: one row per spatial position.
  flattened = tf.reshape(feature, shape=(-1, height * width, channels))
  # [B, C, H * W]
  flattened_t = tf.transpose(flattened, perm=[0, 2, 1])
  gram = tf.matmul(flattened_t, flattened)
  return gram / (channels * height * width)
--- a/mediapipe/model_maker/python/core/utils/loss_functions_test.py
+++ b/mediapipe/model_maker/python/core/utils/loss_functions_test.py
@ -13,7 +13,9 @@
 # limitations under the License.
 import math
-from typing import Optional
+import tempfile
 from typing import Dict, Optional, Sequence
 from unittest import mock as unittest_mock
 from absl.testing import parameterized
 import tensorflow as tf
@ -21,7 +23,7 @@ import tensorflow as tf
 from mediapipe.model_maker.python.core.utils import loss_functions
-class LossFunctionsTest(tf.test.TestCase, parameterized.TestCase):
+class FocalLossTest(tf.test.TestCase, parameterized.TestCase):
  @parameterized.named_parameters(
      dict(testcase_name='no_sample_weight', sample_weight=None),
@ -99,5 +101,182 @@ class LossFunctionsTest(tf.test.TestCase, parameterized.TestCase):
    self.assertNear(loss, expected_loss, 1e-4)
 class MockPerceptualLoss(loss_functions.PerceptualLoss):
  """Concrete `PerceptualLoss` used in tests; fills in the abstract method."""

  def __init__(
      self,
      use_mock_loss_op: bool = False,
      feature_weight: Optional[Sequence[float]] = None,
      loss_weight: Optional[loss_functions.PerceptualLossWeight] = None,
  ):
    """Builds the mock, optionally swapping in a signed loss op.

    Args:
      use_mock_loss_op: When True, replaces the base class loss op with the
        plain (signed) mean of the difference.
      feature_weight: Forwarded to the base class.
      loss_weight: Forwarded to the base class.
    """
    super().__init__(loss_weight=loss_weight, feature_weight=feature_weight)
    if not use_mock_loss_op:
      return
    self._loss_op = lambda x, y: tf.math.reduce_mean(x - y)

  def _compute_features(self, img: tf.Tensor) -> Sequence[tf.Tensor]:
    """Returns five references to one random map; `img` is ignored."""
    # A single draw per call: all five entries are the same tensor.
    feature = tf.random.normal(shape=(1, 8, 8, 3))
    return [feature] * 5
 class PerceptualLossTest(tf.test.TestCase, parameterized.TestCase):
  """Exercises the `PerceptualLoss` weighting logic via `MockPerceptualLoss`."""

  def setUp(self):
    super().setUp()
    # Placeholder inputs: MockPerceptualLoss does not read the pixel values.
    self._img1 = tf.fill(dims=(8, 8), value=0.2)
    self._img2 = tf.fill(dims=(8, 8), value=0.8)

  def test_invalid_feature_weight_raise_value_error(self):
    """Two weights for five features must be rejected."""
    expected_message = (
        'Input feature weight length 2 is smaller than feature length 5'
    )
    with self.assertRaisesRegex(ValueError, expected_message):
      too_few_weights = MockPerceptualLoss(feature_weight=[1.0, 2.0])
      too_few_weights(img1=self._img1, img2=self._img2)

  # NOTE(review): the goldens below were evidently recorded from the current
  # implementation. The content_loss values track the SUM of the feature
  # weights (33.839218 ~= 15 * 2.0 * 1.127974), whereas the style_loss values
  # track only the LAST feature weight (0.164193 ~= 5 * 0.032839). Regenerate
  # the style_loss goldens if the style loss is ever summed over all scales.
  @parameterized.named_parameters(
      dict(
          testcase_name='default_loss_weight_and_loss_op',
          use_mock_loss_op=False,
          feature_weight=None,
          loss_weight=None,
          loss_values={'style_loss': 0.032839, 'content_loss': 5.639870},
      ),
      dict(
          testcase_name='style_loss_weight_is_0_default_loss_op',
          use_mock_loss_op=False,
          feature_weight=None,
          loss_weight=loss_functions.PerceptualLossWeight(style=0),
          loss_values={'style_loss': 0, 'content_loss': 5.639870},
      ),
      dict(
          testcase_name='content_loss_weight_is_0_default_loss_op',
          use_mock_loss_op=False,
          feature_weight=None,
          loss_weight=loss_functions.PerceptualLossWeight(content=0),
          loss_values={'style_loss': 0.032839, 'content_loss': 0},
      ),
      dict(
          testcase_name='customized_loss_weight_default_loss_op',
          use_mock_loss_op=False,
          feature_weight=None,
          loss_weight=loss_functions.PerceptualLossWeight(
              style=1.0, content=2.0
          ),
          loss_values={
              'style_loss': 0.032839,
              'content_loss': 11.279739,
          },
      ),
      dict(
          testcase_name=(
              'customized_feature_weight_and_loss_weight_default_loss_op'
          ),
          use_mock_loss_op=False,
          feature_weight=[1.0, 2.0, 3.0, 4.0, 5.0],
          loss_weight=loss_functions.PerceptualLossWeight(
              style=1.0, content=2.0
          ),
          loss_values={
              'style_loss': 0.164193,
              'content_loss': 33.839218,
          },
      ),
      dict(
          testcase_name='no_loss_change_if_extra_feature_weight_provided',
          use_mock_loss_op=False,
          feature_weight=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
          loss_weight=loss_functions.PerceptualLossWeight(
              style=1.0, content=2.0
          ),
          loss_values={
              'style_loss': 0.164193,
              'content_loss': 33.839218,
          },
      ),
      dict(
          testcase_name='customized_loss_weight_custom_loss_op',
          use_mock_loss_op=True,
          feature_weight=None,
          loss_weight=loss_functions.PerceptualLossWeight(
              style=1.0, content=2.0
          ),
          loss_values={
              'style_loss': 0.000395,
              'content_loss': -1.533469,
          },
      ),
  )
  def test_weighted_perceptul_loss(
      self,
      use_mock_loss_op: bool,
      feature_weight: Sequence[float],
      loss_weight: loss_functions.PerceptualLossWeight,
      loss_values: Dict[str, float],
  ):
    """Compares both loss terms against the golden values for each setup."""
    loss_fn = MockPerceptualLoss(
        use_mock_loss_op=use_mock_loss_op,
        feature_weight=feature_weight,
        loss_weight=loss_weight,
    )
    actual = loss_fn(img1=self._img1, img2=self._img2)

    loss_names = ['style_loss', 'content_loss']
    self.assertEqual(list(actual.keys()), loss_names)
    for loss_name in loss_names:
      self.assertNear(actual[loss_name], loss_values[loss_name], 1e-4)
 class VGGPerceptualLossTest(tf.test.TestCase, parameterized.TestCase):
  """Checks `VGGPerceptualLoss` against recorded loss values."""

  def setUp(self):
    super().setUp()
    # These tests may run in parallel, so give each one its own temp dir by
    # patching tempfile.gettempdir(); that avoids a race while the model is
    # being downloaded.
    gettempdir_patcher = unittest_mock.patch.object(
        tempfile,
        'gettempdir',
        return_value=self.create_tempdir(),
        autospec=True,
    )
    self.mock_gettempdir = gettempdir_patcher.start()
    self.addCleanup(gettempdir_patcher.stop)

    # Two constant-valued 256x256 RGB batches of size one.
    self._img1 = tf.fill(dims=(1, 256, 256, 3), value=0.1)
    self._img2 = tf.fill(dims=(1, 256, 256, 3), value=0.9)

  # NOTE(review): these goldens presumably come from a run of the current
  # implementation; regenerate them if the loss computation changes.
  @parameterized.named_parameters(
      dict(
          testcase_name='default_loss_weight',
          loss_weight=None,
          loss_values={
              'style_loss': 5.8363257e-06,
              'content_loss': 1.7016045,
          },
      ),
      dict(
          testcase_name='customized_loss_weight',
          loss_weight=loss_functions.PerceptualLossWeight(
              style=10.0, content=20.0
          ),
          loss_values={
              'style_loss': 5.8363257e-05,
              'content_loss': 34.03208,
          },
      ),
  )
  def test_vgg_perceptual_loss(self, loss_weight, loss_values):
    """Each loss term must match its golden to a relative 1e-5."""
    loss_fn = loss_functions.VGGPerceptualLoss(loss_weight=loss_weight)
    actual = loss_fn(img1=self._img1, img2=self._img2)

    loss_names = ['style_loss', 'content_loss']
    self.assertEqual(list(actual.keys()), loss_names)
    for loss_name in loss_names:
      golden = loss_values[loss_name]
      # The tolerance scales with the golden because the two terms differ by
      # several orders of magnitude.
      self.assertNear(actual[loss_name], golden, golden / 1e5)
 # Run the test cases in this module when the file is executed directly.
 if __name__ == '__main__':
  tf.test.main()