Add FaceDetector Java API

PiperOrigin-RevId: 515913662
2023-03-11 13:12:50 -08:00 · 2023-03-11 13:12:50 -08:00 · 131be2169a
commit 131be2169a
parent 296ee33be5
5 changed files with 966 additions and 0 deletions
--- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/BUILD
+++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/BUILD
@ -45,6 +45,7 @@ cc_binary(
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
        "//mediapipe/tasks/cc/vision/face_detector:face_detector_graph",
        "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_graph",
        "//mediapipe/tasks/cc/vision/image_classifier:image_classifier_graph",
        "//mediapipe/tasks/cc/vision/image_embedder:image_embedder_graph",
@ -235,6 +236,7 @@ android_library(
 android_library(
    name = "facedetector",
    srcs = [
        "facedetector/FaceDetector.java",
        "facedetector/FaceDetectorResult.java",
    ],
    javacopts = [
@ -245,7 +247,10 @@ android_library(
        ":core",
        "//mediapipe/framework:calculator_options_java_proto_lite",
        "//mediapipe/framework/formats:detection_java_proto_lite",
        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
        "//mediapipe/java/com/google/mediapipe/framework/image",
        "//mediapipe/tasks/cc/core/proto:base_options_java_proto_lite",
        "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_java_proto_lite",
        "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:detection",
        "//mediapipe/tasks/java/com/google/mediapipe/tasks/core",
        "//third_party:autovalue",
--- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/facedetector/FaceDetector.java
+++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/facedetector/FaceDetector.java
@ -0,0 +1,463 @@
 // Copyright 2023 The MediaPipe Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 package com.google.mediapipe.tasks.vision.facedetector;
 import android.content.Context;
 import android.os.ParcelFileDescriptor;
 import com.google.auto.value.AutoValue;
 import com.google.mediapipe.proto.CalculatorOptionsProto.CalculatorOptions;
 import com.google.mediapipe.framework.AndroidPacketGetter;
 import com.google.mediapipe.framework.Packet;
 import com.google.mediapipe.framework.PacketGetter;
 import com.google.mediapipe.framework.image.BitmapImageBuilder;
 import com.google.mediapipe.framework.image.MPImage;
 import com.google.mediapipe.tasks.core.BaseOptions;
 import com.google.mediapipe.tasks.core.ErrorListener;
 import com.google.mediapipe.tasks.core.OutputHandler;
 import com.google.mediapipe.tasks.core.OutputHandler.ResultListener;
 import com.google.mediapipe.tasks.core.TaskInfo;
 import com.google.mediapipe.tasks.core.TaskOptions;
 import com.google.mediapipe.tasks.core.TaskRunner;
 import com.google.mediapipe.tasks.core.proto.BaseOptionsProto;
 import com.google.mediapipe.tasks.vision.core.BaseVisionTaskApi;
 import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
 import com.google.mediapipe.tasks.vision.core.RunningMode;
 import com.google.mediapipe.tasks.vision.facedetector.proto.FaceDetectorGraphOptionsProto;
 import com.google.mediapipe.formats.proto.DetectionProto.Detection;
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
 /**
 * Performs face detection on images.
 *
 * <p>The API expects a TFLite model with <a
 * href="https://www.tensorflow.org/lite/convert/metadata">TFLite Model Metadata.</a>.
 *
 * <ul>
 *   <li>Input image {@link MPImage}
 *       <ul>
 *         <li>The image that the face detector runs on.
 *       </ul>
 *   <li>Output FaceDetectorResult {@link FaceDetectorResult}
 *       <ul>
 *         <li>A FaceDetectorResult containing detected faces.
 *       </ul>
 * </ul>
 */
 public final class FaceDetector extends BaseVisionTaskApi {
  private static final String TAG = FaceDetector.class.getSimpleName();
  private static final String IMAGE_IN_STREAM_NAME = "image_in";
  private static final String NORM_RECT_IN_STREAM_NAME = "norm_rect_in";
  @SuppressWarnings("ConstantCaseForConstants")
  private static final List<String> INPUT_STREAMS =
      Collections.unmodifiableList(
          Arrays.asList("IMAGE:" + IMAGE_IN_STREAM_NAME, "NORM_RECT:" + NORM_RECT_IN_STREAM_NAME));
  @SuppressWarnings("ConstantCaseForConstants")
  private static final List<String> OUTPUT_STREAMS =
      Collections.unmodifiableList(Arrays.asList("DETECTIONS:detections_out", "IMAGE:image_out"));
  private static final int DETECTIONS_OUT_STREAM_INDEX = 0;
  private static final int IMAGE_OUT_STREAM_INDEX = 1;
  private static final String TASK_GRAPH_NAME =
      "mediapipe.tasks.vision.face_detector.FaceDetectorGraph";
  /**
   * Creates a {@link FaceDetector} instance from a model file and the default {@link
   * FaceDetectorOptions}.
   *
   * @param context an Android {@link Context}.
   * @param modelPath path to the detection model with metadata in the assets.
   * @throws MediaPipeException if there is an error during {@link FaceDetector} creation.
   */
  public static FaceDetector createFromFile(Context context, String modelPath) {
    BaseOptions baseOptions = BaseOptions.builder().setModelAssetPath(modelPath).build();
    return createFromOptions(
        context, FaceDetectorOptions.builder().setBaseOptions(baseOptions).build());
  }
  /**
   * Creates a {@link FaceDetector} instance from a model file and the default {@link
   * FaceDetectorOptions}.
   *
   * @param context an Android {@link Context}.
   * @param modelFile the detection model {@link File} instance.
   * @throws IOException if an I/O error occurs when opening the tflite model file.
   * @throws MediaPipeException if there is an error during {@link FaceDetector} creation.
   */
  public static FaceDetector createFromFile(Context context, File modelFile) throws IOException {
    try (ParcelFileDescriptor descriptor =
        ParcelFileDescriptor.open(modelFile, ParcelFileDescriptor.MODE_READ_ONLY)) {
      BaseOptions baseOptions =
          BaseOptions.builder().setModelAssetFileDescriptor(descriptor.getFd()).build();
      return createFromOptions(
          context, FaceDetectorOptions.builder().setBaseOptions(baseOptions).build());
    }
  }
  /**
   * Creates a {@link FaceDetector} instance from a model buffer and the default {@link
   * FaceDetectorOptions}.
   *
   * @param context an Android {@link Context}.
   * @param modelBuffer a direct {@link ByteBuffer} or a {@link MappedByteBuffer} of the detection
   *     model.
   * @throws MediaPipeException if there is an error during {@link FaceDetector} creation.
   */
  public static FaceDetector createFromBuffer(Context context, final ByteBuffer modelBuffer) {
    BaseOptions baseOptions = BaseOptions.builder().setModelAssetBuffer(modelBuffer).build();
    return createFromOptions(
        context, FaceDetectorOptions.builder().setBaseOptions(baseOptions).build());
  }
  /**
   * Creates a {@link FaceDetector} instance from a {@link FaceDetectorOptions}.
   *
   * @param context an Android {@link Context}.
   * @param detectorOptions a {@link FaceDetectorOptions} instance.
   * @throws MediaPipeException if there is an error during {@link FaceDetector} creation.
   */
  public static FaceDetector createFromOptions(
      Context context, FaceDetectorOptions detectorOptions) {
    // TODO: Consolidate OutputHandler and TaskRunner.
    OutputHandler<FaceDetectorResult, MPImage> handler = new OutputHandler<>();
    handler.setOutputPacketConverter(
        new OutputHandler.OutputPacketConverter<FaceDetectorResult, MPImage>() {
          @Override
          public FaceDetectorResult convertToTaskResult(List<Packet> packets) {
            // If there is no faces detected in the image, just returns empty lists.
            if (packets.get(DETECTIONS_OUT_STREAM_INDEX).isEmpty()) {
              return FaceDetectorResult.create(
                  new ArrayList<>(),
                  BaseVisionTaskApi.generateResultTimestampMs(
                      detectorOptions.runningMode(), packets.get(DETECTIONS_OUT_STREAM_INDEX)));
            }
            return FaceDetectorResult.create(
                PacketGetter.getProtoVector(
                    packets.get(DETECTIONS_OUT_STREAM_INDEX), Detection.parser()),
                BaseVisionTaskApi.generateResultTimestampMs(
                    detectorOptions.runningMode(), packets.get(DETECTIONS_OUT_STREAM_INDEX)));
          }
          @Override
          public MPImage convertToTaskInput(List<Packet> packets) {
            return new BitmapImageBuilder(
                    AndroidPacketGetter.getBitmapFromRgb(packets.get(IMAGE_OUT_STREAM_INDEX)))
                .build();
          }
        });
    detectorOptions.resultListener().ifPresent(handler::setResultListener);
    detectorOptions.errorListener().ifPresent(handler::setErrorListener);
    TaskRunner runner =
        TaskRunner.create(
            context,
            TaskInfo.<FaceDetectorOptions>builder()
                .setTaskName(FaceDetector.class.getSimpleName())
                .setTaskRunningModeName(detectorOptions.runningMode().name())
                .setTaskGraphName(TASK_GRAPH_NAME)
                .setInputStreams(INPUT_STREAMS)
                .setOutputStreams(OUTPUT_STREAMS)
                .setTaskOptions(detectorOptions)
                .setEnableFlowLimiting(detectorOptions.runningMode() == RunningMode.LIVE_STREAM)
                .build(),
            handler);
    return new FaceDetector(runner, detectorOptions.runningMode());
  }
  /**
   * Constructor to initialize a {@link FaceDetector} from a {@link TaskRunner} and a {@link
   * RunningMode}.
   *
   * @param taskRunner a {@link TaskRunner}.
   * @param runningMode a mediapipe vision task {@link RunningMode}.
   */
  private FaceDetector(TaskRunner taskRunner, RunningMode runningMode) {
    super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME, NORM_RECT_IN_STREAM_NAME);
  }
  /**
   * Performs face detection on the provided single image with default image processing options,
   * i.e. without any rotation applied. Only use this method when the {@link FaceDetector} is
   * created with {@link RunningMode.IMAGE}.
   *
   * <p>{@link FaceDetector} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @throws MediaPipeException if there is an internal error.
   */
  public FaceDetectorResult detect(MPImage image) {
    return detect(image, ImageProcessingOptions.builder().build());
  }
  /**
   * Performs face detection on the provided single image. Only use this method when the {@link
   * FaceDetector} is created with {@link RunningMode.IMAGE}.
   *
   * <p>{@link FaceDetector} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
   *     this method throwing an IllegalArgumentException.
   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
   *     region-of-interest.
   * @throws MediaPipeException if there is an internal error.
   */
  public FaceDetectorResult detect(MPImage image, ImageProcessingOptions imageProcessingOptions) {
    validateImageProcessingOptions(imageProcessingOptions);
    return (FaceDetectorResult) processImageData(image, imageProcessingOptions);
  }
  /**
   * Performs face detection on the provided video frame with default image processing options, i.e.
   * without any rotation applied. Only use this method when the {@link FaceDetector} is created
   * with {@link RunningMode.VIDEO}.
   *
   * <p>It's required to provide the video frame's timestamp (in milliseconds). The input timestamps
   * must be monotonically increasing.
   *
   * <p>{@link FaceDetector} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param timestampMs the input timestamp (in milliseconds).
   * @throws MediaPipeException if there is an internal error.
   */
  public FaceDetectorResult detectForVideo(MPImage image, long timestampMs) {
    return detectForVideo(image, ImageProcessingOptions.builder().build(), timestampMs);
  }
  /**
   * Performs face detection on the provided video frame. Only use this method when the {@link
   * FaceDetector} is created with {@link RunningMode.VIDEO}.
   *
   * <p>It's required to provide the video frame's timestamp (in milliseconds). The input timestamps
   * must be monotonically increasing.
   *
   * <p>{@link FaceDetector} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
   *     this method throwing an IllegalArgumentException.
   * @param timestampMs the input timestamp (in milliseconds).
   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
   *     region-of-interest.
   * @throws MediaPipeException if there is an internal error.
   */
  public FaceDetectorResult detectForVideo(
      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
    validateImageProcessingOptions(imageProcessingOptions);
    return (FaceDetectorResult) processVideoData(image, imageProcessingOptions, timestampMs);
  }
  /**
   * Sends live image data to perform face detection with default image processing options, i.e.
   * without any rotation applied, and the results will be available via the {@link ResultListener}
   * provided in the {@link FaceDetectorOptions}. Only use this method when the {@link FaceDetector}
   * is created with {@link RunningMode.LIVE_STREAM}.
   *
   * <p>It's required to provide a timestamp (in milliseconds) to indicate when the input image is
   * sent to the face detector. The input timestamps must be monotonically increasing.
   *
   * <p>{@link FaceDetector} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param timestampMs the input timestamp (in milliseconds).
   * @throws MediaPipeException if there is an internal error.
   */
  public void detectAsync(MPImage image, long timestampMs) {
    detectAsync(image, ImageProcessingOptions.builder().build(), timestampMs);
  }
  /**
   * Sends live image data to perform face detection, and the results will be available via the
   * {@link ResultListener} provided in the {@link FaceDetectorOptions}. Only use this method when
   * the {@link FaceDetector} is created with {@link RunningMode.LIVE_STREAM}.
   *
   * <p>It's required to provide a timestamp (in milliseconds) to indicate when the input image is
   * sent to the face detector. The input timestamps must be monotonically increasing.
   *
   * <p>{@link FaceDetector} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
   *     this method throwing an IllegalArgumentException.
   * @param timestampMs the input timestamp (in milliseconds).
   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
   *     region-of-interest.
   * @throws MediaPipeException if there is an internal error.
   */
  public void detectAsync(
      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
    validateImageProcessingOptions(imageProcessingOptions);
    sendLiveStreamData(image, imageProcessingOptions, timestampMs);
  }
  /** Options for setting up a {@link FaceDetector}. */
  @AutoValue
  public abstract static class FaceDetectorOptions extends TaskOptions {
    /** Builder for {@link FaceDetectorOptions}. */
    @AutoValue.Builder
    public abstract static class Builder {
      /** Sets the {@link BaseOptions} for the face detector task. */
      public abstract Builder setBaseOptions(BaseOptions value);
      /**
       * Sets the {@link RunningMode} for the face detector task. Default to the image mode. face
       * detector has three modes:
       *
       * <ul>
       *   <li>IMAGE: The mode for detecting faces on single image inputs.
       *   <li>VIDEO: The mode for detecting faces on the decoded frames of a video.
       *   <li>LIVE_STREAM: The mode for for detecting faces on a live stream of input data, such as
       *       from camera. In this mode, {@code setResultListener} must be called to set up a
       *       listener to receive the detection results asynchronously.
       * </ul>
       */
      public abstract Builder setRunningMode(RunningMode value);
      /**
       * Sets the minimum confidence score for the face detection to be considered successful. The
       * default minDetectionConfidence is 0.5.
       */
      public abstract Builder setMinDetectionConfidence(Float value);
      /**
       * Sets the minimum non-maximum-suppression threshold for face detection to be considered
       * overlapped. The default minSuppressionThreshold is 0.3.
       */
      public abstract Builder setMinSuppressionThreshold(Float value);
      /**
       * Sets the {@link ResultListener} to receive the detection results asynchronously when the
       * face detector is in the live stream mode.
       */
      public abstract Builder setResultListener(ResultListener<FaceDetectorResult, MPImage> value);
      /** Sets an optional {@link ErrorListener}}. */
      public abstract Builder setErrorListener(ErrorListener value);
      abstract FaceDetectorOptions autoBuild();
      /**
       * Validates and builds the {@link FaceDetectorOptions} instance.
       *
       * @throws IllegalArgumentException if the result listener and the running mode are not
       *     properly configured. The result listener should only be set when the face detector is
       *     in the live stream mode.
       */
      public final FaceDetectorOptions build() {
        FaceDetectorOptions options = autoBuild();
        if (options.runningMode() == RunningMode.LIVE_STREAM) {
          if (!options.resultListener().isPresent()) {
            throw new IllegalArgumentException(
                "The face detector is in the live stream mode, a user-defined result listener"
                    + " must be provided in FaceDetectorOptions.");
          }
        } else if (options.resultListener().isPresent()) {
          throw new IllegalArgumentException(
              "The face detector is in the image or the video mode, a user-defined result"
                  + " listener shouldn't be provided in FaceDetectorOptions.");
        }
        return options;
      }
    }
    abstract BaseOptions baseOptions();
    abstract RunningMode runningMode();
    abstract float minDetectionConfidence();
    abstract float minSuppressionThreshold();
    abstract Optional<ResultListener<FaceDetectorResult, MPImage>> resultListener();
    abstract Optional<ErrorListener> errorListener();
    public static Builder builder() {
      return new AutoValue_FaceDetector_FaceDetectorOptions.Builder()
          .setRunningMode(RunningMode.IMAGE)
          .setMinDetectionConfidence(0.5f)
          .setMinSuppressionThreshold(0.3f);
    }
    /** Converts a {@link FaceDetectorOptions} to a {@link CalculatorOptions} protobuf message. */
    @Override
    public CalculatorOptions convertToCalculatorOptionsProto() {
      BaseOptionsProto.BaseOptions.Builder baseOptionsBuilder =
          BaseOptionsProto.BaseOptions.newBuilder();
      baseOptionsBuilder.setUseStreamMode(runningMode() != RunningMode.IMAGE);
      baseOptionsBuilder.mergeFrom(convertBaseOptionsToProto(baseOptions()));
      FaceDetectorGraphOptionsProto.FaceDetectorGraphOptions.Builder taskOptionsBuilder =
          FaceDetectorGraphOptionsProto.FaceDetectorGraphOptions.newBuilder()
              .setBaseOptions(baseOptionsBuilder);
      taskOptionsBuilder.setMinDetectionConfidence(minDetectionConfidence());
      taskOptionsBuilder.setMinSuppressionThreshold(minSuppressionThreshold());
      return CalculatorOptions.newBuilder()
          .setExtension(
              FaceDetectorGraphOptionsProto.FaceDetectorGraphOptions.ext,
              taskOptionsBuilder.build())
          .build();
    }
  }
  /**
   * Validates that the provided {@link ImageProcessingOptions} doesn't contain a
   * region-of-interest.
   */
  private static void validateImageProcessingOptions(
      ImageProcessingOptions imageProcessingOptions) {
    if (imageProcessingOptions.regionOfInterest().isPresent()) {
      throw new IllegalArgumentException("FaceDetector doesn't support region-of-interest.");
    }
  }
 }
--- a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/facedetector/AndroidManifest.xml
+++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/facedetector/AndroidManifest.xml
@ -0,0 +1,24 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="com.google.mediapipe.tasks.vision.facedetectortest"
    android:versionCode="1"
    android:versionName="1.0" >
    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
    <uses-sdk android:minSdkVersion="24"
        android:targetSdkVersion="30" />
    <application
        android:label="facedetectortest"
        android:name="android.support.multidex.MultiDexApplication"
        android:taskAffinity="">
        <uses-library android:name="android.test.runner" />
    </application>
    <instrumentation
        android:name="com.google.android.apps.common.testing.testrunner.GoogleInstrumentationTestRunner"
        android:targetPackage="com.google.mediapipe.tasks.vision.facedetectortest" />
 </manifest>
--- a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/facedetector/BUILD
+++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/facedetector/BUILD
@ -0,0 +1,19 @@
 # Copyright 2023 The MediaPipe Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 package(default_visibility = ["//mediapipe/tasks:internal"])
 licenses(["notice"])
 # TODO: Enable this in OSS
--- a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/facedetector/FaceDetectorTest.java
+++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/facedetector/FaceDetectorTest.java
@ -0,0 +1,455 @@
 // Copyright 2023 The MediaPipe Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 package com.google.mediapipe.tasks.vision.facedetector;
 import static com.google.common.truth.Truth.assertThat;
 import static org.junit.Assert.assertThrows;
 import android.content.res.AssetManager;
 import android.graphics.BitmapFactory;
 import android.graphics.RectF;
 import androidx.test.core.app.ApplicationProvider;
 import androidx.test.ext.junit.runners.AndroidJUnit4;
 import com.google.mediapipe.framework.MediaPipeException;
 import com.google.mediapipe.framework.image.BitmapImageBuilder;
 import com.google.mediapipe.framework.image.MPImage;
 import com.google.mediapipe.tasks.components.containers.NormalizedKeypoint;
 import com.google.mediapipe.tasks.core.BaseOptions;
 import com.google.mediapipe.tasks.core.TestUtils;
 import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
 import com.google.mediapipe.tasks.vision.core.RunningMode;
 import com.google.mediapipe.tasks.vision.facedetector.FaceDetector.FaceDetectorOptions;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Suite;
 import org.junit.runners.Suite.SuiteClasses;
 /** Test for {@link FaceDetector}. */
@RunWith(Suite.class)
@SuiteClasses({FaceDetectorTest.General.class, FaceDetectorTest.RunningModeTest.class})
 public class FaceDetectorTest {
  private static final String MODEL_FILE = "face_detection_short_range.tflite";
  private static final String CAT_IMAGE = "cat.jpg";
  private static final String PORTRAIT_IMAGE = "portrait.jpg";
  private static final String PORTRAIT_ROTATED_IMAGE = "portrait_rotated.jpg";
  private static final float KEYPOINTS_DIFF_TOLERANCE = 0.01f;
  private static final float PIXEL_DIFF_TOLERANCE = 5.0f;
  private static final RectF PORTRAIT_FACE_BOUNDING_BOX = new RectF(283, 115, 514, 349);
  private static final List<NormalizedKeypoint> PORTRAIT_FACE_KEYPOINTS =
      Collections.unmodifiableList(
          Arrays.asList(
              NormalizedKeypoint.create(0.44416f, 0.17643f),
              NormalizedKeypoint.create(0.55514f, 0.17731f),
              NormalizedKeypoint.create(0.50467f, 0.22657f),
              NormalizedKeypoint.create(0.50227f, 0.27199f),
              NormalizedKeypoint.create(0.36063f, 0.20143f),
              NormalizedKeypoint.create(0.60841f, 0.20409f)));
  private static final RectF PORTRAIT_ROTATED_FACE_BOUNDING_BOX = new RectF(674, 283, 910, 519);
  private static final List<NormalizedKeypoint> PORTRAIT_ROTATED_FACE_KEYPOINTS =
      Collections.unmodifiableList(
          Arrays.asList(
              NormalizedKeypoint.create(0.82075f, 0.44679f),
              NormalizedKeypoint.create(0.81965f, 0.56261f),
              NormalizedKeypoint.create(0.76194f, 0.51719f),
              NormalizedKeypoint.create(0.71993f, 0.51360f),
              NormalizedKeypoint.create(0.80700f, 0.36298f),
              NormalizedKeypoint.create(0.80882f, 0.61204f)));
  @RunWith(AndroidJUnit4.class)
  public static final class General extends FaceDetectorTest {
    @Test
    public void detect_successWithValidModels() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      FaceDetectorResult results = faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE));
      assertContainsSinglePortraitFace(
          results, PORTRAIT_FACE_BOUNDING_BOX, PORTRAIT_FACE_KEYPOINTS);
    }
    @Test
    public void detect_succeedsWithMinDetectionConfidence() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setMinDetectionConfidence(1.0f)
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      FaceDetectorResult results = faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE));
      // Set minDetectionConfidence to 1.0, so the detected face should be all filtered out.
      assertThat(results.detections().isEmpty()).isTrue();
    }
    @Test
    public void detect_succeedsWithEmptyFace() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setMinDetectionConfidence(1.0f)
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      FaceDetectorResult results = faceDetector.detect(getImageFromAsset(CAT_IMAGE));
      assertThat(results.detections().isEmpty()).isTrue();
    }
    @Test
    public void detect_succeedsWithModelFileObject() throws Exception {
      FaceDetector faceDetector =
          FaceDetector.createFromFile(
              ApplicationProvider.getApplicationContext(),
              TestUtils.loadFile(ApplicationProvider.getApplicationContext(), MODEL_FILE));
      FaceDetectorResult results = faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE));
      assertContainsSinglePortraitFace(
          results, PORTRAIT_FACE_BOUNDING_BOX, PORTRAIT_FACE_KEYPOINTS);
    }
    @Test
    public void detect_succeedsWithModelBuffer() throws Exception {
      FaceDetector faceDetector =
          FaceDetector.createFromBuffer(
              ApplicationProvider.getApplicationContext(),
              TestUtils.loadToDirectByteBuffer(
                  ApplicationProvider.getApplicationContext(), MODEL_FILE));
      FaceDetectorResult results = faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE));
      assertContainsSinglePortraitFace(
          results, PORTRAIT_FACE_BOUNDING_BOX, PORTRAIT_FACE_KEYPOINTS);
    }
    @Test
    public void detect_succeedsWithModelBufferAndOptions() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(
                  BaseOptions.builder()
                      .setModelAssetBuffer(
                          TestUtils.loadToDirectByteBuffer(
                              ApplicationProvider.getApplicationContext(), MODEL_FILE))
                      .build())
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      FaceDetectorResult results = faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE));
      assertContainsSinglePortraitFace(
          results, PORTRAIT_FACE_BOUNDING_BOX, PORTRAIT_FACE_KEYPOINTS);
    }
    @Test
    public void create_failsWithMissingModel() throws Exception {
      String nonexistentFile = "/path/to/non/existent/file";
      MediaPipeException exception =
          assertThrows(
              MediaPipeException.class,
              () ->
                  FaceDetector.createFromFile(
                      ApplicationProvider.getApplicationContext(), nonexistentFile));
      assertThat(exception).hasMessageThat().contains(nonexistentFile);
    }
    @Test
    public void create_failsWithInvalidModelBuffer() throws Exception {
      // Create a non-direct model ByteBuffer.
      ByteBuffer modelBuffer =
          TestUtils.loadToNonDirectByteBuffer(
              ApplicationProvider.getApplicationContext(), MODEL_FILE);
      IllegalArgumentException exception =
          assertThrows(
              IllegalArgumentException.class,
              () ->
                  FaceDetector.createFromBuffer(
                      ApplicationProvider.getApplicationContext(), modelBuffer));
      assertThat(exception)
          .hasMessageThat()
          .contains("The model buffer should be either a direct ByteBuffer or a MappedByteBuffer.");
    }
    @Test
    public void detect_succeedsWithRotation() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      ImageProcessingOptions imageProcessingOptions =
          ImageProcessingOptions.builder().setRotationDegrees(-90).build();
      FaceDetectorResult results =
          faceDetector.detect(getImageFromAsset(PORTRAIT_ROTATED_IMAGE), imageProcessingOptions);
      assertContainsSinglePortraitFace(
          results, PORTRAIT_ROTATED_FACE_BOUNDING_BOX, PORTRAIT_ROTATED_FACE_KEYPOINTS);
    }
    @Test
    public void detect_failsWithRegionOfInterest() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      ImageProcessingOptions imageProcessingOptions =
          ImageProcessingOptions.builder().setRegionOfInterest(new RectF(0, 0, 1, 1)).build();
      IllegalArgumentException exception =
          assertThrows(
              IllegalArgumentException.class,
              () -> faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE), imageProcessingOptions));
      assertThat(exception)
          .hasMessageThat()
          .contains("FaceDetector doesn't support region-of-interest");
    }
  }
  @RunWith(AndroidJUnit4.class)
  public static final class RunningModeTest extends FaceDetectorTest {
    @Test
    public void create_failsWithIllegalResultListenerInNonLiveStreamMode() throws Exception {
      for (RunningMode mode : new RunningMode[] {RunningMode.IMAGE, RunningMode.VIDEO}) {
        IllegalArgumentException exception =
            assertThrows(
                IllegalArgumentException.class,
                () ->
                    FaceDetectorOptions.builder()
                        .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
                        .setRunningMode(mode)
                        .setResultListener((faceDetectorResult, inputImage) -> {})
                        .build());
        assertThat(exception)
            .hasMessageThat()
            .contains("a user-defined result listener shouldn't be provided");
      }
    }
    @Test
    public void create_failsWithMissingResultListenerInLiveSteamMode() throws Exception {
      IllegalArgumentException exception =
          assertThrows(
              IllegalArgumentException.class,
              () ->
                  FaceDetectorOptions.builder()
                      .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
                      .setRunningMode(RunningMode.LIVE_STREAM)
                      .build());
      assertThat(exception)
          .hasMessageThat()
          .contains("a user-defined result listener must be provided");
    }
    @Test
    public void detect_failsWithCallingWrongApiInImageMode() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setRunningMode(RunningMode.IMAGE)
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      MediaPipeException exception =
          assertThrows(
              MediaPipeException.class,
              () ->
                  faceDetector.detectForVideo(
                      getImageFromAsset(PORTRAIT_IMAGE), /* timestampsMs= */ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the video mode");
      exception =
          assertThrows(
              MediaPipeException.class,
              () ->
                  faceDetector.detectAsync(
                      getImageFromAsset(PORTRAIT_IMAGE), /* timestampsMs= */ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode");
    }
    @Test
    public void detect_failsWithCallingWrongApiInVideoMode() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setRunningMode(RunningMode.VIDEO)
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      MediaPipeException exception =
          assertThrows(
              MediaPipeException.class,
              () -> faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE)));
      assertThat(exception).hasMessageThat().contains("not initialized with the image mode");
      exception =
          assertThrows(
              MediaPipeException.class,
              () ->
                  faceDetector.detectAsync(
                      getImageFromAsset(PORTRAIT_IMAGE), /* timestampsMs= */ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode");
    }
    @Test
    public void detect_failsWithCallingWrongApiInLiveSteamMode() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setRunningMode(RunningMode.LIVE_STREAM)
              .setResultListener((faceDetectorResult, inputImage) -> {})
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      MediaPipeException exception =
          assertThrows(
              MediaPipeException.class,
              () -> faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE)));
      assertThat(exception).hasMessageThat().contains("not initialized with the image mode");
      exception =
          assertThrows(
              MediaPipeException.class,
              () ->
                  faceDetector.detectForVideo(
                      getImageFromAsset(PORTRAIT_IMAGE), /* timestampsMs= */ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the video mode");
    }
    @Test
    public void detect_successWithImageMode() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setRunningMode(RunningMode.IMAGE)
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      FaceDetectorResult results = faceDetector.detect(getImageFromAsset(PORTRAIT_IMAGE));
      assertContainsSinglePortraitFace(
          results, PORTRAIT_FACE_BOUNDING_BOX, PORTRAIT_FACE_KEYPOINTS);
    }
    @Test
    public void detect_successWithVideoMode() throws Exception {
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setRunningMode(RunningMode.VIDEO)
              .build();
      FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      for (int i = 0; i < 3; i++) {
        FaceDetectorResult results =
            faceDetector.detectForVideo(getImageFromAsset(PORTRAIT_IMAGE), /* timestampsMs= */ i);
        assertContainsSinglePortraitFace(
            results, PORTRAIT_FACE_BOUNDING_BOX, PORTRAIT_FACE_KEYPOINTS);
      }
    }
    @Test
    public void detect_failsWithOutOfOrderInputTimestamps() throws Exception {
      MPImage image = getImageFromAsset(PORTRAIT_IMAGE);
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setRunningMode(RunningMode.LIVE_STREAM)
              .setResultListener(
                  (faceDetectorResult, inputImage) -> {
                    assertContainsSinglePortraitFace(
                        faceDetectorResult, PORTRAIT_FACE_BOUNDING_BOX, PORTRAIT_FACE_KEYPOINTS);
                  })
              .build();
      try (FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options)) {
        faceDetector.detectAsync(image, /* timestampsMs= */ 1);
        MediaPipeException exception =
            assertThrows(
                MediaPipeException.class,
                () -> faceDetector.detectAsync(image, /* timestampsMs= */ 0));
        assertThat(exception)
            .hasMessageThat()
            .contains("having a smaller timestamp than the processed timestamp");
      }
    }
    @Test
    public void detect_successWithLiveSteamMode() throws Exception {
      MPImage image = getImageFromAsset(PORTRAIT_IMAGE);
      FaceDetectorOptions options =
          FaceDetectorOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
              .setRunningMode(RunningMode.LIVE_STREAM)
              .setResultListener(
                  (faceDetectorResult, inputImage) -> {
                    assertContainsSinglePortraitFace(
                        faceDetectorResult, PORTRAIT_FACE_BOUNDING_BOX, PORTRAIT_FACE_KEYPOINTS);
                  })
              .build();
      try (FaceDetector faceDetector =
          FaceDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options)) {
        for (int i = 0; i < 3; i++) {
          faceDetector.detectAsync(image, /* timestampsMs= */ i);
        }
      }
    }
  }
  private static MPImage getImageFromAsset(String filePath) throws Exception {
    AssetManager assetManager = ApplicationProvider.getApplicationContext().getAssets();
    InputStream istr = assetManager.open(filePath);
    return new BitmapImageBuilder(BitmapFactory.decodeStream(istr)).build();
  }
  private static void assertContainsSinglePortraitFace(
      FaceDetectorResult results,
      RectF expectedboundingBox,
      List<NormalizedKeypoint> expectedKeypoints) {
    assertThat(results.detections()).hasSize(1);
    assertApproximatelyEqualBoundingBoxes(
        results.detections().get(0).boundingBox(), expectedboundingBox);
    assertThat(results.detections().get(0).keypoints().isPresent()).isTrue();
    assertApproximatelyEqualKeypoints(
        results.detections().get(0).keypoints().get(), expectedKeypoints);
  }
  private static void assertApproximatelyEqualBoundingBoxes(
      RectF boundingBox1, RectF boundingBox2) {
    assertThat(boundingBox1.left).isWithin(PIXEL_DIFF_TOLERANCE).of(boundingBox2.left);
    assertThat(boundingBox1.top).isWithin(PIXEL_DIFF_TOLERANCE).of(boundingBox2.top);
    assertThat(boundingBox1.right).isWithin(PIXEL_DIFF_TOLERANCE).of(boundingBox2.right);
    assertThat(boundingBox1.bottom).isWithin(PIXEL_DIFF_TOLERANCE).of(boundingBox2.bottom);
  }
  private static void assertApproximatelyEqualKeypoints(
      List<NormalizedKeypoint> keypoints1, List<NormalizedKeypoint> keypoints2) {
    assertThat(keypoints1.size()).isEqualTo(keypoints2.size());
    for (int i = 0; i < keypoints1.size(); i++) {
      assertThat(keypoints1.get(i).x())
          .isWithin(KEYPOINTS_DIFF_TOLERANCE)
          .of(keypoints2.get(i).x());
      assertThat(keypoints1.get(i).y())
          .isWithin(KEYPOINTS_DIFF_TOLERANCE)
          .of(keypoints2.get(i).y());
    }
  }
 }