From 296ee33be55ba115ae7ba68eeaa35d031ce485e3 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Sat, 11 Mar 2023 13:02:56 -0800 Subject: [PATCH] Add FaceLandmarker C++ API PiperOrigin-RevId: 515912777 --- .../tasks/cc/vision/face_landmarker/BUILD | 31 ++ .../vision/face_landmarker/face_landmarker.cc | 250 ++++++++++ .../vision/face_landmarker/face_landmarker.h | 198 ++++++++ .../face_landmarker/face_landmarker_result.cc | 14 +- .../face_landmarker/face_landmarker_result.h | 4 +- .../face_landmarker_result_test.cc | 7 +- .../face_landmarker/face_landmarker_test.cc | 455 ++++++++++++++++++ 7 files changed, 947 insertions(+), 12 deletions(-) create mode 100644 mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc create mode 100644 mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h create mode 100644 mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_test.cc diff --git a/mediapipe/tasks/cc/vision/face_landmarker/BUILD b/mediapipe/tasks/cc/vision/face_landmarker/BUILD index 7ecc93b21..3df2f2db6 100644 --- a/mediapipe/tasks/cc/vision/face_landmarker/BUILD +++ b/mediapipe/tasks/cc/vision/face_landmarker/BUILD @@ -129,6 +129,37 @@ cc_library( ], ) +cc_library( + name = "face_landmarker", + srcs = ["face_landmarker.cc"], + hdrs = ["face_landmarker.h"], + deps = [ + ":face_landmarker_graph", + ":face_landmarker_result", + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:matrix", + "//mediapipe/framework/formats:matrix_data_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/tasks/cc/components/containers:classification_result", + "//mediapipe/tasks/cc/core:base_options", + "//mediapipe/tasks/cc/core:base_task_api", + "//mediapipe/tasks/cc/core:task_runner", + "//mediapipe/tasks/cc/core:utils", + "//mediapipe/tasks/cc/vision/core:base_vision_task_api", + 
"//mediapipe/tasks/cc/vision/core:image_processing_options", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/core:vision_task_api_factory", + "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarker_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarks_detector_graph_options_cc_proto", + "@com_google_absl//absl/status:statusor", + ], +) + cc_library( name = "face_landmarker_result_cc", srcs = ["face_landmarker_result.cc"], diff --git a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc new file mode 100644 index 000000000..e006b4490 --- /dev/null +++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc @@ -0,0 +1,250 @@ +/* Copyright 2023 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h" + +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/formats/matrix_data.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/tasks/cc/components/containers/classification_result.h" +#include "mediapipe/tasks/cc/core/base_task_api.h" +#include "mediapipe/tasks/cc/core/task_runner.h" +#include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" +#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h" +#include "mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace face_landmarker { + +namespace { + +using FaceLandmarkerGraphOptionsProto = ::mediapipe::tasks::vision:: + face_landmarker::proto::FaceLandmarkerGraphOptions; + +constexpr char kFaceLandmarkerGraphTypeName[] = + "mediapipe.tasks.vision.face_landmarker.FaceLandmarkerGraph"; + +constexpr char kImageTag[] = "IMAGE"; +constexpr char kImageInStreamName[] = "image_in"; +constexpr char kImageOutStreamName[] = "image_out"; +constexpr char kNormRectTag[] = "NORM_RECT"; +constexpr char kNormRectStreamName[] = "norm_rect_in"; +constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS"; +constexpr char 
kNormLandmarksStreamName[] = "norm_landmarks"; +constexpr char kBlendshapesTag[] = "BLENDSHAPES"; +constexpr char kBlendshapesStreamName[] = "blendshapes"; +constexpr char kFaceGeometryTag[] = "FACE_GEOMETRY"; +constexpr char kFaceGeometryStreamName[] = "face_geometry"; +constexpr int kMicroSecondsPerMilliSecond = 1000; + +// Creates a MediaPipe graph config that contains a subgraph node of +// "mediapipe.tasks.vision.face_landmarker.FaceLandmarkerGraph". If the task is +// running in the live stream mode, a "FlowLimiterCalculator" will be added to +// limit the number of frames in flight. +CalculatorGraphConfig CreateGraphConfig( + std::unique_ptr options, + bool output_face_blendshapes, bool output_facial_transformation_matrixes, + bool enable_flow_limiting) { + api2::builder::Graph graph; + auto& subgraph = graph.AddNode(kFaceLandmarkerGraphTypeName); + subgraph.GetOptions().Swap(options.get()); + graph.In(kImageTag).SetName(kImageInStreamName); + graph.In(kNormRectTag).SetName(kNormRectStreamName); + subgraph.Out(kNormLandmarksTag).SetName(kNormLandmarksStreamName) >> + graph.Out(kNormLandmarksTag); + subgraph.Out(kImageTag).SetName(kImageOutStreamName) >> graph.Out(kImageTag); + if (output_face_blendshapes) { + subgraph.Out(kBlendshapesTag).SetName(kBlendshapesStreamName) >> + graph.Out(kBlendshapesTag); + } + if (output_facial_transformation_matrixes) { + subgraph.Out(kFaceGeometryTag).SetName(kFaceGeometryStreamName) >> + graph.Out(kFaceGeometryTag); + } + if (enable_flow_limiting) { + return tasks::core::AddFlowLimiterCalculator( + graph, subgraph, {kImageTag, kNormRectTag}, kNormLandmarksTag); + } + graph.In(kImageTag) >> subgraph.In(kImageTag); + graph.In(kNormRectTag) >> subgraph.In(kNormRectTag); + return graph.GetConfig(); +} + +// Converts the user-facing FaceLandmarkerOptions struct to the internal +// FaceLandmarkerGraphOptions proto. 
+std::unique_ptr +ConvertFaceLandmarkerGraphOptionsProto(FaceLandmarkerOptions* options) { + auto options_proto = std::make_unique(); + auto base_options_proto = std::make_unique( + tasks::core::ConvertBaseOptionsToProto(&(options->base_options))); + options_proto->mutable_base_options()->Swap(base_options_proto.get()); + options_proto->mutable_base_options()->set_use_stream_mode( + options->running_mode != core::RunningMode::IMAGE); + + // Configure face detector options. + auto* face_detector_graph_options = + options_proto->mutable_face_detector_graph_options(); + face_detector_graph_options->set_num_faces(options->num_faces); + face_detector_graph_options->set_min_detection_confidence( + options->min_face_detection_confidence); + + // Configure face landmark detector options. + options_proto->set_min_tracking_confidence(options->min_tracking_confidence); + auto* face_landmarks_detector_graph_options = + options_proto->mutable_face_landmarks_detector_graph_options(); + face_landmarks_detector_graph_options->set_min_detection_confidence( + options->min_face_presence_confidence); + + return options_proto; +} + +FaceLandmarkerResult GetFaceLandmarkerResultFromPacketMap( + const tasks::core::PacketMap& packet_map) { + const auto& face_landmarks = packet_map.at(kNormLandmarksStreamName) + .Get>(); + std::optional> face_blendshapes; + if (packet_map.find(kBlendshapesStreamName) != packet_map.end()) { + face_blendshapes = packet_map.at(kBlendshapesStreamName) + .Get>(); + } + std::optional> matrix_data_list; + if (packet_map.find(kFaceGeometryStreamName) != packet_map.end()) { + const auto& face_geometry_list = + packet_map.at(kFaceGeometryStreamName) + .Get>(); + matrix_data_list = std::vector(face_geometry_list.size()); + std::transform(face_geometry_list.begin(), face_geometry_list.end(), + matrix_data_list->begin(), + [](const face_geometry::proto::FaceGeometry& face_geometry) { + return face_geometry.pose_transform_matrix(); + }); + } + return 
ConvertToFaceLandmarkerResult( + /* face_landmarks_proto = */ face_landmarks, + /* face_blendshapes_proto= */ face_blendshapes, + /* facial_transformation_matrixes_proto= */ matrix_data_list); +} + +} // namespace + +absl::StatusOr> FaceLandmarker::Create( + std::unique_ptr options) { + auto options_proto = ConvertFaceLandmarkerGraphOptionsProto(options.get()); + tasks::core::PacketsCallback packets_callback = nullptr; + if (options->result_callback) { + auto result_callback = options->result_callback; + packets_callback = [=](absl::StatusOr packet_map) { + if (!packet_map.ok()) { + Image image; + result_callback(packet_map.status(), image, Timestamp::Unset().Value()); + return; + } + if (packet_map->at(kImageOutStreamName).IsEmpty()) { + return; + } + Packet image_packet = packet_map->at(kImageOutStreamName); + if (packet_map->at(kNormLandmarksStreamName).IsEmpty()) { + Packet empty_packet = packet_map->at(kNormLandmarksStreamName); + result_callback( + {FaceLandmarkerResult()}, image_packet.Get(), + empty_packet.Timestamp().Value() / kMicroSecondsPerMilliSecond); + return; + } + result_callback( + GetFaceLandmarkerResultFromPacketMap(*packet_map), + image_packet.Get(), + packet_map->at(kNormLandmarksStreamName).Timestamp().Value() / + kMicroSecondsPerMilliSecond); + }; + } + return core::VisionTaskApiFactory::Create( + CreateGraphConfig( + std::move(options_proto), options->output_face_blendshapes, + options->output_facial_transformation_matrixes, + options->running_mode == core::RunningMode::LIVE_STREAM), + std::move(options->base_options.op_resolver), options->running_mode, + std::move(packets_callback)); +} + +absl::StatusOr FaceLandmarker::Detect( + mediapipe::Image image, + std::optional image_processing_options) { + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, + /*roi_allowed=*/false)); + ASSIGN_OR_RETURN( + auto output_packets, + ProcessImageData( + {{kImageInStreamName, MakePacket(std::move(image))}, + 
{kNormRectStreamName, + MakePacket(std::move(norm_rect))}})); + if (output_packets[kNormLandmarksStreamName].IsEmpty()) { + return {FaceLandmarkerResult()}; + } + return GetFaceLandmarkerResultFromPacketMap(output_packets); +} + +absl::StatusOr FaceLandmarker::DetectForVideo( + mediapipe::Image image, int64_t timestamp_ms, + std::optional image_processing_options) { + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, + /*roi_allowed=*/false)); + ASSIGN_OR_RETURN( + auto output_packets, + ProcessVideoData( + {{kImageInStreamName, + MakePacket(std::move(image)) + .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}, + {kNormRectStreamName, + MakePacket(std::move(norm_rect)) + .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}})); + if (output_packets[kNormLandmarksStreamName].IsEmpty()) { + return {FaceLandmarkerResult()}; + } + return GetFaceLandmarkerResultFromPacketMap(output_packets); +} + +absl::Status FaceLandmarker::DetectAsync( + mediapipe::Image image, int64_t timestamp_ms, + std::optional image_processing_options) { + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, + /*roi_allowed=*/false)); + return SendLiveStreamData( + {{kImageInStreamName, + MakePacket(std::move(image)) + .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}, + {kNormRectStreamName, + MakePacket(std::move(norm_rect)) + .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}); +} + +} // namespace face_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h new file mode 100644 index 000000000..5a5c8404a --- /dev/null +++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h @@ -0,0 +1,198 @@ +/* Copyright 2023 The MediaPipe Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_ +#define MEDIAPIPE_TASKS_CC_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_ + +#include +#include +#include + +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/tasks/cc/core/base_options.h" +#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace face_landmarker { + +struct FaceLandmarkerOptions { + // Base options for configuring MediaPipe Tasks library, such as specifying + // the TfLite model bundle file with metadata, accelerator options, op + // resolver, etc. + tasks::core::BaseOptions base_options; + + // The running mode of the task. Default to the image mode. + // FaceLandmarker has three running modes: + // 1) The image mode for detecting face landmarks on single image inputs. + // 2) The video mode for detecting face landmarks on the decoded frames of a + // video. + // 3) The live stream mode for detecting face landmarks on the live stream of + // input data, such as from camera. 
In this mode, the "result_callback" + // below must be specified to receive the detection results asynchronously. + core::RunningMode running_mode = core::RunningMode::IMAGE; + + // The maximum number of faces that can be detected by the FaceLandmarker. + int num_faces = 1; + + // The minimum confidence score for the face detection to be considered + // successful. + float min_face_detection_confidence = 0.5; + + // The minimum confidence score of face presence score in the face landmark + // detection. + float min_face_presence_confidence = 0.5; + + // The minimum confidence score for the face tracking to be considered + // successful. + float min_tracking_confidence = 0.5; + + // Whether FaceLandmarker outputs face blendshapes classification. Face + // blendshapes are used for rendering the 3D face model. + bool output_face_blendshapes = false; + + // Whether FaceLandmarker outputs facial transformation_matrix. Facial + // transformation matrix is used to transform the face landmarks in canonical + // face to the detected face, so that users can apply face effects on the + // detected landmarks. + bool output_facial_transformation_matrixes = false; + + // The user-defined result callback for processing live stream data. + // The result callback should only be specified when the running mode is set + // to RunningMode::LIVE_STREAM. + std::function, const Image&, + int64_t)> + result_callback = nullptr; +}; + +// Performs face landmarks detection on the given image. +// +// TODO add the link to DevSite. +// This API expects a pre-trained face landmarker model asset bundle. +// +// Inputs: +// Image +// - The image that face landmarks detection runs on. +// std::optional +// - If provided, can be used to specify the rotation to apply to the image +// before performing face landmarks detection, by setting its +// 'rotation_degrees' field. 
+// Note that specifying a region-of-interest using the +// 'region_of_interest' field is NOT supported and will result in an +// invalid argument error being returned. +// Outputs: +// FaceLandmarkerResult +// - The face landmarks detection results. +class FaceLandmarker : tasks::vision::core::BaseVisionTaskApi { + public: + using BaseVisionTaskApi::BaseVisionTaskApi; + + // Creates a FaceLandmarker from a FaceLandmarkerOptions to process image data + // or streaming data. Face landmarker can be created with one of the following + // three running modes: + // 1) Image mode for detecting face landmarks on single image inputs. Users + // provide mediapipe::Image to the `Detect` method, and will receive the + // detected face landmarks results as the return value. + // 2) Video mode for detecting face landmarks on the decoded frames of a + // video. Users call `DetectForVideo` method, and will receive the detected + // face landmarks results as the return value. + // 3) Live stream mode for detecting face landmarks on the live stream of the + // input data, such as from camera. Users call `DetectAsync` to push the + // image data into the FaceLandmarker, the detected results along with the + // input timestamp and the image that face landmarker runs on will be + // available in the result callback when the face landmarker finishes the + // work. + static absl::StatusOr> Create( + std::unique_ptr options); + + // Performs face landmarks detection on the given image. + // Only use this method when the FaceLandmarker is created with the image + // running mode. + // + // The optional 'image_processing_options' parameter can be used to specify + // the rotation to apply to the image before performing detection, by setting + // its 'rotation_degrees' field. Note that specifying a region-of-interest + // using the 'region_of_interest' field is NOT supported and will result in an + // invalid argument error being returned. 
+ // + // The image can be of any size with format RGB or RGBA. + // TODO: Describes how the input image will be preprocessed + // after the yuv support is implemented. + absl::StatusOr Detect( + Image image, + std::optional image_processing_options = + std::nullopt); + + // Performs face landmarks detection on the provided video frame. + // Only use this method when the FaceLandmarker is created with the video + // running mode. + // + // The optional 'image_processing_options' parameter can be used to specify + // the rotation to apply to the image before performing detection, by setting + // its 'rotation_degrees' field. Note that specifying a region-of-interest + // using the 'region_of_interest' field is NOT supported and will result in an + // invalid argument error being returned. + // + // The image can be of any size with format RGB or RGBA. It's required to + // provide the video frame's timestamp (in milliseconds). The input timestamps + // must be monotonically increasing. + absl::StatusOr DetectForVideo( + Image image, int64_t timestamp_ms, + std::optional image_processing_options = + std::nullopt); + + // Sends live image data to perform face landmarks detection, and the results + // will be available via the "result_callback" provided in the + // FaceLandmarkerOptions. Only use this method when the FaceLandmarker + // is created with the live stream running mode. + // + // The image can be of any size with format RGB or RGBA. It's required to + // provide a timestamp (in milliseconds) to indicate when the input image is + // sent to the face landmarker. The input timestamps must be monotonically + // increasing. + // + // The optional 'image_processing_options' parameter can be used to specify + // the rotation to apply to the image before performing detection, by setting + // its 'rotation_degrees' field. 
Note that specifying a region-of-interest + // using the 'region_of_interest' field is NOT supported and will result in an + // invalid argument error being returned. + // + // The "result_callback" provides + // - The FaceLandmarkerResult detected for an input frame, or an error + // status if the detection failed. + // - The const reference to the corresponding input image that the face + // landmarker runs on. Note that the const reference to the image will no + // longer be valid when the callback returns. To access the image data + // outside of the callback, callers need to make a copy of the image. + // - The input timestamp in milliseconds. + absl::Status DetectAsync(Image image, int64_t timestamp_ms, + std::optional + image_processing_options = std::nullopt); + + // Shuts down the FaceLandmarker when all work is done. + absl::Status Close() { return runner_->Close(); } +}; + +} // namespace face_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe + +#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_ diff --git a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.cc b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.cc index 3f369cc16..53a171ed5 100644 --- a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.cc +++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.cc @@ -34,7 +34,7 @@ FaceLandmarkerResult ConvertToFaceLandmarkerResult( std::optional> face_blendshapes_proto, std::optional> - facial_transformation_matrix_proto) { + facial_transformation_matrixes_proto) { FaceLandmarkerResult result; result.face_landmarks.resize(face_landmarks_proto.size()); std::transform(face_landmarks_proto.begin(), face_landmarks_proto.end(), @@ -52,12 +52,12 @@ FaceLandmarkerResult ConvertToFaceLandmarkerResult( classification_list); }); } - if (facial_transformation_matrix_proto.has_value()) { - result.facial_transformation_matrix = - 
std::vector(facial_transformation_matrix_proto->size()); - std::transform(facial_transformation_matrix_proto->begin(), - facial_transformation_matrix_proto->end(), - result.facial_transformation_matrix->begin(), + if (facial_transformation_matrixes_proto.has_value()) { + result.facial_transformation_matrixes = + std::vector(facial_transformation_matrixes_proto->size()); + std::transform(facial_transformation_matrixes_proto->begin(), + facial_transformation_matrixes_proto->end(), + result.facial_transformation_matrixes->begin(), [](const mediapipe::MatrixData& matrix_proto) { mediapipe::Matrix matrix; MatrixFromMatrixDataProto(matrix_proto, &matrix); diff --git a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h index 9774d80d9..35dd7a8ab 100644 --- a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h +++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h @@ -40,7 +40,7 @@ struct FaceLandmarkerResult { std::optional> face_blendshapes; // Optional facial transformation matrix. - std::optional> facial_transformation_matrix; + std::optional> facial_transformation_matrixes; }; // Convert face landmarks result from proto format to FaceLandmarkerResult. 
@@ -49,7 +49,7 @@ FaceLandmarkerResult ConvertToFaceLandmarkerResult( std::optional> face_blendshapes_proto = std::nullopt, std::optional> - facial_transformation_matrix_proto = std::nullopt); + facial_transformation_matrixes_proto = std::nullopt); } // namespace face_landmarker } // namespace vision diff --git a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result_test.cc b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result_test.cc index c3ed2d371..4123a81f3 100644 --- a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result_test.cc +++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result_test.cc @@ -73,9 +73,10 @@ TEST(FaceLandmarkerResultTest, Succeeds) { std::nullopt)); Matrix expected_matrix{{0, 3, 6}, {1, 4, 7}, {2, 5, 8}}; - ASSERT_TRUE(face_landmarker_result.facial_transformation_matrix.has_value()); - EXPECT_EQ(face_landmarker_result.facial_transformation_matrix->size(), 1); - EXPECT_EQ(face_landmarker_result.facial_transformation_matrix->at(0), + ASSERT_TRUE( + face_landmarker_result.facial_transformation_matrixes.has_value()); + EXPECT_EQ(face_landmarker_result.facial_transformation_matrixes->size(), 1); + EXPECT_EQ(face_landmarker_result.facial_transformation_matrixes->at(0), expected_matrix); } diff --git a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_test.cc b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_test.cc new file mode 100644 index 000000000..0b6d9af73 --- /dev/null +++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_test.cc @@ -0,0 +1,455 @@ +/* Copyright 2023 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/formats/matrix_data.pb.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/cc/common.h" +#include "mediapipe/tasks/cc/components/containers/category.h" +#include "mediapipe/tasks/cc/components/containers/classification_result.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" +#include "mediapipe/tasks/cc/components/containers/rect.h" +#include "mediapipe/tasks/cc/components/processors/proto/classifier_options.pb.h" +#include "mediapipe/tasks/cc/core/base_options.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace face_landmarker { +namespace { + +using ::file::Defaults; 
+using ::mediapipe::tasks::vision::core::ImageProcessingOptions; +using ::testing::TestParamInfo; +using ::testing::TestWithParam; +using ::testing::Values; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kFaceLandmarkerModelBundleName[] = "face_landmarker.task"; +constexpr char kFaceLandmarkerWithBlendshapesModelBundleName[] = + "face_landmarker_with_blendshapes.task"; +constexpr char kPortraitImageName[] = "portrait.jpg"; +constexpr char kPortraitExpectedFaceLandamrksName[] = + "portrait_expected_face_landmarks.pbtxt"; +constexpr char kPortraitExpectedFaceLandamrksWithAttentionName[] = + "portrait_expected_face_landmarks_with_attention.pbtxt"; +constexpr char kPortraitExpectedBlendshapesName[] = + "portrait_expected_blendshapes_with_attention.pbtxt"; + +constexpr float kLandmarksDiffMargin = 0.03; +constexpr float kBlendshapesDiffMargin = 0.1; +constexpr float kFacialTransformationMatrixDiffMargin = 0.02; + +template +ProtoT GetExpectedProto(absl::string_view filename) { + ProtoT expected_proto; + MP_EXPECT_OK(GetTextProto(file::JoinPath("./", kTestDataDirectory, filename), + &expected_proto, Defaults())); + return expected_proto; +} + +// Struct holding the parameters for parameterized FaceLandmarkerGraphTest +// class. +struct FaceLandmarkerTestParams { + // The name of this test, for convenience when displaying test results. + std::string test_name; + // The filename of the model to test. + std::string input_model_name; + // The filename of the test image. + std::string test_image_name; + // The rotation to apply to the test image before processing, in degrees + // clockwise. + int rotation; + // The expected output face landmarker result. 
+ FaceLandmarkerResult expected_result; +}; + +mediapipe::MatrixData MakePortraitExpectedFacialTransformationMatrix() { + const Matrix matrix{{0.9995292, -0.005092691, 0.030254554, -0.37340546}, + {0.0072318087, 0.99744856, -0.07102106, 22.212194}, + {-0.029815676, 0.07120642, 0.9970159, -64.76358}, + {0, 0, 0, 1}}; + mediapipe::MatrixData matrix_data; + MatrixDataProtoFromMatrix(matrix, &matrix_data); + return matrix_data; +} + +testing::Matcher LandmarkIs( + const components::containers::NormalizedLandmark& landmark) { + return testing::AllOf( + testing::Field(&components::containers::NormalizedLandmark::x, + testing::FloatNear(landmark.x, kLandmarksDiffMargin)), + testing::Field(&components::containers::NormalizedLandmark::y, + testing::FloatNear(landmark.y, kLandmarksDiffMargin))); +} + +void ExpectLandmarksCorrect( + const std::vector + actual_landmarks, + const std::vector + expected_landmarks) { + ASSERT_EQ(actual_landmarks.size(), expected_landmarks.size()); + for (int i = 0; i < actual_landmarks.size(); ++i) { + ASSERT_EQ(actual_landmarks[i].landmarks.size(), + expected_landmarks[i].landmarks.size()); + for (int j = 0; j < actual_landmarks[i].landmarks.size(); ++j) { + EXPECT_THAT(actual_landmarks[i].landmarks[j], + LandmarkIs(expected_landmarks[i].landmarks[j])); + } + } +} + +testing::Matcher CategoryIs( + const components::containers::Category& category) { + return testing::AllOf( + testing::Field(&components::containers::Category::index, + testing::Eq(category.index)), + testing::Field( + &components::containers::Category::score, + testing::FloatNear(category.score, kBlendshapesDiffMargin))); +} + +void ExpectBlendshapesCorrect( + const std::vector& + actual_blendshapes, + const std::vector& + expected_blendshapes) { + ASSERT_EQ(actual_blendshapes.size(), expected_blendshapes.size()); + for (int i = 0; i < actual_blendshapes.size(); ++i) { + ASSERT_EQ(actual_blendshapes[i].categories.size(), + expected_blendshapes[i].categories.size()); + for (int j = 
0; j < actual_blendshapes[i].categories.size(); ++j) { + EXPECT_THAT(actual_blendshapes[i].categories[j], + CategoryIs(expected_blendshapes[i].categories[j])); + } + } +} + +void ExpectFacialTransformationMatrixCorrect( + const std::vector& actual_matrix_list, + const std::vector& expected_matrix_list) { + ASSERT_EQ(actual_matrix_list.size(), expected_matrix_list.size()); + for (int i = 0; i < actual_matrix_list.size(); ++i) { + const Matrix& actual_matrix = actual_matrix_list[i]; + const Matrix& expected_matrix = expected_matrix_list[i]; + ASSERT_EQ(actual_matrix.cols(), expected_matrix.cols()); + ASSERT_EQ(actual_matrix.rows(), expected_matrix.rows()); + for (int i = 0; i < actual_matrix.size(); ++i) { + EXPECT_NEAR(actual_matrix.data()[i], expected_matrix.data()[i], + kFacialTransformationMatrixDiffMargin); + } + } +} + +void ExpectFaceLandmarkerResultCorrect( + const FaceLandmarkerResult& actual_result, + const FaceLandmarkerResult& expected_result) { + ExpectLandmarksCorrect(actual_result.face_landmarks, + expected_result.face_landmarks); + + ASSERT_EQ(actual_result.face_blendshapes.has_value(), + expected_result.face_blendshapes.has_value()); + if (expected_result.face_blendshapes.has_value()) { + ASSERT_TRUE(actual_result.face_blendshapes.has_value()); + ExpectBlendshapesCorrect(*actual_result.face_blendshapes, + *expected_result.face_blendshapes); + } + + ASSERT_EQ(actual_result.facial_transformation_matrixes.has_value(), + expected_result.facial_transformation_matrixes.has_value()); + if (expected_result.facial_transformation_matrixes.has_value()) { + ASSERT_TRUE(actual_result.facial_transformation_matrixes.has_value()); + ExpectFacialTransformationMatrixCorrect( + *actual_result.facial_transformation_matrixes, + *expected_result.facial_transformation_matrixes); + } +} + +class ImageModeTest : public TestWithParam {}; + +TEST_P(ImageModeTest, Succeeds) { + MP_ASSERT_OK_AND_ASSIGN( + Image image, DecodeImageFromFile(file::JoinPath( + "./", 
kTestDataDirectory, GetParam().test_image_name))); + auto options = std::make_unique(); + options->base_options.model_asset_path = + file::JoinPath("./", kTestDataDirectory, GetParam().input_model_name); + options->running_mode = core::RunningMode::IMAGE; + options->output_face_blendshapes = + GetParam().expected_result.face_blendshapes.has_value(); + options->output_facial_transformation_matrixes = + GetParam().expected_result.facial_transformation_matrixes.has_value(); + + MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr face_landmarker, + FaceLandmarker::Create(std::move(options))); + FaceLandmarkerResult actual_result; + if (GetParam().rotation != 0) { + ImageProcessingOptions image_processing_options; + image_processing_options.rotation_degrees = GetParam().rotation; + MP_ASSERT_OK_AND_ASSIGN( + actual_result, + face_landmarker->Detect(image, image_processing_options)); + } else { + MP_ASSERT_OK_AND_ASSIGN(actual_result, face_landmarker->Detect(image)); + } + ExpectFaceLandmarkerResultCorrect(actual_result, GetParam().expected_result); + MP_ASSERT_OK(face_landmarker->Close()); +} + +INSTANTIATE_TEST_SUITE_P( + FaceLandmarkerTest, ImageModeTest, + Values(FaceLandmarkerTestParams{ + /* test_name= */ "Portrait", + /* input_model_name= */ kFaceLandmarkerModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksName)})}, + FaceLandmarkerTestParams{ + /* test_name= */ "PortraitWithAttention", + /* input_model_name= */ + kFaceLandmarkerWithBlendshapesModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksWithAttentionName)})}, + FaceLandmarkerTestParams{ + /* test_name= */ "PortraitWithBlendshapes", + /* input_model_name= */ + kFaceLandmarkerWithBlendshapesModelBundleName, + /* test_image_name= */ 
kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksWithAttentionName)}, + {{GetExpectedProto( + kPortraitExpectedBlendshapesName)}})}, + FaceLandmarkerTestParams{ + /* test_name= */ "PortraitWithBlendshapesWithFacialTransformatio" + "nMatrix", + /* input_model_name= */ + kFaceLandmarkerWithBlendshapesModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksWithAttentionName)}, + {{GetExpectedProto( + kPortraitExpectedBlendshapesName)}}, + {{MakePortraitExpectedFacialTransformationMatrix()}})}), + [](const TestParamInfo& info) { + return info.param.test_name; + }); + +class VideoModeTest : public TestWithParam {}; + +TEST_P(VideoModeTest, Succeeds) { + MP_ASSERT_OK_AND_ASSIGN( + Image image, DecodeImageFromFile(file::JoinPath( + "./", kTestDataDirectory, GetParam().test_image_name))); + auto options = std::make_unique(); + options->base_options.model_asset_path = + file::JoinPath("./", kTestDataDirectory, GetParam().input_model_name); + options->running_mode = core::RunningMode::VIDEO; + options->output_face_blendshapes = + GetParam().expected_result.face_blendshapes.has_value(); + options->output_facial_transformation_matrixes = + GetParam().expected_result.facial_transformation_matrixes.has_value(); + + MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr face_landmarker, + FaceLandmarker::Create(std::move(options))); + for (int i = 0; i < 3; ++i) { + FaceLandmarkerResult actual_result; + if (GetParam().rotation != 0) { + ImageProcessingOptions image_processing_options; + image_processing_options.rotation_degrees = GetParam().rotation; + MP_ASSERT_OK_AND_ASSIGN( + actual_result, + face_landmarker->DetectForVideo(image, i, image_processing_options)); + } else { + MP_ASSERT_OK_AND_ASSIGN(actual_result, + face_landmarker->DetectForVideo(image, 
i)); + } + ExpectFaceLandmarkerResultCorrect(actual_result, + GetParam().expected_result); + } + MP_ASSERT_OK(face_landmarker->Close()); +} + +INSTANTIATE_TEST_SUITE_P( + FaceLandmarkerTest, VideoModeTest, + Values(FaceLandmarkerTestParams{ + /* test_name= */ "Portrait", + /* input_model_name= */ kFaceLandmarkerModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksName)})}, + FaceLandmarkerTestParams{ + /* test_name= */ "PortraitWithAttention", + /* input_model_name= */ + kFaceLandmarkerWithBlendshapesModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksWithAttentionName)})}, + FaceLandmarkerTestParams{ + /* test_name= */ "PortraitWithBlendshapes", + /* input_model_name= */ + kFaceLandmarkerWithBlendshapesModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksWithAttentionName)}, + {{GetExpectedProto( + kPortraitExpectedBlendshapesName)}})}), + [](const TestParamInfo& info) { + return info.param.test_name; + }); + +class LiveStreamModeTest : public TestWithParam {}; + +TEST_P(LiveStreamModeTest, Succeeds) { + MP_ASSERT_OK_AND_ASSIGN( + Image image, DecodeImageFromFile(file::JoinPath( + "./", kTestDataDirectory, GetParam().test_image_name))); + auto options = std::make_unique(); + options->base_options.model_asset_path = + file::JoinPath("./", kTestDataDirectory, GetParam().input_model_name); + options->running_mode = core::RunningMode::LIVE_STREAM; + options->output_face_blendshapes = + GetParam().expected_result.face_blendshapes.has_value(); + options->output_facial_transformation_matrixes = + 
GetParam().expected_result.facial_transformation_matrixes.has_value(); + + std::vector face_landmarker_results; + std::vector timestamps; + options->result_callback = [&face_landmarker_results, ×tamps]( + absl::StatusOr result, + const Image& image, int64_t timestamp_ms) { + MP_ASSERT_OK(result.status()); + face_landmarker_results.push_back(std::move(result.value())); + timestamps.push_back(timestamp_ms); + }; + + MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr face_landmarker, + FaceLandmarker::Create(std::move(options))); + + const int iterations = 100; + for (int i = 0; i < iterations; ++i) { + FaceLandmarkerResult actual_result; + if (GetParam().rotation != 0) { + ImageProcessingOptions image_processing_options; + image_processing_options.rotation_degrees = GetParam().rotation; + MP_ASSERT_OK( + face_landmarker->DetectAsync(image, i, image_processing_options)); + } else { + MP_ASSERT_OK(face_landmarker->DetectAsync(image, i)); + } + } + MP_ASSERT_OK(face_landmarker->Close()); + + // Due to the flow limiter, the total of outputs will be smaller than the + // number of iterations. 
+ ASSERT_LE(face_landmarker_results.size(), iterations); + ASSERT_GT(face_landmarker_results.size(), 0); + + for (int i = 0; i < face_landmarker_results.size(); ++i) { + ExpectFaceLandmarkerResultCorrect(face_landmarker_results[i], + GetParam().expected_result); + } + int64_t timestamp_ms = -1; + for (const auto& timestamp : timestamps) { + EXPECT_GT(timestamp, timestamp_ms); + timestamp_ms = timestamp; + } +} + +INSTANTIATE_TEST_SUITE_P( + FaceLandmarkerTest, LiveStreamModeTest, + Values(FaceLandmarkerTestParams{ + /* test_name= */ "Portrait", + /* input_model_name= */ kFaceLandmarkerModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksName)})}, + FaceLandmarkerTestParams{ + /* test_name= */ "PortraitWithAttention", + /* input_model_name= */ + kFaceLandmarkerWithBlendshapesModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksWithAttentionName)})}, + FaceLandmarkerTestParams{ + /* test_name= */ "PortraitWithBlendshapes", + /* input_model_name= */ + kFaceLandmarkerWithBlendshapesModelBundleName, + /* test_image_name= */ kPortraitImageName, + /* rotation= */ 0, + /* expected_result= */ + ConvertToFaceLandmarkerResult( + {GetExpectedProto( + kPortraitExpectedFaceLandamrksWithAttentionName)}, + {{GetExpectedProto( + kPortraitExpectedBlendshapesName)}})}), + [](const TestParamInfo& info) { + return info.param.test_name; + }); + +} // namespace +} // namespace face_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe