internal change

PiperOrigin-RevId: 511338483
2023-02-21 16:39:02 -08:00 · 2023-02-21 16:39:02 -08:00 · e2ac704428
commit e2ac704428
parent 9a6d5e0444
3 changed files with 173 additions and 0 deletions
--- a/mediapipe/tasks/cc/vision/hand_detector/utils.cc
+++ b/mediapipe/tasks/cc/vision/hand_detector/utils.cc
@ -0,0 +1,77 @@
+#include "mediapipe/tasks/cc/vision/hand_detector/utils.h"
+
+#include "mediapipe/calculators/tensor/tensors_to_detections_calculator.pb.h"
+#include "mediapipe/calculators/tflite/ssd_anchors_calculator.pb.h"
+#include "mediapipe/framework/formats/object_detection/anchor.pb.h"
+#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
+#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
+#include "research/aimatter/api/face_detector_metadata_generated.h"
+#include "research/aimatter/api/internal/blaze_face/anchor_ssd_decoder.h"
+#include "util/task/contrib/status_macros/ret_check.h"
+
+namespace mediapipe::tasks::vision::hand_detector {
+
+namespace rapi = ::research::aimatter::api;
+
+// Number of classes passed to TensorsToDetectionsCalculator (palm only).
+constexpr int kPalmClassNum = 1;
+// Number of values describing the bounding box of one detection.
+constexpr int kBboxCoordsNum = 4;
+// Number of keypoints per palm detection.
+constexpr int kPalmKeypointNum = 7;
+// Number of values stored for each keypoint.
+constexpr int kKeypointCoordsNum = 2;
+// Total number of values per detection: all keypoint values plus the box.
+constexpr int kCoordsNum =
+    kPalmKeypointNum * kKeypointCoordsNum + kBboxCoordsNum;
+
+// Populates `options` with the fixed SSD anchors described by the aimatter
+// detector metadata of the model.
+//
+// `options` is cleared first. Only metadata carrying a BlazeFaceOutputSpecV2
+// (v1 unset, v2 set) is supported; an unsupported or incomplete output spec
+// yields an error status rather than dereferencing a missing flatbuffer field.
+// Each emitted anchor has its center divided by the input tensor width/height
+// and a fixed 1.0 x 1.0 extent.
+absl::Status ConfigureSsdAnchorsCalculator(
+    const ImageTensorSpecs& image_tensor_specs,
+    const research::aimatter::api::fb::FaceDetectorMetadata& metadata_fb,
+    mediapipe::SsdAnchorsCalculatorOptions& options) {
+  options.Clear();
+
+  // Flatbuffer table accessors return nullptr for absent fields, so every
+  // level is validated before it is followed.
+  const auto* output_spec_fb = metadata_fb.output_spec();
+  RET_CHECK(output_spec_fb != nullptr) << "Missing output spec in metadata.";
+  RET_CHECK(output_spec_fb->v1() == nullptr && output_spec_fb->v2() != nullptr)
+      << "Only support BlazeFaceOutputSpecV2.";
+  const auto* anchors_scheme = output_spec_fb->v2()->anchors_scheme();
+  RET_CHECK(anchors_scheme != nullptr)
+      << "Missing anchors scheme in metadata.";
+  const auto* configuration = anchors_scheme->configuration();
+  RET_CHECK(configuration != nullptr)
+      << "Missing anchors configuration in metadata.";
+
+  // Translate every metadata entry into the decoder's anchor config. Iterating
+  // the flatbuffer vector directly avoids a signed/unsigned index comparison.
+  std::vector<rapi::internal::AnchorSsdDecoder::AnchorConfig> configs;
+  configs.reserve(configuration->size());
+  for (const auto* config_fb : *configuration) {
+    RET_CHECK(config_fb != nullptr && config_fb->anchors() != nullptr)
+        << "Missing anchors in anchors configuration.";
+    configs.push_back(
+        {.stride = config_fb->stride(),
+         .anchors_num = static_cast<int>(config_fb->anchors()->size())});
+  }
+
+  const int tensor_height = image_tensor_specs.image_height;
+  const int tensor_width = image_tensor_specs.image_width;
+  const auto& rapi_anchors = rapi::internal::AnchorSsdDecoder::GenerateAnchors(
+      configs, tensor_width, tensor_height);
+  // Bind by reference: the anchors are only read, no per-element copy needed.
+  for (const auto& rapi_anchor : rapi_anchors) {
+    auto* anchor = options.add_fixed_anchors();
+    // NOTE(review): assumes center_x/center_y are floating point so this is
+    // not an integer division — confirm against AnchorSsdDecoder.
+    anchor->set_x_center(rapi_anchor.center_x / tensor_width);
+    anchor->set_y_center(rapi_anchor.center_y / tensor_height);
+    anchor->set_w(1.0);
+    anchor->set_h(1.0);
+  }
+  return absl::OkStatus();
+}
+
+// Fills `options` for decoding the palm detection model's raw output tensors.
+//
+// `options` is cleared first. The class/box/keypoint layout comes from the
+// file-level palm constants, `num_boxes` and `min_detection_confidence` are
+// forwarded as given, and the x/w scales use the input tensor width while the
+// y/h scales use its height. Always returns OkStatus.
+absl::Status ConfigureTensorsToDetectionsCalculator(
+    const ImageTensorSpecs& image_tensor_specs, int num_boxes,
+    float min_detection_confidence,
+    mediapipe::TensorsToDetectionsCalculatorOptions& options) {
+  options.Clear();
+
+  // Score handling.
+  options.set_sigmoid_score(true);
+  options.set_min_score_thresh(min_detection_confidence);
+
+  // Layout of the raw tensor: box values first, keypoints right after.
+  options.set_num_boxes(num_boxes);
+  options.set_num_classes(kPalmClassNum);
+  options.set_num_coords(kCoordsNum);
+  options.set_box_format(mediapipe::TensorsToDetectionsCalculatorOptions::XYWH);
+  options.set_box_coord_offset(0);
+  options.set_keypoint_coord_offset(kBboxCoordsNum);
+  options.set_num_keypoints(kPalmKeypointNum);
+  options.set_num_values_per_keypoint(kKeypointCoordsNum);
+
+  // Scales are taken from the model's input tensor dimensions.
+  const int input_width = image_tensor_specs.image_width;
+  const int input_height = image_tensor_specs.image_height;
+  options.set_x_scale(input_width);
+  options.set_w_scale(input_width);
+  options.set_y_scale(input_height);
+  options.set_h_scale(input_height);
+
+  return absl::OkStatus();
+}
+
+}  // namespace mediapipe::tasks::vision::hand_detector
--- a/mediapipe/tasks/cc/vision/hand_detector/utils.h
+++ b/mediapipe/tasks/cc/vision/hand_detector/utils.h
@ -0,0 +1,27 @@
+#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_DETECTOR_UTILS_H_
+#define MEDIAPIPE_TASKS_CC_VISION_HAND_DETECTOR_UTILS_H_
+
+#include "absl/status/status.h"
+#include "mediapipe/calculators/tensor/tensors_to_detections_calculator.pb.h"
+#include "mediapipe/calculators/tflite/ssd_anchors_calculator.pb.h"
+#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
+#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
+#include "research/aimatter/api/face_detector_metadata_generated.h"
+
+namespace mediapipe::tasks::vision::hand_detector {
+
+// Configure SsdAnchorsCalculator from the tflite model with aimatter metadata.
+//
+// Clears `options`, then adds one fixed anchor for every anchor generated from
+// the metadata's anchor configuration and the input tensor width/height taken
+// from `image_tensor_specs`. Anchor centers are divided by the tensor
+// width/height and every anchor's w/h is set to 1.0.
+//
+// Returns an error status unless `metadata_fb` carries a BlazeFaceOutputSpecV2
+// output spec (v1 unset, v2 set).
+absl::Status ConfigureSsdAnchorsCalculator(
+    const ImageTensorSpecs& image_tensor_specs,
+    const research::aimatter::api::fb::FaceDetectorMetadata& metadata_fb,
+    mediapipe::SsdAnchorsCalculatorOptions& options);
+
+// Configure TensorsToDetectionsCalculator.
+//
+// Clears `options`, then sets the palm detection layout (class count, box and
+// keypoint counts/offsets, XYWH box format, sigmoid scores), forwards
+// `num_boxes` and uses `min_detection_confidence` as the minimum score
+// threshold. The x/w scales are set to the input tensor width and the y/h
+// scales to the input tensor height from `image_tensor_specs`.
+absl::Status ConfigureTensorsToDetectionsCalculator(
+    const ImageTensorSpecs& image_tensor_specs, int num_boxes,
+    float min_detection_confidence,
+    mediapipe::TensorsToDetectionsCalculatorOptions& options);
+
+}  // namespace mediapipe::tasks::vision::hand_detector
+
+#endif  // MEDIAPIPE_TASKS_CC_VISION_HAND_DETECTOR_UTILS_H_
--- a/mediapipe/tasks/cc/vision/hand_detector/utils_test.cc
+++ b/mediapipe/tasks/cc/vision/hand_detector/utils_test.cc
@ -0,0 +1,69 @@
+#include "mediapipe/tasks/cc/vision/hand_detector/utils.h"
+
+#include "absl/status/statusor.h"
+#include "mediapipe/calculators/tensor/tensors_to_detections_calculator.pb.h"
+#include "mediapipe/calculators/tflite/ssd_anchors_calculator.pb.h"
+#include "mediapipe/framework/deps/file_path.h"
+#include "mediapipe/framework/port/file_helpers.h"
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/tasks/cc/core/model_resources.h"
+#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
+#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
+#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
+#include "research/aimatter/api/metadata_utils.h"
+
+namespace mediapipe::tasks::vision::hand_detector {
+namespace {
+
+namespace rapi = ::research::aimatter::api;
+
+using ::mediapipe::file::JoinPath;
+using ::mediapipe::tasks::core::ModelResources;
+using ::mediapipe::tasks::core::proto::ExternalFile;
+
+constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
+constexpr char kTestModelResourcesTag[] = "test_model_resources";
+constexpr char kModelWithMetadataName[] = "palm_detection_full.tflite";
+constexpr float kEpsilon = 1e-6;
+
+// Builds ModelResources for the test model `model_name`, which is looked up
+// under the test data directory relative to the current directory.
+absl::StatusOr<std::unique_ptr<ModelResources>> CreateModelResourcesForModel(
+    absl::string_view model_name) {
+  const auto model_path = JoinPath("./", kTestDataDirectory, model_name);
+  auto model_file = std::make_unique<ExternalFile>();
+  model_file->set_file_name(model_path);
+  return ModelResources::Create(kTestModelResourcesTag, std::move(model_file));
+}
+
+// Checks that the anchors configured from the palm detection model's metadata
+// have the expected count.
+TEST(Utils, ConfigureSsdAnchorsCalculator) {
+  MP_ASSERT_OK_AND_ASSIGN(auto model_resources,
+                          CreateModelResourcesForModel(kModelWithMetadataName));
+  const tflite::Model& model = *(model_resources->GetTfLiteModel());
+  MP_ASSERT_OK_AND_ASSIGN(
+      const auto metadata_fb,
+      rapi::VerifyAndLoadMetadata<rapi::fb::FaceDetectorMetadata>(
+          model, rapi::fb::FaceDetectorMetadataIdentifier()));
+  // Assert on the status instead of dereferencing the result unchecked, so a
+  // failure to build the specs is reported as a test failure.
+  MP_ASSERT_OK_AND_ASSIGN(const auto image_tensor_specs,
+                          BuildInputImageTensorSpecs(*model_resources));
+  mediapipe::SsdAnchorsCalculatorOptions ssd_anchors_options;
+  MP_ASSERT_OK(ConfigureSsdAnchorsCalculator(image_tensor_specs, *metadata_fb,
+                                             ssd_anchors_options));
+  EXPECT_EQ(ssd_anchors_options.fixed_anchors().size(), 2016);
+}
+
+// Checks that the scales and score threshold are derived from the model's
+// input tensor specs and the requested confidence.
+TEST(Utils, ConfigureTensorsToDetectionCalculator) {
+  MP_ASSERT_OK_AND_ASSIGN(auto model_resources,
+                          CreateModelResourcesForModel(kModelWithMetadataName));
+  // Assert on the status instead of dereferencing the result unchecked, so a
+  // failure to build the specs is reported as a test failure.
+  MP_ASSERT_OK_AND_ASSIGN(const auto image_tensor_specs,
+                          BuildInputImageTensorSpecs(*model_resources));
+  mediapipe::TensorsToDetectionsCalculatorOptions tensors_to_detections_options;
+  MP_ASSERT_OK(ConfigureTensorsToDetectionsCalculator(
+      image_tensor_specs, 2016, 0.1, tensors_to_detections_options));
+  EXPECT_NEAR(tensors_to_detections_options.x_scale(), 192, kEpsilon);
+  EXPECT_NEAR(tensors_to_detections_options.y_scale(), 192, kEpsilon);
+  EXPECT_NEAR(tensors_to_detections_options.w_scale(), 192, kEpsilon);
+  EXPECT_NEAR(tensors_to_detections_options.h_scale(), 192, kEpsilon);
+  EXPECT_NEAR(tensors_to_detections_options.min_score_thresh(), 0.1, kEpsilon);
+}
+
+}  // namespace
+}  // namespace mediapipe::tasks::vision::hand_detector