diff --git a/mediapipe/tasks/cc/vision/hand_detector/utils.cc b/mediapipe/tasks/cc/vision/hand_detector/utils.cc new file mode 100644 index 000000000..4ebb377ac --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_detector/utils.cc @@ -0,0 +1,77 @@ +#include "mediapipe/tasks/cc/vision/hand_detector/utils.h" + +#include "mediapipe/calculators/tensor/tensors_to_detections_calculator.pb.h" +#include "mediapipe/calculators/tflite/ssd_anchors_calculator.pb.h" +#include "mediapipe/framework/formats/object_detection/anchor.pb.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" +#include "research/aimatter/api/face_detector_metadata_generated.h" +#include "research/aimatter/api/internal/blaze_face/anchor_ssd_decoder.h" +#include "util/task/contrib/status_macros/ret_check.h" + +namespace mediapipe::tasks::vision::hand_detector { + +namespace rapi = ::research::aimatter::api; + +constexpr int kPalmClassNum = 1; +constexpr int kBboxCoordsNum = 4; +constexpr int kPalmKeypointNum = 7; +constexpr int kKeypointCoordsNum = 2; +constexpr int kCoordsNum = + kBboxCoordsNum + kKeypointCoordsNum * kPalmKeypointNum; + +absl::Status ConfigureSsdAnchorsCalculator( + const ImageTensorSpecs& image_tensor_specs, + const research::aimatter::api::fb::FaceDetectorMetadata& metadata_fb, + mediapipe::SsdAnchorsCalculatorOptions& options) { + options.Clear(); + const auto& output_spec_fb = *metadata_fb.output_spec(); + RET_CHECK(output_spec_fb.v1() == nullptr && output_spec_fb.v2() != nullptr) + << "Only support BlazeFaceOutputSpecV2."; + auto* configuration = output_spec_fb.v2()->anchors_scheme()->configuration(); + std::vector configs; + configs.reserve(configuration->Length()); + for (int i = 0; i < configuration->Length(); ++i) { + configs.push_back({.stride = configuration->Get(i)->stride(), + .anchors_num = static_cast( + configuration->Get(i)->anchors()->Length())}); + } + const int tensor_height = image_tensor_specs.image_height; + const int tensor_width = image_tensor_specs.image_width; + const auto& rapi_anchors = rapi::internal::AnchorSsdDecoder::GenerateAnchors( + configs, tensor_width, tensor_height); + for (const auto rapi_anchor : rapi_anchors) { + auto* anchor = options.add_fixed_anchors(); + anchor->set_x_center(rapi_anchor.center_x / tensor_width); + anchor->set_y_center(rapi_anchor.center_y / tensor_height); + anchor->set_w(1.0); + anchor->set_h(1.0); + } + return absl::OkStatus(); +} + +absl::Status ConfigureTensorsToDetectionsCalculator( + const ImageTensorSpecs& image_tensor_specs, int num_boxes, + float min_detection_confidence, + mediapipe::TensorsToDetectionsCalculatorOptions& options) { + options.Clear(); + const int tensor_height = image_tensor_specs.image_height; + const int tensor_width = image_tensor_specs.image_width; + options.set_num_classes(kPalmClassNum); + options.set_num_boxes(num_boxes); + options.set_num_coords(kCoordsNum); + options.set_box_coord_offset(0); + options.set_keypoint_coord_offset(kBboxCoordsNum); + options.set_num_keypoints(kPalmKeypointNum); + options.set_num_values_per_keypoint(kKeypointCoordsNum); + options.set_sigmoid_score(true); + options.set_box_format(mediapipe::TensorsToDetectionsCalculatorOptions::XYWH); + options.set_min_score_thresh(min_detection_confidence); + options.set_x_scale(tensor_width); + options.set_y_scale(tensor_height); + options.set_w_scale(tensor_width); + options.set_h_scale(tensor_height); + return absl::OkStatus(); +} + +} // namespace mediapipe::tasks::vision::hand_detector diff --git a/mediapipe/tasks/cc/vision/hand_detector/utils.h b/mediapipe/tasks/cc/vision/hand_detector/utils.h new file mode 100644 index 000000000..5b211602b --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_detector/utils.h @@ -0,0 +1,27 @@ +#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_DETECTOR_UTILS_H_ +#define MEDIAPIPE_TASKS_CC_VISION_HAND_DETECTOR_UTILS_H_ + +#include "absl/status/status.h" +#include "mediapipe/calculators/tensor/tensors_to_detections_calculator.pb.h" +#include "mediapipe/calculators/tflite/ssd_anchors_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" +#include "research/aimatter/api/face_detector_metadata_generated.h" + +namespace mediapipe::tasks::vision::hand_detector { + +// Configure SsdAnchorsCalculator from the tflite model with aimatter metadata. +absl::Status ConfigureSsdAnchorsCalculator( + const ImageTensorSpecs& image_tensor_specs, + const research::aimatter::api::fb::FaceDetectorMetadata& metadata_fb, + mediapipe::SsdAnchorsCalculatorOptions& options); + +// Configure TensorsToDetectionCalculator. +absl::Status ConfigureTensorsToDetectionsCalculator( + const ImageTensorSpecs& image_tensor_specs, int num_boxes, + float min_detection_confidence, + mediapipe::TensorsToDetectionsCalculatorOptions& options); + +} // namespace mediapipe::tasks::vision::hand_detector + +#endif // MEDIAPIPE_TASKS_CC_VISION_HAND_DETECTOR_UTILS_H_ diff --git a/mediapipe/tasks/cc/vision/hand_detector/utils_test.cc b/mediapipe/tasks/cc/vision/hand_detector/utils_test.cc new file mode 100644 index 000000000..32f4decc0 --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_detector/utils_test.cc @@ -0,0 +1,69 @@ +#include "mediapipe/tasks/cc/vision/hand_detector/utils.h" + +#include "absl/status/statusor.h" +#include "mediapipe/calculators/tensor/tensors_to_detections_calculator.pb.h" +#include "mediapipe/calculators/tflite/ssd_anchors_calculator.pb.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/cc/core/model_resources.h" +#include "mediapipe/tasks/cc/core/proto/external_file.pb.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" +#include "research/aimatter/api/metadata_utils.h" + +namespace mediapipe::tasks::vision::hand_detector { +namespace { + +namespace rapi = ::research::aimatter::api; + +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::core::ModelResources; +using ::mediapipe::tasks::core::proto::ExternalFile; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kTestModelResourcesTag[] = "test_model_resources"; +constexpr char kModelWithMetadataName[] = "palm_detection_full.tflite"; +constexpr float kEpsilon = 1e-6; + +// Helper function to get ModelResources. +absl::StatusOr> CreateModelResourcesForModel( + absl::string_view model_name) { + auto external_file = std::make_unique(); + external_file->set_file_name(JoinPath("./", kTestDataDirectory, model_name)); + return ModelResources::Create(kTestModelResourcesTag, + std::move(external_file)); +} + +TEST(Utils, ConfigureSsdAnchorsCalculator) { + MP_ASSERT_OK_AND_ASSIGN(auto model_resources, + CreateModelResourcesForModel(kModelWithMetadataName)); + const tflite::Model& model = *(model_resources->GetTfLiteModel()); + MP_ASSERT_OK_AND_ASSIGN( + const auto metadata_fb, + rapi::VerifyAndLoadMetadata( + model, rapi::fb::FaceDetectorMetadataIdentifier())); + mediapipe::SsdAnchorsCalculatorOptions ssd_anchors_options; + MP_ASSERT_OK(ConfigureSsdAnchorsCalculator( + *BuildInputImageTensorSpecs(*model_resources), *metadata_fb, + ssd_anchors_options)); + EXPECT_EQ(ssd_anchors_options.fixed_anchors().size(), 2016); +} + +TEST(Utils, ConfigureTensorsToDetectionCalculator) { + MP_ASSERT_OK_AND_ASSIGN(auto model_resources, + CreateModelResourcesForModel(kModelWithMetadataName)); + mediapipe::TensorsToDetectionsCalculatorOptions tensors_to_detections_options; + MP_ASSERT_OK(ConfigureTensorsToDetectionsCalculator( + *BuildInputImageTensorSpecs(*model_resources), 2016, 0.1, + tensors_to_detections_options)); + EXPECT_NEAR(tensors_to_detections_options.x_scale(), 192, kEpsilon); + EXPECT_NEAR(tensors_to_detections_options.y_scale(), 192, kEpsilon); + EXPECT_NEAR(tensors_to_detections_options.w_scale(), 192, kEpsilon); + EXPECT_NEAR(tensors_to_detections_options.h_scale(), 192, kEpsilon); + EXPECT_NEAR(tensors_to_detections_options.min_score_thresh(), 0.1, kEpsilon); +} + +} // namespace +} // namespace mediapipe::tasks::vision::hand_detector