From b91ec031a2e2ad3e525ed3d703fb85b887b9d05a Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 4 Dec 2023 10:27:19 -0800 Subject: [PATCH 01/14] Adding version.bzl for tracking version PiperOrigin-RevId: 587767961 --- version.bzl | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 version.bzl diff --git a/version.bzl b/version.bzl new file mode 100644 index 000000000..c5d7d4d83 --- /dev/null +++ b/version.bzl @@ -0,0 +1,3 @@ +"""Version number for MediaPipe""" + +MEDIAPIPE_FULL_VERSION = "0.10.8" From dad2626f91fd927b6a6958cfc66a8ee8b05e6df5 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 4 Dec 2023 14:55:48 -0800 Subject: [PATCH 02/14] No public description PiperOrigin-RevId: 587850211 --- platform_mappings | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/platform_mappings b/platform_mappings index debf1e4b8..e6ada28df 100644 --- a/platform_mappings +++ b/platform_mappings @@ -24,35 +24,35 @@ flags: --cpu=darwin_x86_64 --apple_platform_type=macos - @@mediapipe//mediapipe:macos_x86_64_platform + @mediapipe//mediapipe:macos_x86_64_platform --cpu=darwin_arm64 --apple_platform_type=macos - @@mediapipe//mediapipe:macos_arm64_platform + @mediapipe//mediapipe:macos_arm64_platform --cpu=ios_i386 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_i386_platform + @mediapipe//mediapipe:ios_i386_platform --cpu=ios_x86_64 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_x86_64_platform + @mediapipe//mediapipe:ios_x86_64_platform --cpu=ios_sim_arm64 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_sim_arm64_platform + @mediapipe//mediapipe:ios_sim_arm64_platform --cpu=ios_armv7 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_armv7_platform + @mediapipe//mediapipe:ios_armv7_platform --cpu=ios_arm64 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_arm64_platform + @mediapipe//mediapipe:ios_arm64_platform --cpu=ios_arm64e --apple_platform_type=ios - @@mediapipe//mediapipe:ios_arm64e_platform + 
@mediapipe//mediapipe:ios_arm64e_platform --cpu=x64_windows @mediapipe//mediapipe:windows_platform From 0f90ba17dc589ff4ed22e8638a67d01f6f1c563b Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 5 Dec 2023 13:24:31 -0800 Subject: [PATCH 03/14] Use Java Proto Lite Target for Hand ROI Refinement proto PiperOrigin-RevId: 588170664 --- .../java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl index f2e4d485f..ae167a1bc 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl @@ -85,7 +85,7 @@ _VISION_TASKS_IMAGE_GENERATOR_JAVA_PROTO_LITE_TARGETS = [ "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_java_proto_lite", ] _TEXT_TASKS_JAVA_PROTO_LITE_TARGETS = [ From b5c1c11f6a64f216da5b2bd9ebaecfd43d3811ba Mon Sep 17 00:00:00 2001 From: Kinar Date: Wed, 6 Dec 2023 02:59:39 -0800 Subject: [PATCH 04/14] Added Hand Landmarker C Tasks API and tests --- .../gesture_recognizer/gesture_recognizer.cc | 2 +- ...esture_recognizer_result_converter_test.cc | 16 + .../tasks/c/vision/hand_landmarker/BUILD | 141 +++++++++ .../vision/hand_landmarker/hand_landmarker.cc | 283 ++++++++++++++++++ .../vision/hand_landmarker/hand_landmarker.h | 144 +++++++++ .../hand_landmarker/hand_landmarker_result.h | 58 ++++ 
.../hand_landmarker_result_converter.cc | 103 +++++++ .../hand_landmarker_result_converter.h | 32 ++ .../hand_landmarker_result_converter_test.cc | 125 ++++++++ .../hand_landmarker/hand_landmarker_test.cc | 261 ++++++++++++++++ 10 files changed, 1164 insertions(+), 1 deletion(-) create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/BUILD create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.cc create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter_test.cc create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc index d3b0868f8..f05b9a122 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc @@ -228,7 +228,7 @@ int CppGestureRecognizerRecognizeAsync(void* recognizer, const MpImage& image, auto cpp_recognizer = static_cast<GestureRecognizer*>(recognizer); auto cpp_result = cpp_recognizer->RecognizeAsync(*img, timestamp_ms); if (!cpp_result.ok()) { - ABSL_LOG(ERROR) << "Data preparation for the image classification failed: " + ABSL_LOG(ERROR) << "Data preparation for the gesture recognition failed: " << cpp_result; return CppProcessError(cpp_result, error_msg); } diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc index 603e5ed7d..f37dbf8e4 100644 ---
a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc @@ -95,6 +95,22 @@ TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { } } + // Verify conversion of handedness + EXPECT_NE(c_result.handedness, nullptr); + EXPECT_EQ(c_result.handedness_count, cpp_result.handedness.size()); + + for (uint32_t i = 0; i < c_result.handedness_count; ++i) { + EXPECT_EQ(c_result.handedness[i].categories_count, + cpp_result.handedness[i].classification_size()); + for (uint32_t j = 0; j < c_result.handedness[i].categories_count; ++j) { + auto handedness = cpp_result.handedness[i].classification(j); + EXPECT_EQ(std::string(c_result.handedness[i].categories[j].category_name), + handedness.label()); + EXPECT_FLOAT_EQ(c_result.handedness[i].categories[j].score, + handedness.score()); + } + } + // Verify conversion of hand_landmarks EXPECT_NE(c_result.hand_landmarks, nullptr); EXPECT_EQ(c_result.hand_landmarks_count, cpp_result.hand_landmarks.size()); diff --git a/mediapipe/tasks/c/vision/hand_landmarker/BUILD b/mediapipe/tasks/c/vision/hand_landmarker/BUILD new file mode 100644 index 000000000..a7ac13935 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/BUILD @@ -0,0 +1,141 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "hand_landmarker_result", + hdrs = ["hand_landmarker_result.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:landmark", + ], +) + +cc_library( + name = "hand_landmarker_result_converter", + srcs = ["hand_landmarker_result_converter.cc"], + hdrs = ["hand_landmarker_result_converter.h"], + deps = [ + ":hand_landmarker_result", + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:category_converter", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/components/containers:landmark_converter", + "//mediapipe/tasks/cc/components/containers:category", + "//mediapipe/tasks/cc/components/containers:landmark", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_result", + ], +) + +cc_test( + name = "hand_landmarker_result_converter_test", + srcs = ["hand_landmarker_result_converter_test.cc"], + linkstatic = 1, + deps = [ + ":hand_landmarker_result", + ":hand_landmarker_result_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_result", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "hand_landmarker_lib", + srcs = ["hand_landmarker.cc"], + hdrs = ["hand_landmarker.h"], + visibility = ["//visibility:public"], + deps = [ + ":hand_landmarker_result", + ":hand_landmarker_result_converter", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/tasks/c/core:base_options", + "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/hand_landmarker", + 
"//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_result", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + +cc_test( + name = "hand_landmarker_test", + srcs = ["hand_landmarker_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + linkstatic = 1, + deps = [ + ":hand_landmarker_lib", + ":hand_landmarker_result", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/hand_landmarker:libhand_landmarker.so +cc_binary( + name = "libhand_landmarker.so", + linkopts = [ + "-Wl,-soname=libhand_landmarker.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":hand_landmarker_lib"], +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/hand_landmarker:libhand_landmarker.dylib +cc_binary( + name = "libhand_landmarker.dylib", + linkopts = [ + "-Wl,-install_name,libhand_landmarker.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":hand_landmarker_lib"], +) diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc 
b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc new file mode 100644 index 000000000..f6df09f96 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc @@ -0,0 +1,283 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h" + +#include <cstdint> +#include <cstdlib> +#include <memory> +#include <utility> + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe::tasks::c::vision::hand_landmarker { + +namespace { + +using ::mediapipe::tasks::c::components::containers:: + CppCloseHandLandmarkerResult; +using ::mediapipe::tasks::c::components::containers:: + CppConvertToHandLandmarkerResult; +using
::mediapipe::tasks::c::core::CppConvertToBaseOptions; +using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::core::RunningMode; +using ::mediapipe::tasks::vision::hand_landmarker::HandLandmarker; +typedef ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult + CppHandLandmarkerResult; + +int CppProcessError(absl::Status status, char** error_msg) { + if (error_msg) { + *error_msg = strdup(status.ToString().c_str()); + } + return status.raw_code(); +} + +} // namespace + +void CppConvertToHandLandmarkerOptions( + const HandLandmarkerOptions& in, + mediapipe::tasks::vision::hand_landmarker::HandLandmarkerOptions* out) { + out->num_hands = in.num_hands; + out->min_hand_detection_confidence = in.min_hand_detection_confidence; + out->min_hand_presence_confidence = in.min_hand_presence_confidence; + out->min_tracking_confidence = in.min_tracking_confidence; +} + +HandLandmarker* CppHandLandmarkerCreate(const HandLandmarkerOptions& options, + char** error_msg) { + auto cpp_options = std::make_unique< + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerOptions>(); + + CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); + CppConvertToHandLandmarkerOptions(options, cpp_options.get()); + cpp_options->running_mode = static_cast<RunningMode>(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM.
+ if (cpp_options->running_mode == RunningMode::LIVE_STREAM) { + if (options.result_callback == nullptr) { + const absl::Status status = absl::InvalidArgumentError( + "Provided null pointer to callback function."); + ABSL_LOG(ERROR) << "Failed to create HandLandmarker: " << status; + CppProcessError(status, error_msg); + return nullptr; + } + + HandLandmarkerOptions::result_callback_fn result_callback = + options.result_callback; + cpp_options->result_callback = + [result_callback](absl::StatusOr<CppHandLandmarkerResult> cpp_result, + const Image& image, int64_t timestamp) { + char* error_msg = nullptr; + + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + CppProcessError(cpp_result.status(), &error_msg); + result_callback(nullptr, MpImage(), timestamp, error_msg); + free(error_msg); + return; + } + + // Result is valid for the lifetime of the callback function. + HandLandmarkerResult result; + CppConvertToHandLandmarkerResult(*cpp_result, &result); + + const auto& image_frame = image.GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast<::ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + result_callback(&result, mp_image, timestamp, + /* error_msg= */ nullptr); + + CppCloseHandLandmarkerResult(&result); + }; + } + + auto detector = HandLandmarker::Create(std::move(cpp_options)); + if (!detector.ok()) { + ABSL_LOG(ERROR) << "Failed to create HandLandmarker: " << detector.status(); + CppProcessError(detector.status(), error_msg); + return nullptr; + } + return detector->release(); +} + +int CppHandLandmarkerDetect(void* detector, const MpImage& image, + HandLandmarkerResult* result, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + const absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet."); + + ABSL_LOG(ERROR) <<
"Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast<ImageFormat::Format>(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast<HandLandmarker*>(detector); + auto cpp_result = cpp_detector->Detect(*img); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToHandLandmarkerResult(*cpp_result, result); + return 0; +} + +int CppHandLandmarkerDetectForVideo(void* detector, const MpImage& image, + int64_t timestamp_ms, + HandLandmarkerResult* result, + char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast<ImageFormat::Format>(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast<HandLandmarker*>(detector); + auto cpp_result = cpp_detector->DetectForVideo(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToHandLandmarkerResult(*cpp_result, result); + return 0; +} + +int CppHandLandmarkerDetectAsync(void* detector, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = +
absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast<ImageFormat::Format>(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast<HandLandmarker*>(detector); + auto cpp_result = cpp_detector->DetectAsync(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Data preparation for the landmark detection failed: " + << cpp_result; + return CppProcessError(cpp_result, error_msg); + } + return 0; +} + +void CppHandLandmarkerCloseResult(HandLandmarkerResult* result) { + CppCloseHandLandmarkerResult(result); +} + +int CppHandLandmarkerClose(void* detector, char** error_msg) { + auto cpp_detector = static_cast<HandLandmarker*>(detector); + auto result = cpp_detector->Close(); + if (!result.ok()) { + ABSL_LOG(ERROR) << "Failed to close HandLandmarker: " << result; + return CppProcessError(result, error_msg); + } + delete cpp_detector; + return 0; +} + +} // namespace mediapipe::tasks::c::vision::hand_landmarker + +extern "C" { + +void* hand_landmarker_create(struct HandLandmarkerOptions* options, + char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerCreate( + *options, error_msg); +} + +int hand_landmarker_detect_image(void* detector, const MpImage& image, + HandLandmarkerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker:: + CppHandLandmarkerDetect(detector, image, result, error_msg); +} + +int hand_landmarker_detect_for_video(void* detector, const MpImage& image, + int64_t timestamp_ms, + HandLandmarkerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker:: +
CppHandLandmarkerDetectForVideo(detector, image, timestamp_ms, result, + error_msg); +} + +int hand_landmarker_detect_async(void* detector, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker:: + CppHandLandmarkerDetectAsync(detector, image, timestamp_ms, error_msg); +} + +void hand_landmarker_close_result(HandLandmarkerResult* result) { + mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerCloseResult( + result); +} + +int hand_landmarker_close(void* detector, char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerClose( + detector, error_msg); +} + +} // extern "C" diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h new file mode 100644 index 000000000..e813f07e5 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h @@ -0,0 +1,144 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_H_ +#define MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_H_ + +#include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The options for configuring a MediaPipe hand landmarker task. +struct HandLandmarkerOptions { + // Base options for configuring MediaPipe Tasks, such as specifying the model + // file with metadata, accelerator options, op resolver, etc. + struct BaseOptions base_options; + + // The running mode of the task. Default to the image mode. + // HandLandmarker has three running modes: + // 1) The image mode for recognizing hand landmarks on single image inputs. + // 2) The video mode for recognizing hand landmarks on the decoded frames of a + // video. + // 3) The live stream mode for recognizing hand landmarks on the live stream + // of input data, such as from camera. In this mode, the "result_callback" + // below must be specified to receive the detection results asynchronously. + RunningMode running_mode; + + // The maximum number of hands can be detected by the HandLandmarker. + int num_hands = 1; + + // The minimum confidence score for the hand detection to be considered + // successful. + float min_hand_detection_confidence = 0.5; + + // The minimum confidence score of hand presence score in the hand landmark + // detection. + float min_hand_presence_confidence = 0.5; + + // The minimum confidence score for the hand tracking to be considered + // successful. + float min_tracking_confidence = 0.5; + + // The user-defined result callback for processing live stream data. 
+ // The result callback should only be specified when the running mode is set + // to RunningMode::LIVE_STREAM. Arguments of the callback function include: + // the pointer to detection result, the image that result was obtained + // on, the timestamp relevant to detection results and pointer to error + // message in case of any failure. The validity of the passed arguments is + // true for the lifetime of the callback function. + // + // The result is freed when the callback returns; copy anything to be kept. + typedef void (*result_callback_fn)(HandLandmarkerResult* result, + const MpImage& image, int64_t timestamp_ms, + char* error_msg); + result_callback_fn result_callback; +}; + +// Creates a HandLandmarker from the provided `options`. +// Returns a pointer to the hand landmarker on success. +// If an error occurs, returns `nullptr` and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT void* hand_landmarker_create(struct HandLandmarkerOptions* options, + char** error_msg); + +// Detects hand landmarks on the input `image`. Returns `0` on success. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int hand_landmarker_detect_image(void* detector, const MpImage& image, + HandLandmarkerResult* result, + char** error_msg); + +// Detects hand landmarks on the provided video frame. +// Only use this method when the HandLandmarker is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`).
You must free the memory +// allocated for the error message. +MP_EXPORT int hand_landmarker_detect_for_video(void* detector, + const MpImage& image, + int64_t timestamp_ms, + HandLandmarkerResult* result, + char** error_msg); + +// Sends live image data to hand landmark detection, and the results will be +// available via the `result_callback` provided in the HandLandmarkerOptions. +// Only use this method when the HandLandmarker is created with the live +// stream running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the hand landmarker. The input timestamps must be monotonically +// increasing. +// The `result_callback` provides: +// - The detection results as a HandLandmarkerResult object. +// - The const reference to the corresponding input image that the hand +// landmarker runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int hand_landmarker_detect_async(void* detector, const MpImage& image, + int64_t timestamp_ms, + char** error_msg); + +// Frees the memory allocated inside a HandLandmarkerResult result. +// Does not free the result pointer itself. +MP_EXPORT void hand_landmarker_close_result(HandLandmarkerResult* result); + +// Frees hand landmarker. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message.
+MP_EXPORT int hand_landmarker_close(void* detector, char** error_msg); + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_H_ diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h new file mode 100644 index 000000000..da5e4c5aa --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h @@ -0,0 +1,58 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_RESULT_HAND_LANDMARKER_RESULT_H_ +#define MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_RESULT_HAND_LANDMARKER_RESULT_H_ + +#include <cstdint> + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The hand landmarker result from HandLandmarker, where each vector +// element represents a single hand detected in the image. +struct HandLandmarkerResult { + // Classification of handedness. + struct Categories* handedness; + + // The number of elements in the handedness array. + uint32_t handedness_count; + + // Detected hand landmarks in normalized image coordinates.
+ struct NormalizedLandmarks* hand_landmarks; + + // The number of elements in the hand_landmarks array. + uint32_t hand_landmarks_count; + + // Detected hand landmarks in world coordinates. + struct Landmarks* hand_world_landmarks; + + // The number of elements in the hand_world_landmarks array. + uint32_t hand_world_landmarks_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_RESULT_HAND_LANDMARKER_RESULT_H_ diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.cc new file mode 100644 index 000000000..3ce32ee63 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.cc @@ -0,0 +1,103 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h" + +#include <cstdint> +#include <vector> + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/category_converter.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/components/containers/category.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +using CppCategory = ::mediapipe::tasks::components::containers::Category; +using CppLandmark = ::mediapipe::tasks::components::containers::Landmark; +using CppNormalizedLandmark = + ::mediapipe::tasks::components::containers::NormalizedLandmark; + +void CppConvertToHandLandmarkerResult( + const mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult& in, + HandLandmarkerResult* out) { + out->handedness_count = in.handedness.size(); + out->handedness = new Categories[out->handedness_count]; + + for (uint32_t i = 0; i < out->handedness_count; ++i) { + uint32_t categories_count = in.handedness[i].categories.size(); + out->handedness[i].categories_count = categories_count; + out->handedness[i].categories = new Category[categories_count]; + + for (uint32_t j = 0; j < categories_count; ++j) { + const auto& cpp_category = in.handedness[i].categories[j]; + CppConvertToCategory(cpp_category, &out->handedness[i].categories[j]); + } + } + + out->hand_landmarks_count = in.hand_landmarks.size(); + out->hand_landmarks = new NormalizedLandmarks[out->hand_landmarks_count]; + for (uint32_t i = 0; i < out->hand_landmarks_count; ++i) { + std::vector<CppNormalizedLandmark> cpp_normalized_landmarks; +
for (uint32_t j = 0; j < in.hand_landmarks[i].landmarks.size(); ++j) { + const auto& cpp_landmark = in.hand_landmarks[i].landmarks[j]; + cpp_normalized_landmarks.push_back(cpp_landmark); + } + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &out->hand_landmarks[i]); + } + + out->hand_world_landmarks_count = in.hand_world_landmarks.size(); + out->hand_world_landmarks = new Landmarks[out->hand_world_landmarks_count]; + for (uint32_t i = 0; i < out->hand_world_landmarks_count; ++i) { + std::vector<CppLandmark> cpp_landmarks; + for (uint32_t j = 0; j < in.hand_world_landmarks[i].landmarks.size(); ++j) { + const auto& cpp_landmark = in.hand_world_landmarks[i].landmarks[j]; + cpp_landmarks.push_back(cpp_landmark); + } + CppConvertToLandmarks(cpp_landmarks, &out->hand_world_landmarks[i]); + } +} + +void CppCloseHandLandmarkerResult(HandLandmarkerResult* result) { + for (uint32_t i = 0; i < result->handedness_count; ++i) { + CppCloseCategories(&result->handedness[i]); + } + delete[] result->handedness; + + for (uint32_t i = 0; i < result->hand_landmarks_count; ++i) { + CppCloseNormalizedLandmarks(&result->hand_landmarks[i]); + } + delete[] result->hand_landmarks; + + for (uint32_t i = 0; i < result->hand_world_landmarks_count; ++i) { + CppCloseLandmarks(&result->hand_world_landmarks[i]); + } + delete[] result->hand_world_landmarks; + + result->handedness = nullptr; + result->hand_landmarks = nullptr; + result->hand_world_landmarks = nullptr; + + result->handedness_count = 0; + result->hand_landmarks_count = 0; + result->hand_world_landmarks_count = 0; +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h new file mode 100644 index 000000000..9fcd8e470 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h @@ -0,0 +1,32 @@ +/* Copyright 2023 The MediaPipe
Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_HAND_LANDMARKER_RESULT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_HAND_LANDMARKER_RESULT_CONVERTER_H_ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToHandLandmarkerResult( + const mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult& in, + HandLandmarkerResult* out); + +void CppCloseHandLandmarkerResult(HandLandmarkerResult* result); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_HAND_LANDMARKER_RESULT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter_test.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter_test.cc new file mode 100644 index 000000000..c38f5ea06 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter_test.cc @@ -0,0 +1,125 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h" + +#include +#include + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void InitHandLandmarkerResult( + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult* + cpp_result) { + // Initialize handedness + mediapipe::tasks::components::containers::Category cpp_category = { + /* index= */ 1, + /* score= */ 0.8f, + /* category_name= */ "handeness_label_1", + /* display_name= */ "handeness_display_name_1"}; + mediapipe::tasks::components::containers::Classifications + classifications_for_handedness; + classifications_for_handedness.categories.push_back(cpp_category); + cpp_result->handedness.push_back(classifications_for_handedness); + + // Initialize hand_landmarks + mediapipe::tasks::components::containers::NormalizedLandmark + cpp_normalized_landmark = {/* x= */ 0.1f, + /* y= */ 0.2f, + /* z= */ 0.3f}; + mediapipe::tasks::components::containers::NormalizedLandmarks + cpp_normalized_landmarks; + cpp_normalized_landmarks.landmarks.push_back(cpp_normalized_landmark); + cpp_result->hand_landmarks.push_back(cpp_normalized_landmarks); + + // Initialize hand_world_landmarks + mediapipe::tasks::components::containers::Landmark 
cpp_landmark = {
+      /* x= */ 1.0f,
+      /* y= */ 1.1f,
+      /* z= */ 1.2f};
+  mediapipe::tasks::components::containers::Landmarks cpp_landmarks;
+  cpp_landmarks.landmarks.push_back(cpp_landmark);
+  cpp_result->hand_world_landmarks.push_back(cpp_landmarks);
+}
+
+TEST(HandLandmarkerResultConverterTest, ConvertsCustomResult) {
+  ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult cpp_result;
+  InitHandLandmarkerResult(&cpp_result);
+
+  HandLandmarkerResult c_result;
+  CppConvertToHandLandmarkerResult(cpp_result, &c_result);
+
+  // Verify conversion of hand_landmarks
+  EXPECT_NE(c_result.hand_landmarks, nullptr);
+  EXPECT_EQ(c_result.hand_landmarks_count, cpp_result.hand_landmarks.size());
+
+  for (uint32_t i = 0; i < c_result.hand_landmarks_count; ++i) {
+    EXPECT_EQ(c_result.hand_landmarks[i].landmarks_count,
+              cpp_result.hand_landmarks[i].landmarks.size());
+    for (uint32_t j = 0; j < c_result.hand_landmarks[i].landmarks_count; ++j) {
+      const auto& landmark = cpp_result.hand_landmarks[i].landmarks[j];
+      EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].x, landmark.x);
+      EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].y, landmark.y);
+      EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].z, landmark.z);
+    }
+  }
+
+  // Verify conversion of hand_world_landmarks
+  EXPECT_NE(c_result.hand_world_landmarks, nullptr);
+  EXPECT_EQ(c_result.hand_world_landmarks_count,
+            cpp_result.hand_world_landmarks.size());
+
+  for (uint32_t i = 0; i < c_result.hand_world_landmarks_count; ++i) {
+    EXPECT_EQ(c_result.hand_world_landmarks[i].landmarks_count,
+              cpp_result.hand_world_landmarks[i].landmarks.size());
+    for (uint32_t j = 0; j < c_result.hand_world_landmarks[i].landmarks_count;
+         ++j) {
+      const auto& landmark = cpp_result.hand_world_landmarks[i].landmarks[j];
+      EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].x,
+                      landmark.x);
+      EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].y,
+                      landmark.y);
+      EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].z,
+                      landmark.z);
+    }
+  }
+
+  CppCloseHandLandmarkerResult(&c_result);
+}
+
+TEST(HandLandmarkerResultConverterTest, FreesMemory) {
+  ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult cpp_result;
+  InitHandLandmarkerResult(&cpp_result);
+
+  HandLandmarkerResult c_result;
+  CppConvertToHandLandmarkerResult(cpp_result, &c_result);
+
+  EXPECT_NE(c_result.handedness, nullptr);
+  EXPECT_NE(c_result.hand_landmarks, nullptr);
+  EXPECT_NE(c_result.hand_world_landmarks, nullptr);
+
+  CppCloseHandLandmarkerResult(&c_result);
+
+  EXPECT_EQ(c_result.handedness, nullptr);
+  EXPECT_EQ(c_result.hand_landmarks, nullptr);
+  EXPECT_EQ(c_result.hand_world_landmarks, nullptr);
+}
+
+}  // namespace mediapipe::tasks::c::components::containers
diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc
new file mode 100644
index 000000000..c418657e5
--- /dev/null
+++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc
@@ -0,0 +1,261 @@
+/* Copyright 2023 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h" + +#include <cstdint> +#include <cstdlib> +#include <string> + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace { + +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::vision::DecodeImageFromFile; +using testing::HasSubstr; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kModelName[] = "hand_landmarker.task"; +constexpr char kImageFile[] = "fist.jpg"; +constexpr float kScorePrecision = 1e-2; +constexpr float kLandmarkPrecision = 1e-1; +constexpr int kIterations = 100; + +std::string GetFullPath(absl::string_view file_name) { + return JoinPath("./", kTestDataDirectory, file_name); +} + +void MatchesHandLandmarkerResult(HandLandmarkerResult* result, + const float score_precision, + const float landmark_precision) { + // Expects to have the same number of hands detected. + EXPECT_EQ(result->handedness_count, 1); + + // Actual handedness matches expected handedness. + EXPECT_EQ(std::string{result->handedness[0].categories[0].category_name}, + "Right"); + EXPECT_NEAR(result->handedness[0].categories[0].score, 0.9893f, + score_precision); + + // Actual landmarks match expected landmarks. 
+ EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].x, 0.477f, + landmark_precision); + EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].y, 0.661f, + landmark_precision); + EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].z, 0.0f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].x, -0.009f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].y, 0.082f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].z, 0.006f, + landmark_precision); +} + +TEST(HandLandmarkerTest, ImageModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + }; + + void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + HandLandmarkerResult result; + hand_landmarker_detect_image(detector, mp_image, &result, + /* error_msg */ nullptr); + MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); + hand_landmarker_close_result(&result); + hand_landmarker_close(detector, /* error_msg */ nullptr); +} + +TEST(HandLandmarkerTest, VideoModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string 
model_path = GetFullPath(kModelName); + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::VIDEO, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + }; + + void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + HandLandmarkerResult result; + hand_landmarker_detect_for_video(detector, mp_image, i, &result, + /* error_msg */ nullptr); + + MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); + hand_landmarker_close_result(&result); + } + hand_landmarker_close(detector, /* error_msg */ nullptr); +} + +// A structure to support LiveStreamModeTest below. This structure holds a +// static method `Fn` for a callback function of C API. A `static` qualifier +// allows to take an address of the method to follow API style. Another static +// struct member is `last_timestamp` that is used to verify that current +// timestamp is greater than the previous one. 
+struct LiveStreamModeCallback { + static int64_t last_timestamp; + static void Fn(HandLandmarkerResult* detector_result, const MpImage& image, + int64_t timestamp, char* error_msg) { + ASSERT_NE(detector_result, nullptr); + ASSERT_EQ(error_msg, nullptr); + MatchesHandLandmarkerResult(detector_result, kScorePrecision, + kLandmarkPrecision); + EXPECT_GT(image.image_frame.width, 0); + EXPECT_GT(image.image_frame.height, 0); + EXPECT_GT(timestamp, last_timestamp); + last_timestamp++; + } +}; +int64_t LiveStreamModeCallback::last_timestamp = -1; + +TEST(HandLandmarkerTest, LiveStreamModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::LIVE_STREAM, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + /* result_callback= */ LiveStreamModeCallback::Fn, + }; + + void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + EXPECT_GE(hand_landmarker_detect_async(detector, mp_image, i, + /* error_msg */ nullptr), + 0); + } + hand_landmarker_close(detector, /* error_msg */ nullptr); + + // Due to the flow limiter, the total of outputs might be smaller than the + // number of iterations. 
+ EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations); + EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0); +} + +TEST(HandLandmarkerTest, InvalidArgumentHandling) { + // It is an error to set neither the asset buffer nor the path. + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ nullptr}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + }; + + char* error_msg; + void* detector = hand_landmarker_create(&options, &error_msg); + EXPECT_EQ(detector, nullptr); + + EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify")); + + free(error_msg); +} + +TEST(HandLandmarkerTest, FailedRecognitionHandling) { + const std::string model_path = GetFullPath(kModelName); + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + }; + + void* detector = hand_landmarker_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(detector, nullptr); + + const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; + HandLandmarkerResult result; + char* error_msg; + hand_landmarker_detect_image(detector, mp_image, &result, &error_msg); + EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); + free(error_msg); + hand_landmarker_close(detector, /* error_msg */ nullptr); +} + +} // namespace From e4a6ea3079e5e68b3392513459dca96bbbe311da Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Wed, 6 Dec 2023 04:25:37 -0800 Subject: [PATCH 05/14] No public description PiperOrigin-RevId: 588376739 --- 
mediapipe/framework/BUILD | 2 +- mediapipe/framework/calculator_graph.cc | 15 ++++++++++----- mediapipe/framework/calculator_graph.h | 7 ++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/mediapipe/framework/BUILD b/mediapipe/framework/BUILD index e5e72cfbe..38812b39e 100644 --- a/mediapipe/framework/BUILD +++ b/mediapipe/framework/BUILD @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") load("@bazel_skylib//:bzl_library.bzl", "bzl_library") @@ -368,6 +367,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", diff --git a/mediapipe/framework/calculator_graph.cc b/mediapipe/framework/calculator_graph.cc index 1890d799c..1bd356eac 100644 --- a/mediapipe/framework/calculator_graph.cc +++ b/mediapipe/framework/calculator_graph.cc @@ -28,6 +28,7 @@ #include "absl/container/flat_hash_set.h" #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" +#include "absl/log/check.h" #include "absl/memory/memory.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" @@ -890,12 +891,12 @@ absl::Status CalculatorGraph::WaitForObservedOutput() { } absl::Status CalculatorGraph::AddPacketToInputStream( - const std::string& stream_name, const Packet& packet) { + absl::string_view stream_name, const Packet& packet) { return AddPacketToInputStreamInternal(stream_name, packet); } absl::Status CalculatorGraph::AddPacketToInputStream( - const std::string& stream_name, Packet&& packet) { + absl::string_view stream_name, Packet&& packet) { return AddPacketToInputStreamInternal(stream_name, 
std::move(packet)); } @@ -918,14 +919,18 @@ absl::Status CalculatorGraph::SetInputStreamTimestampBound( // std::forward will deduce the correct type as we pass along packet. template absl::Status CalculatorGraph::AddPacketToInputStreamInternal( - const std::string& stream_name, T&& packet) { + absl::string_view stream_name, T&& packet) { + auto stream_it = graph_input_streams_.find(stream_name); std::unique_ptr* stream = - mediapipe::FindOrNull(graph_input_streams_, stream_name); + stream_it == graph_input_streams_.end() ? nullptr : &stream_it->second; RET_CHECK(stream).SetNoLogging() << absl::Substitute( "AddPacketToInputStream called on input stream \"$0\" which is not a " "graph input stream.", stream_name); - int node_id = mediapipe::FindOrDie(graph_input_stream_node_ids_, stream_name); + auto node_id_it = graph_input_stream_node_ids_.find(stream_name); + ABSL_CHECK(node_id_it != graph_input_stream_node_ids_.end()) + << "Map key not found: " << stream_name; + int node_id = node_id_it->second; ABSL_CHECK_GE(node_id, validated_graph_->CalculatorInfos().size()); { absl::MutexLock lock(&full_input_streams_mutex_); diff --git a/mediapipe/framework/calculator_graph.h b/mediapipe/framework/calculator_graph.h index 4284beb7c..80af72650 100644 --- a/mediapipe/framework/calculator_graph.h +++ b/mediapipe/framework/calculator_graph.h @@ -32,6 +32,7 @@ #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" #include "absl/status/statusor.h" +#include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" #include "mediapipe/framework/calculator.pb.h" #include "mediapipe/framework/calculator_base.h" @@ -255,7 +256,7 @@ class CalculatorGraph { // sizes of the queues in the graph. The input stream must have been specified // in the configuration as a graph level input_stream. On error, nothing is // added. 
- absl::Status AddPacketToInputStream(const std::string& stream_name, + absl::Status AddPacketToInputStream(absl::string_view stream_name, const Packet& packet); // Same as the l-value version of this function by the same name, but moves @@ -265,7 +266,7 @@ class CalculatorGraph { // packet may remain valid. In particular, when using the ADD_IF_NOT_FULL // mode with a full queue, this will return StatusUnavailable and the caller // may try adding the packet again later. - absl::Status AddPacketToInputStream(const std::string& stream_name, + absl::Status AddPacketToInputStream(absl::string_view stream_name, Packet&& packet); // Indicates that input will arrive no earlier than a certain timestamp. @@ -509,7 +510,7 @@ class CalculatorGraph { // AddPacketToInputStream(Packet&& packet) or // AddPacketToInputStream(const Packet& packet). template - absl::Status AddPacketToInputStreamInternal(const std::string& stream_name, + absl::Status AddPacketToInputStreamInternal(absl::string_view stream_name, T&& packet); // Sets the executor that will run the nodes assigned to the executor From 0a3f27d1cef454675d02679e840e16674ce434ca Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 6 Dec 2023 09:01:40 -0800 Subject: [PATCH 06/14] Move hand_roi_refinement_graph_options_java_proto_lite to vision lib PiperOrigin-RevId: 588444225 --- .../java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl index ae167a1bc..9d32f05de 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl @@ -50,6 +50,7 @@ _VISION_TASKS_JAVA_PROTO_LITE_TARGETS = [ "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", 
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/holistic_landmarker/proto:holistic_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/image_classifier/proto:image_classifier_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/image_embedder/proto:image_embedder_graph_options_java_proto_lite", @@ -85,7 +86,6 @@ _VISION_TASKS_IMAGE_GENERATOR_JAVA_PROTO_LITE_TARGETS = [ "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_java_proto_lite", ] _TEXT_TASKS_JAVA_PROTO_LITE_TARGETS = [ From 78af80027aff3205f2382f0c92bc7c8b95f45ae9 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Wed, 6 Dec 2023 12:01:55 -0800 Subject: [PATCH 07/14] No user facing changes PiperOrigin-RevId: 588501289 --- mediapipe/framework/encode_binary_proto.bzl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/mediapipe/framework/encode_binary_proto.bzl b/mediapipe/framework/encode_binary_proto.bzl index bf7f0583d..4f987780d 100644 --- a/mediapipe/framework/encode_binary_proto.bzl +++ b/mediapipe/framework/encode_binary_proto.bzl @@ -76,10 +76,12 @@ def _get_proto_provider(dep): """ if ProtoInfo in dep: return dep[ProtoInfo] + elif hasattr(dep, "proto"): return dep.proto else: - fail("cannot happen, rule definition requires .proto or ProtoInfo") + fail("cannot happen, rule definition requires .proto" + + " or ProtoInfo") 
def _encode_binary_proto_impl(ctx): """Implementation of the encode_binary_proto rule.""" @@ -142,7 +144,10 @@ _encode_binary_proto = rule( cfg = "exec", ), "deps": attr.label_list( - providers = [[ProtoInfo], ["proto"]], + providers = [ + [ProtoInfo], + ["proto"], + ], ), "input": attr.label( mandatory = True, @@ -182,7 +187,10 @@ def _generate_proto_descriptor_set_impl(ctx): all_protos = depset(transitive = [ _get_proto_provider(dep).transitive_sources for dep in ctx.attr.deps - if ProtoInfo in dep or hasattr(dep, "proto") + if ( + ProtoInfo in dep or + hasattr(dep, "proto") + ) ]) descriptor = ctx.outputs.output @@ -213,7 +221,10 @@ generate_proto_descriptor_set = rule( cfg = "exec", ), "deps": attr.label_list( - providers = [[ProtoInfo], ["proto"]], + providers = [ + [ProtoInfo], + ["proto"], + ], ), }, outputs = {"output": "%{name}.proto.bin"}, From fad3785721a1c050fb193e4cfa04f666f98f32ed Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Wed, 6 Dec 2023 14:05:47 -0800 Subject: [PATCH 08/14] Export java package for hand_roi_refinement_graph_options. 
PiperOrigin-RevId: 588537174 --- .../proto/hand_roi_refinement_graph_options.proto | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto index 0f979c5aa..f72bd62b5 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto @@ -19,6 +19,9 @@ package mediapipe.tasks.vision.hand_landmarker.proto; import "mediapipe/tasks/cc/core/proto/base_options.proto"; +option java_package = "com.google.mediapipe.tasks.vision.handlandmarker.proto"; +option java_outer_classname = "HandRoiRefinementGraphOptionsProto"; + message HandRoiRefinementGraphOptions { core.proto.BaseOptions base_options = 1; } From 6909504ca9ad8dfff3bca2f12b17649f1bd8e4ca Mon Sep 17 00:00:00 2001 From: Kinar Date: Thu, 7 Dec 2023 00:25:54 -0800 Subject: [PATCH 09/14] Fix naming in different files --- .../vision/hand_landmarker/hand_landmarker.cc | 67 ++++++++++--------- .../vision/hand_landmarker/hand_landmarker.h | 24 +++---- .../hand_landmarker/hand_landmarker_test.cc | 46 ++++++------- 3 files changed, 69 insertions(+), 68 deletions(-) diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc index f6df09f96..56ac960f1 100644 --- a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc @@ -122,17 +122,18 @@ HandLandmarker* CppHandLandmarkerCreate(const HandLandmarkerOptions& options, }; } - auto detector = HandLandmarker::Create(std::move(cpp_options)); - if (!detector.ok()) { - ABSL_LOG(ERROR) << "Failed to create HandLandmarker: " << detector.status(); - CppProcessError(detector.status(), error_msg); + auto landmarker = 
HandLandmarker::Create(std::move(cpp_options)); + if (!landmarker.ok()) { + ABSL_LOG(ERROR) << "Failed to create HandLandmarker: " + << landmarker.status(); + CppProcessError(landmarker.status(), error_msg); return nullptr; } - return detector->release(); + return landmarker->release(); } -int CppHandLandmarkerDetect(void* detector, const MpImage& image, - HandLandmarkerResult* result, char** error_msg) { +int CppHandLandmarkerDetect(void* landmarker, const MpImage& image, + HandLandmarkerResult* result, char** error_msg) { if (image.type == MpImage::GPU_BUFFER) { const absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet."); @@ -151,8 +152,8 @@ int CppHandLandmarkerDetect(void* detector, const MpImage& image, return CppProcessError(img.status(), error_msg); } - auto cpp_detector = static_cast(detector); - auto cpp_result = cpp_detector->Detect(*img); + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->Detect(*img); if (!cpp_result.ok()) { ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); return CppProcessError(cpp_result.status(), error_msg); @@ -161,10 +162,10 @@ int CppHandLandmarkerDetect(void* detector, const MpImage& image, return 0; } -int CppHandLandmarkerDetectForVideo(void* detector, const MpImage& image, - int64_t timestamp_ms, - HandLandmarkerResult* result, - char** error_msg) { +int CppHandLandmarkerDetectForVideo(void* landmarker, const MpImage& image, + int64_t timestamp_ms, + HandLandmarkerResult* result, + char** error_msg) { if (image.type == MpImage::GPU_BUFFER) { absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet"); @@ -183,8 +184,8 @@ int CppHandLandmarkerDetectForVideo(void* detector, const MpImage& image, return CppProcessError(img.status(), error_msg); } - auto cpp_detector = static_cast(detector); - auto cpp_result = cpp_detector->DetectForVideo(*img, timestamp_ms); + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = 
cpp_landmarker->DetectForVideo(*img, timestamp_ms); if (!cpp_result.ok()) { ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); return CppProcessError(cpp_result.status(), error_msg); @@ -193,8 +194,8 @@ int CppHandLandmarkerDetectForVideo(void* detector, const MpImage& image, return 0; } -int CppHandLandmarkerDetectAsync(void* detector, const MpImage& image, - int64_t timestamp_ms, char** error_msg) { +int CppHandLandmarkerDetectAsync(void* landmarker, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { if (image.type == MpImage::GPU_BUFFER) { absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet"); @@ -213,8 +214,8 @@ int CppHandLandmarkerDetectAsync(void* detector, const MpImage& image, return CppProcessError(img.status(), error_msg); } - auto cpp_detector = static_cast(detector); - auto cpp_result = cpp_detector->DetectAsync(*img, timestamp_ms); + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->DetectAsync(*img, timestamp_ms); if (!cpp_result.ok()) { ABSL_LOG(ERROR) << "Data preparation for the landmark detection failed: " << cpp_result; @@ -227,14 +228,14 @@ void CppHandLandmarkerCloseResult(HandLandmarkerResult* result) { CppCloseHandLandmarkerResult(result); } -int CppHandLandmarkerClose(void* detector, char** error_msg) { - auto cpp_detector = static_cast(detector); - auto result = cpp_detector->Close(); +int CppHandLandmarkerClose(void* landmarker, char** error_msg) { + auto cpp_landmarker = static_cast(landmarker); + auto result = cpp_landmarker->Close(); if (!result.ok()) { ABSL_LOG(ERROR) << "Failed to close HandLandmarker: " << result; return CppProcessError(result, error_msg); } - delete cpp_detector; + delete cpp_landmarker; return 0; } @@ -248,26 +249,26 @@ void* hand_landmarker_create(struct HandLandmarkerOptions* options, *options, error_msg); } -int hand_landmarker_detect_image(void* detector, const MpImage& image, +int hand_landmarker_detect_image(void* 
landmarker, const MpImage& image, HandLandmarkerResult* result, char** error_msg) { - return mediapipe::tasks::c::vision::hand_landmarker:: - CppHandLandmarkerDetect(detector, image, result, error_msg); + return mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerDetect( + landmarker, image, result, error_msg); } -int hand_landmarker_detect_for_video(void* detector, const MpImage& image, +int hand_landmarker_detect_for_video(void* landmarker, const MpImage& image, int64_t timestamp_ms, HandLandmarkerResult* result, char** error_msg) { return mediapipe::tasks::c::vision::hand_landmarker:: - CppHandLandmarkerDetectForVideo(detector, image, timestamp_ms, result, - error_msg); + CppHandLandmarkerDetectForVideo(landmarker, image, timestamp_ms, result, + error_msg); } -int hand_landmarker_detect_async(void* detector, const MpImage& image, +int hand_landmarker_detect_async(void* landmarker, const MpImage& image, int64_t timestamp_ms, char** error_msg) { return mediapipe::tasks::c::vision::hand_landmarker:: - CppHandLandmarkerDetectAsync(detector, image, timestamp_ms, error_msg); + CppHandLandmarkerDetectAsync(landmarker, image, timestamp_ms, error_msg); } void hand_landmarker_close_result(HandLandmarkerResult* result) { @@ -275,9 +276,9 @@ void hand_landmarker_close_result(HandLandmarkerResult* result) { result); } -int hand_landmarker_close(void* detector, char** error_ms) { +int hand_landmarker_close(void* landmarker, char** error_ms) { return mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerClose( - detector, error_ms); + landmarker, error_ms); } } // extern "C" diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h index e813f07e5..0b22db206 100644 --- a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h @@ -82,15 +82,15 @@ struct HandLandmarkerOptions { MP_EXPORT void* 
hand_landmarker_create(struct HandLandmarkerOptions* options, char** error_msg); -// Performs gesture recognition on the input `image`. Returns `0` on success. -// If an error occurs, returns an error code and sets the error parameter to an -// an error message (if `error_msg` is not `nullptr`). You must free the memory -// allocated for the error message. -MP_EXPORT int hand_landmarker_detect_image(void* detector, const MpImage& image, +// Performs hand landmark detection on the input `image`. Returns `0` on +// success. If an error occurs, returns an error code and sets the error +// parameter to an an error message (if `error_msg` is not `nullptr`). You must +// free the memory allocated for the error message. +MP_EXPORT int hand_landmarker_detect_image(void* landmarker, const MpImage& image, HandLandmarkerResult* result, char** error_msg); -// Performs gesture recognition on the provided video frame. +// Performs hand landmark detection on the provided video frame. // Only use this method when the HandLandmarker is created with the video // running mode. // The image can be of any size with format RGB or RGBA. It's required to @@ -99,13 +99,13 @@ MP_EXPORT int hand_landmarker_detect_image(void* detector, const MpImage& image, // If an error occurs, returns an error code and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. -MP_EXPORT int hand_landmarker_detect_for_video(void* detector, +MP_EXPORT int hand_landmarker_detect_for_video(void* landmarker, const MpImage& image, int64_t timestamp_ms, HandLandmarkerResult* result, char** error_msg); -// Sends live image data to gesture recognition, and the results will be +// Sends live image data to hand landmark detection, and the results will be // available via the `result_callback` provided in the HandLandmarkerOptions. // Only use this method when the HandLandmarker is created with the live // stream running mode. 
@@ -115,15 +115,15 @@ MP_EXPORT int hand_landmarker_detect_for_video(void* detector, // increasing. // The `result_callback` provides: // - The recognition results as an HandLandmarkerResult object. -// - The const reference to the corresponding input image that the gesture -// detector runs on. Note that the const reference to the image will no +// - The const reference to the corresponding input image that the hand +// landmarker runs on. Note that the const reference to the image will no // longer be valid when the callback returns. To access the image data // outside of the callback, callers need to make a copy of the image. // - The input timestamp in milliseconds. // If an error occurs, returns an error code and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. -MP_EXPORT int hand_landmarker_detect_async(void* detector, const MpImage& image, +MP_EXPORT int hand_landmarker_detect_async(void* landmarker, const MpImage& image, int64_t timestamp_ms, char** error_msg); @@ -135,7 +135,7 @@ MP_EXPORT void hand_landmarker_close_result(HandLandmarkerResult* result); // If an error occurs, returns an error code and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. 
-MP_EXPORT int hand_landmarker_close(void* detector, char** error_msg); +MP_EXPORT int hand_landmarker_close(void* landmarker, char** error_msg); #ifdef __cplusplus } // extern C diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc index c418657e5..ed7b4646f 100644 --- a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc @@ -90,8 +90,8 @@ TEST(HandLandmarkerTest, ImageModeTest) { /* min_tracking_confidence= */ 0.5, }; - void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); - EXPECT_NE(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(landmarker, nullptr); const auto& image_frame = image->GetImageFrameSharedPtr(); const MpImage mp_image = { @@ -102,11 +102,11 @@ TEST(HandLandmarkerTest, ImageModeTest) { .height = image_frame->Height()}}; HandLandmarkerResult result; - hand_landmarker_detect_image(detector, mp_image, &result, + hand_landmarker_detect_image(landmarker, mp_image, &result, /* error_msg */ nullptr); MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); hand_landmarker_close_result(&result); - hand_landmarker_close(detector, /* error_msg */ nullptr); + hand_landmarker_close(landmarker, /* error_msg */ nullptr); } TEST(HandLandmarkerTest, VideoModeTest) { @@ -125,8 +125,8 @@ TEST(HandLandmarkerTest, VideoModeTest) { /* min_tracking_confidence= */ 0.5, }; - void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); - EXPECT_NE(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(landmarker, nullptr); const auto& image_frame = image->GetImageFrameSharedPtr(); const MpImage mp_image = { @@ -138,13 +138,13 @@ TEST(HandLandmarkerTest, VideoModeTest) { for (int i = 0; i < kIterations; ++i) { 
HandLandmarkerResult result; - hand_landmarker_detect_for_video(detector, mp_image, i, &result, + hand_landmarker_detect_for_video(landmarker, mp_image, i, &result, /* error_msg */ nullptr); MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); hand_landmarker_close_result(&result); } - hand_landmarker_close(detector, /* error_msg */ nullptr); + hand_landmarker_close(landmarker, /* error_msg */ nullptr); } // A structure to support LiveStreamModeTest below. This structure holds a @@ -154,16 +154,16 @@ TEST(HandLandmarkerTest, VideoModeTest) { // timestamp is greater than the previous one. struct LiveStreamModeCallback { static int64_t last_timestamp; - static void Fn(HandLandmarkerResult* detector_result, const MpImage& image, + static void Fn(HandLandmarkerResult* landmarker_result, const MpImage& image, int64_t timestamp, char* error_msg) { - ASSERT_NE(detector_result, nullptr); + ASSERT_NE(landmarker_result, nullptr); ASSERT_EQ(error_msg, nullptr); - MatchesHandLandmarkerResult(detector_result, kScorePrecision, + MatchesHandLandmarkerResult(landmarker_result, kScorePrecision, kLandmarkPrecision); EXPECT_GT(image.image_frame.width, 0); EXPECT_GT(image.image_frame.height, 0); EXPECT_GT(timestamp, last_timestamp); - last_timestamp++; + ++last_timestamp; } }; int64_t LiveStreamModeCallback::last_timestamp = -1; @@ -186,8 +186,8 @@ TEST(HandLandmarkerTest, LiveStreamModeTest) { /* result_callback= */ LiveStreamModeCallback::Fn, }; - void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); - EXPECT_NE(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(landmarker, nullptr); const auto& image_frame = image->GetImageFrameSharedPtr(); const MpImage mp_image = { @@ -198,11 +198,11 @@ TEST(HandLandmarkerTest, LiveStreamModeTest) { .height = image_frame->Height()}}; for (int i = 0; i < kIterations; ++i) { - EXPECT_GE(hand_landmarker_detect_async(detector, mp_image, i, + 
EXPECT_GE(hand_landmarker_detect_async(landmarker, mp_image, i, /* error_msg */ nullptr), 0); } - hand_landmarker_close(detector, /* error_msg */ nullptr); + hand_landmarker_close(landmarker, /* error_msg */ nullptr); // Due to the flow limiter, the total of outputs might be smaller than the // number of iterations. @@ -224,8 +224,8 @@ TEST(HandLandmarkerTest, InvalidArgumentHandling) { }; char* error_msg; - void* detector = hand_landmarker_create(&options, &error_msg); - EXPECT_EQ(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, &error_msg); + EXPECT_EQ(landmarker, nullptr); EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify")); @@ -245,17 +245,17 @@ TEST(HandLandmarkerTest, FailedRecognitionHandling) { /* min_tracking_confidence= */ 0.5, }; - void* detector = hand_landmarker_create(&options, /* error_msg */ - nullptr); - EXPECT_NE(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(landmarker, nullptr); const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; HandLandmarkerResult result; char* error_msg; - hand_landmarker_detect_image(detector, mp_image, &result, &error_msg); + hand_landmarker_detect_image(landmarker, mp_image, &result, &error_msg); EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); free(error_msg); - hand_landmarker_close(detector, /* error_msg */ nullptr); + hand_landmarker_close(landmarker, /* error_msg */ nullptr); } } // namespace From 66655a15b21d1076a85491189fe03b89f57bee24 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 7 Dec 2023 08:17:56 -0800 Subject: [PATCH 10/14] API 2: Do not redirect from MEDIAPIPE_REGISTER_NODE to REGISTER_CALCULATOR The problem with redirecting is that the calling code has to include API 1 code (often third_party/mediapipe/framework/calculator_framework.h), even though they are only using API 2 functionality (albeit deprecated). This can lead to weird issues, e.g. 
MakePacket confusingly does not return a Packet, but a Packet, because it's using the API 1 version. PiperOrigin-RevId: 588798455 --- mediapipe/framework/api2/BUILD | 6 ++---- mediapipe/framework/api2/node.h | 15 ++++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/mediapipe/framework/api2/BUILD b/mediapipe/framework/api2/BUILD index 5c5ec04ea..f7e5b3325 100644 --- a/mediapipe/framework/api2/BUILD +++ b/mediapipe/framework/api2/BUILD @@ -81,15 +81,13 @@ cc_library( srcs = ["node.cc"], hdrs = ["node.h"], deps = [ - ":const_str", ":contract", - ":packet", - ":port", "//mediapipe/framework:calculator_base", "//mediapipe/framework:calculator_context", "//mediapipe/framework:calculator_contract", "//mediapipe/framework:subgraph", - "//mediapipe/framework/deps:no_destructor", + "//mediapipe/framework/deps:registration", + "@com_google_absl//absl/status", ], ) diff --git a/mediapipe/framework/api2/node.h b/mediapipe/framework/api2/node.h index 58cebf1ea..6d5e3da71 100644 --- a/mediapipe/framework/api2/node.h +++ b/mediapipe/framework/api2/node.h @@ -1,17 +1,15 @@ #ifndef MEDIAPIPE_FRAMEWORK_API2_NODE_H_ #define MEDIAPIPE_FRAMEWORK_API2_NODE_H_ -#include -#include +#include +#include -#include "mediapipe/framework/api2/const_str.h" +#include "absl/status/status.h" #include "mediapipe/framework/api2/contract.h" -#include "mediapipe/framework/api2/packet.h" -#include "mediapipe/framework/api2/port.h" #include "mediapipe/framework/calculator_base.h" #include "mediapipe/framework/calculator_context.h" #include "mediapipe/framework/calculator_contract.h" -#include "mediapipe/framework/deps/no_destructor.h" +#include "mediapipe/framework/deps/registration.h" #include "mediapipe/framework/subgraph.h" namespace mediapipe { @@ -178,7 +176,10 @@ class SubgraphImpl : public Subgraph, absl::make_unique>) // This macro is used to register a non-split-contract calculator. Deprecated. 
-#define MEDIAPIPE_REGISTER_NODE(name) REGISTER_CALCULATOR(name) +#define MEDIAPIPE_REGISTER_NODE(name) \ + MEDIAPIPE_REGISTER_FACTORY_FUNCTION_QUALIFIED( \ + mediapipe::CalculatorBaseRegistry, calculator_registration, #name, \ + absl::make_unique>) // This macro is used to define a subgraph that does not use automatic // registration. Deprecated. From 0a77b8c57bdfe91b4f194ab730548cc2f9bfd290 Mon Sep 17 00:00:00 2001 From: Jiuqiang Tang Date: Thu, 7 Dec 2023 10:02:18 -0800 Subject: [PATCH 11/14] No public description PiperOrigin-RevId: 588827865 --- .../java/com/google/mediapipe/solutioncore/SolutionBase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/java/com/google/mediapipe/solutioncore/SolutionBase.java b/mediapipe/java/com/google/mediapipe/solutioncore/SolutionBase.java index 6dbc11fd5..cdf11d85f 100644 --- a/mediapipe/java/com/google/mediapipe/solutioncore/SolutionBase.java +++ b/mediapipe/java/com/google/mediapipe/solutioncore/SolutionBase.java @@ -27,8 +27,8 @@ import com.google.mediapipe.framework.Graph; import com.google.mediapipe.framework.MediaPipeException; import com.google.mediapipe.framework.Packet; import com.google.mediapipe.framework.PacketGetter; -import com.google.mediapipe.solutioncore.logging.SolutionStatsLogger; import com.google.mediapipe.solutioncore.logging.SolutionStatsDummyLogger; +import com.google.mediapipe.solutioncore.logging.SolutionStatsLogger; import com.google.protobuf.Parser; import java.io.File; import java.util.List; From 20743b811051ee99303ef7c098d08cbbaaee74e4 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Thu, 7 Dec 2023 13:25:03 -0800 Subject: [PATCH 12/14] Update MediaPipe development version to 0.10.9 PiperOrigin-RevId: 588890763 --- version.bzl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/version.bzl b/version.bzl index c5d7d4d83..24048e1a2 100644 --- a/version.bzl +++ b/version.bzl @@ -1,3 +1,5 @@ """Version number for MediaPipe""" 
-MEDIAPIPE_FULL_VERSION = "0.10.8" +# The next version of MediaPipe (e.g. the version that is currently in development). +# This version should be bumped after every release. +MEDIAPIPE_FULL_VERSION = "0.10.9" From 4e78e645d060abd194e87ec495cf4c7134aba7e0 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Fri, 8 Dec 2023 16:58:17 -0800 Subject: [PATCH 13/14] No public description PiperOrigin-RevId: 589279414 --- mediapipe/framework/deps/strong_int.h | 33 ++++++++++++++------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/mediapipe/framework/deps/strong_int.h b/mediapipe/framework/deps/strong_int.h index b4bfef770..d3a0f77e2 100644 --- a/mediapipe/framework/deps/strong_int.h +++ b/mediapipe/framework/deps/strong_int.h @@ -31,14 +31,14 @@ // A StrongInt with a NullStrongIntValidator should compile away to a raw T // in optimized mode. What this means is that the generated assembly for: // -// int64 foo = 123; -// int64 bar = 456; -// int64 baz = foo + bar; -// constexpr int64 fubar = 789; +// int64_t foo = 123; +// int64_t bar = 456; +// int64_t baz = foo + bar; +// constexpr int64_t fubar = 789; // // ...should be identical to the generated assembly for: // -// DEFINE_STRONG_INT_TYPE(MyStrongInt, int64); +// DEFINE_STRONG_INT_TYPE(MyStrongInt, int64_t); // MyStrongInt foo(123); // MyStrongInt bar(456); // MyStrongInt baz = foo + bar; @@ -97,6 +97,7 @@ #ifndef MEDIAPIPE_DEPS_STRONG_INT_H_ #define MEDIAPIPE_DEPS_STRONG_INT_H_ +#include #include #include #include @@ -179,11 +180,11 @@ struct NullStrongIntValidator { } // Verify lhs << rhs. template - static void ValidateLeftShift(T lhs, int64 rhs) { /* do nothing */ + static void ValidateLeftShift(T lhs, int64_t rhs) { /* do nothing */ } // Verify lhs >> rhs. template - static void ValidateRightShift(T lhs, int64 rhs) { /* do nothing */ + static void ValidateRightShift(T lhs, int64_t rhs) { /* do nothing */ } // Verify lhs & rhs. 
template @@ -224,8 +225,8 @@ class StrongInt { // // Example: Assume you have two StrongInt types. // - // DEFINE_STRONG_INT_TYPE(Bytes, int64); - // DEFINE_STRONG_INT_TYPE(Megabytes, int64); + // DEFINE_STRONG_INT_TYPE(Bytes, int64_t); + // DEFINE_STRONG_INT_TYPE(Megabytes, int64_t); // // If you want to be able to (explicitly) construct an instance of Bytes from // an instance of Megabytes, simply define a converter function in the same @@ -337,12 +338,12 @@ class StrongInt { value_ %= arg; return *this; } - StrongInt &operator<<=(int64 arg) { // NOLINT(whitespace/operators) + StrongInt &operator<<=(int64_t arg) { // NOLINT(whitespace/operators) ValidatorType::template ValidateLeftShift(value_, arg); value_ <<= arg; return *this; } - StrongInt &operator>>=(int64 arg) { // NOLINT(whitespace/operators) + StrongInt &operator>>=(int64_t arg) { // NOLINT(whitespace/operators) ValidatorType::template ValidateRightShift(value_, arg); value_ >>= arg; return *this; @@ -378,19 +379,19 @@ std::ostream &operator<<(std::ostream &os, return os << arg.value(); } -// Provide the << operator, primarily for logging purposes. Specialized for int8 -// so that an integer and not a character is printed. +// Provide the << operator, primarily for logging purposes. Specialized for +// int8_t so that an integer and not a character is printed. template std::ostream &operator<<(std::ostream &os, - StrongInt arg) { + StrongInt arg) { return os << static_cast(arg.value()); } // Provide the << operator, primarily for logging purposes. Specialized for -// uint8 so that an integer and not a character is printed. +// uint8_t so that an integer and not a character is printed. 
template std::ostream &operator<<(std::ostream &os, - StrongInt arg) { + StrongInt arg) { return os << static_cast(arg.value()); } From 61efcf5a11aa6eeb571084fe8e206d1e4ce747c2 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 8 Dec 2023 17:21:02 -0800 Subject: [PATCH 14/14] internal-only change PiperOrigin-RevId: 589284167 --- mediapipe/framework/tool/BUILD | 2 +- mediapipe/util/BUILD | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD index 7a4b5a112..2376aebb7 100644 --- a/mediapipe/framework/tool/BUILD +++ b/mediapipe/framework/tool/BUILD @@ -13,13 +13,13 @@ # limitations under the License. # +load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test") load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") load( "//mediapipe/framework/tool:mediapipe_graph.bzl", "data_as_c_string", "mediapipe_binary_graph", ) -load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test") load("@bazel_skylib//:bzl_library.bzl", "bzl_library") licenses(["notice"]) diff --git a/mediapipe/util/BUILD b/mediapipe/util/BUILD index 0316224f7..fdeefb49e 100644 --- a/mediapipe/util/BUILD +++ b/mediapipe/util/BUILD @@ -13,8 +13,8 @@ # limitations under the License. # Placeholder: load py_library -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test") +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") licenses(["notice"])