From 17c0c960be5b13bffa8436fd91b37581b701459b Mon Sep 17 00:00:00 2001 From: Kinar Date: Mon, 27 Nov 2023 04:51:32 -0800 Subject: [PATCH 1/4] Added Gesture Recognizer C API and tests --- mediapipe/tasks/c/components/containers/BUILD | 52 ++++ .../gesture_recognizer_result_converter.cc | 181 +++++++++++ .../gesture_recognizer_result_converter.h | 33 ++ ...esture_recognizer_result_converter_test.cc | 24 ++ .../tasks/c/components/containers/landmark.h | 90 ++++++ .../containers/landmark_converter.cc | 132 ++++++++ .../containers/landmark_converter.h | 51 +++ .../containers/landmark_converter_test.cc | 28 ++ .../tasks/c/vision/gesture_recognizer/BUILD | 76 +++++ .../gesture_recognizer/gesture_recognizer.cc | 294 ++++++++++++++++++ .../gesture_recognizer/gesture_recognizer.h | 156 ++++++++++ .../gesture_recognizer_result.h | 70 +++++ .../gesture_recognizer_test.cc | 123 ++++++++ mediapipe/tasks/testdata/vision/BUILD | 1 + 14 files changed, 1311 insertions(+) create mode 100644 mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc create mode 100644 mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.h create mode 100644 mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc create mode 100644 mediapipe/tasks/c/components/containers/landmark.h create mode 100644 mediapipe/tasks/c/components/containers/landmark_converter.cc create mode 100644 mediapipe/tasks/c/components/containers/landmark_converter.h create mode 100644 mediapipe/tasks/c/components/containers/landmark_converter_test.cc create mode 100644 mediapipe/tasks/c/vision/gesture_recognizer/BUILD create mode 100644 mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc create mode 100644 mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h create mode 100644 mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h create mode 100644 mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc diff --git a/mediapipe/tasks/c/components/containers/BUILD b/mediapipe/tasks/c/components/containers/BUILD index 4bb580873..76ac50cc4 100644 --- a/mediapipe/tasks/c/components/containers/BUILD +++ b/mediapipe/tasks/c/components/containers/BUILD @@ -43,6 +43,33 @@ cc_test( ], ) +cc_library( + name = "landmark", + hdrs = ["landmark.h"], +) + +cc_library( + name = "landmark_converter", + srcs = ["landmark_converter.cc"], + hdrs = ["landmark_converter.h"], + deps = [ + ":landmark", + "//mediapipe/tasks/cc/components/containers:landmark", + ], +) + +cc_test( + name = "landmark_converter_test", + srcs = ["landmark_converter_test.cc"], + deps = [ + ":landmark", + ":landmark_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/cc/components/containers:landmark", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "classification_result", hdrs = ["classification_result.h"], @@ -121,3 +148,28 @@ cc_test( "@com_google_googletest//:gtest_main", ], ) + +cc_library( + name = "gesture_recognizer_result_converter", + srcs = ["gesture_recognizer_result_converter.cc"], + hdrs = ["gesture_recognizer_result_converter.h"], + deps = [ + ":category_converter", + ":landmark_converter", + "//mediapipe/tasks/c/vision/gesture_recognizer:gesture_recognizer_result", + "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_result", + ], +) + +cc_test( + name = "gesture_recognizer_result_converter_test", + srcs = ["gesture_recognizer_result_converter_test.cc"], + linkstatic = 1, + deps = [ + 
":gesture_recognizer_result_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/vision/gesture_recognizer:gesture_recognizer_result", + "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_result", + "@com_google_googletest//:gtest_main", + ], +) \ No newline at end of file diff --git a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc new file mode 100644 index 000000000..3b2da8eab --- /dev/null +++ b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc @@ -0,0 +1,181 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.h" + +#include +#include + +#include "mediapipe/tasks/c/components/containers/category_converter.h" +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h" + +namespace mediapipe::tasks::c::components::containers { + +using CppCategory = ::mediapipe::tasks::components::containers::Category; +using CppLandmark = ::mediapipe::tasks::components::containers::Landmark; +using CppNormalizedLandmark = + ::mediapipe::tasks::components::containers::NormalizedLandmark; + +void CppConvertToGestureRecognizerResult( + const mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult& + in, + GestureRecognizerResult* out) { + out->gestures_count = in.gestures.size(); + out->gestures = new Category*[out->gestures_count]; + out->gestures_categories_counts = new uint32_t[out->gestures_count]; + + for (uint32_t i = 0; i < out->gestures_count; ++i) { + uint32_t categories_count = in.gestures[i].classification_size(); + out->gestures_categories_counts[i] = categories_count; + out->gestures[i] = new Category[categories_count]; + + for (uint32_t j = 0; j < categories_count; ++j) { + const auto& classification = in.gestures[i].classification(j); + + CppCategory cpp_category; + // Set fields from the Classification protobuf + if (classification.has_index()) { + cpp_category.index = classification.index(); + } + if (classification.has_score()) { + cpp_category.score = classification.score(); + } + if (classification.has_label()) { + cpp_category.category_name = classification.label(); + } + if (classification.has_display_name()) { + cpp_category.display_name = classification.display_name(); + } + + CppConvertToCategory(cpp_category, &out->gestures[i][j]); + } + } + + out->handedness_count = in.handedness.size(); + out->handedness = new Category*[out->handedness_count]; + out->handedness_categories_counts = new uint32_t[out->handedness_count]; + + for (uint32_t i = 0; i < out->handedness_count; ++i) { + uint32_t categories_count = 
in.handedness[i].classification_size(); + out->handedness_categories_counts[i] = categories_count; + out->handedness[i] = new Category[categories_count]; + + for (uint32_t j = 0; j < categories_count; ++j) { + const auto& classification = in.handedness[i].classification(j); + + CppCategory cpp_category; + // Set fields from the Classification protobuf + if (classification.has_index()) { + cpp_category.index = classification.index(); + } + if (classification.has_score()) { + cpp_category.score = classification.score(); + } + if (classification.has_label()) { + cpp_category.category_name = classification.label(); + } + if (classification.has_display_name()) { + cpp_category.display_name = classification.display_name(); + } + + CppConvertToCategory(cpp_category, &out->handedness[i][j]); + } + } + + out->hand_landmarks_count = in.hand_landmarks.size(); + out->hand_landmarks = new NormalizedLandmarks[out->hand_landmarks_count]; + for (uint32_t i = 0; i < out->hand_landmarks_count; ++i) { + std::vector cpp_normalized_landmarks; + for (uint32_t j = 0; j < in.hand_landmarks[i].landmark_size(); ++j) { + const auto& landmark = in.hand_landmarks[i].landmark(j); + CppNormalizedLandmark cpp_landmark; + cpp_landmark.x = landmark.x(); + cpp_landmark.y = landmark.y(); + cpp_landmark.z = landmark.z(); + if (landmark.has_presence()) { + cpp_landmark.presence = landmark.presence(); + } + if (landmark.has_visibility()) { + cpp_landmark.visibility = landmark.visibility(); + } + cpp_normalized_landmarks.push_back(cpp_landmark); + } + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &out->hand_landmarks[i]); + } + + out->hand_world_landmarks_count = in.hand_world_landmarks.size(); + out->hand_world_landmarks = new Landmarks[out->hand_world_landmarks_count]; + for (uint32_t i = 0; i < out->hand_world_landmarks_count; ++i) { + std::vector cpp_landmarks; + for (uint32_t j = 0; j < in.hand_world_landmarks[i].landmark_size(); ++j) { + const auto& landmark = in.hand_world_landmarks[i].landmark(j); + CppLandmark cpp_landmark; + cpp_landmark.x = landmark.x(); + cpp_landmark.y = landmark.y(); + cpp_landmark.z = landmark.z(); + if (landmark.has_presence()) { + cpp_landmark.presence = landmark.presence(); + } + if (landmark.has_visibility()) { + cpp_landmark.visibility = landmark.visibility(); + } + cpp_landmarks.push_back(cpp_landmark); + } + CppConvertToLandmarks(cpp_landmarks, &out->hand_world_landmarks[i]); + } +} + +void CppCloseGestureRecognizerResult(GestureRecognizerResult* result) { + for (uint32_t i = 0; i < result->gestures_count; ++i) { + for (uint32_t j = 0; j < result->gestures_categories_counts[i]; ++j) { + CppCloseCategory(&result->gestures[i][j]); + } + delete[] result->gestures[i]; + } + delete[] result->gestures; + + for (uint32_t i = 0; i < result->handedness_count; ++i) { + for (uint32_t j = 0; j < result->handedness_categories_counts[i]; ++j) { + CppCloseCategory(&result->handedness[i][j]); + } + delete[] result->handedness[i]; + } + delete[] result->handedness; + + for (uint32_t i = 0; i < result->hand_landmarks_count; ++i) { + CppCloseNormalizedLandmarks(&result->hand_landmarks[i]); + } + delete[] result->hand_landmarks; + + for (uint32_t i = 0; i < result->hand_world_landmarks_count; ++i) { + CppCloseLandmarks(&result->hand_world_landmarks[i]); + } + delete[] result->hand_world_landmarks; + + result->gestures = nullptr; + result->handedness = nullptr; + result->hand_landmarks = nullptr; + result->hand_world_landmarks = nullptr; + + result->gestures_count = 0; + result->handedness_count = 
0; + result->hand_landmarks_count = 0; + result->hand_world_landmarks_count = 0; +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.h b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.h new file mode 100644 index 000000000..d5105acf6 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.h @@ -0,0 +1,33 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_ + +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToGestureRecognizerResult( + const mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult& + in, + GestureRecognizerResult* out); + +void CppCloseGestureRecognizerResult(GestureRecognizerResult* result); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc new file mode 100644 index 000000000..551498e12 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc @@ -0,0 +1,24 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.h" + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h" + +namespace mediapipe::tasks::c::components::containers { + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/landmark.h b/mediapipe/tasks/c/components/containers/landmark.h new file mode 100644 index 000000000..de6dd9928 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/landmark.h @@ -0,0 +1,90 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Landmark represents a point in 3D space with x, y, z coordinates. The +// landmark coordinates are in meters. z represents the landmark depth, and the +// smaller the value the closer the world landmark is to the camera. +struct Landmark { + float x; + float y; + float z; + + // For optional visibility. + bool has_visibility; + + // Landmark visibility. Should stay unset if not supported. + // Float score of whether landmark is visible or occluded by other objects. + // Landmark considered as invisible also if it is not present on the screen + // (out of scene bounds). Depending on the model, visibility value is either + // a sigmoid or an argument of sigmoid. + float visibility; + + // For optional presence. + bool has_presence; + + // Landmark presence. Should stay unset if not supported. + // Float score of whether landmark is present on the scene (located within + // scene bounds). Depending on the model, presence value is either a result + // of sigmoid or an argument of sigmoid function to get landmark presence + // probability. + float presence; + + // Landmark name. Should stay unset if not supported. + // Defaults to nullptr. + char* name; +}; + +// A normalized version of above Landmark struct. All coordinates should be +// within [0, 1]. +struct NormalizedLandmark { + float x; + float y; + float z; + + bool has_visibility; + float visibility; + + bool has_presence; + float presence; + + char* name; +}; + +// A list of Landmarks. +struct Landmarks { + struct Landmark* landmarks; + uint32_t landmarks_count; +}; + +// A list of NormalizedLandmarks. 
+struct NormalizedLandmarks { + struct NormalizedLandmark* landmarks; + uint32_t landmarks_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_ diff --git a/mediapipe/tasks/c/components/containers/landmark_converter.cc b/mediapipe/tasks/c/components/containers/landmark_converter.cc new file mode 100644 index 000000000..5f4ab5ef2 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/landmark_converter.cc @@ -0,0 +1,132 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" + +#include + +#include "mediapipe/tasks/c/components/containers/landmark.h" + +typedef Landmark LandmarkC; +typedef NormalizedLandmark NormalizedLandmarkC; +typedef Landmarks LandmarksC; +typedef NormalizedLandmarks NormalizedLandmarksC; + +#include "mediapipe/tasks/cc/components/containers/landmark.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToLandmark( + const mediapipe::tasks::components::containers::Landmark& in, + LandmarkC* out) { + out->x = in.x; + out->y = in.y; + out->z = in.z; + + if (in.visibility.has_value()) { + out->has_visibility = true; + out->visibility = in.visibility.value(); + } else { + out->has_visibility = false; + } + + if (in.presence.has_value()) { + out->has_presence = true; + out->presence = in.presence.value(); + } else { + out->has_presence = false; + } + + out->name = in.name.has_value() ? strdup(in.name->c_str()) : nullptr; +} + +void CppConvertToNormalizedLandmark( + const mediapipe::tasks::components::containers::NormalizedLandmark& in, + NormalizedLandmarkC* out) { + out->x = in.x; + out->y = in.y; + out->z = in.z; + + if (in.visibility.has_value()) { + out->has_visibility = true; + out->visibility = in.visibility.value(); + } else { + out->has_visibility = false; + } + + if (in.presence.has_value()) { + out->has_presence = true; + out->presence = in.presence.value(); + } else { + out->has_presence = false; + } + + out->name = in.name.has_value() ? 
strdup(in.name->c_str()) : nullptr; +} + +void CppConvertToLandmarks( + const std::vector& in, + LandmarksC* out) { + out->landmarks_count = in.size(); + out->landmarks = new LandmarkC[out->landmarks_count]; + for (uint32_t i = 0; i < out->landmarks_count; ++i) { + CppConvertToLandmark(in[i], &out->landmarks[i]); + } +} + +void CppConvertToNormalizedLandmarks( + const std::vector< + mediapipe::tasks::components::containers::NormalizedLandmark>& in, + NormalizedLandmarksC* out) { + out->landmarks_count = in.size(); + out->landmarks = new NormalizedLandmarkC[out->landmarks_count]; + for (uint32_t i = 0; i < out->landmarks_count; ++i) { + CppConvertToNormalizedLandmark(in[i], &out->landmarks[i]); + } +} + +void CppCloseLandmark(LandmarkC* in) { + if (in && in->name) { + free(in->name); + in->name = nullptr; + } +} + +void CppCloseLandmarks(LandmarksC* in) { + for (uint32_t i = 0; i < in->landmarks_count; ++i) { + CppCloseLandmark(&in->landmarks[i]); + } + delete[] in->landmarks; + in->landmarks = nullptr; + in->landmarks_count = 0; +} + +void CppCloseNormalizedLandmark(NormalizedLandmarkC* in) { + if (in && in->name) { + free(in->name); + in->name = nullptr; + } +} + +void CppCloseNormalizedLandmarks(NormalizedLandmarksC* in) { + for (uint32_t i = 0; i < in->landmarks_count; ++i) { + CppCloseNormalizedLandmark(&in->landmarks[i]); + } + delete[] in->landmarks; + in->landmarks = nullptr; + in->landmarks_count = 0; +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/landmark_converter.h b/mediapipe/tasks/c/components/containers/landmark_converter.h new file mode 100644 index 000000000..f59158112 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/landmark_converter.h @@ -0,0 +1,51 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_ + +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToLandmark( + const mediapipe::tasks::components::containers::Landmark& in, + Landmark* out); + +void CppConvertToNormalizedLandmark( + const mediapipe::tasks::components::containers::NormalizedLandmark& in, + NormalizedLandmark* out); + +void CppConvertToLandmarks( + const std::vector& in, + Landmarks* out); + +void CppConvertToNormalizedLandmarks( + const std::vector< + mediapipe::tasks::components::containers::NormalizedLandmark>& in, + NormalizedLandmarks* out); + +void CppCloseLandmark(struct Landmark* in); + +void CppCloseLandmarks(struct Landmarks* in); + +void CppCloseNormalizedLandmark(struct NormalizedLandmark* in); + +void CppCloseNormalizedLandmarks(struct NormalizedLandmarks* in); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/landmark_converter_test.cc b/mediapipe/tasks/c/components/containers/landmark_converter_test.cc new file mode 100644 index 000000000..cf2b5a0e9 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/landmark_converter_test.cc @@ -0,0 +1,28 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" + +#include +#include +#include + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" + +namespace mediapipe::tasks::c::components::containers { + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/BUILD b/mediapipe/tasks/c/vision/gesture_recognizer/BUILD new file mode 100644 index 000000000..8b905b338 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/BUILD @@ -0,0 +1,76 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "gesture_recognizer_result", + hdrs = ["gesture_recognizer_result.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/vision/core:common", + ], +) + +cc_library( + name = "gesture_recognizer_lib", + srcs = ["gesture_recognizer.cc"], + hdrs = ["gesture_recognizer.h"], + visibility = ["//visibility:public"], + deps = [ + ":gesture_recognizer_result", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/tasks/c/components/containers:gesture_recognizer_result_converter", + "//mediapipe/tasks/c/components/processors:classifier_options", + "//mediapipe/tasks/c/components/processors:classifier_options_converter", + "//mediapipe/tasks/c/core:base_options", + "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/gesture_recognizer", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + +cc_test( + name = "gesture_recognizer_test", + srcs = ["gesture_recognizer_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + linkstatic = 1, + deps = [ + ":gesture_recognizer_lib", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc new file mode 100644 index 000000000..f8c42dcaf --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc @@ -0,0 +1,294 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h" + +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.h" +#include "mediapipe/tasks/c/components/processors/classifier_options_converter.h" +#include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe::tasks::c::vision::gesture_recognizer { + +namespace { + +using ::mediapipe::tasks::c::components::containers:: + CppCloseGestureRecognizerResult; +using ::mediapipe::tasks::c::components::containers:: + CppConvertToGestureRecognizerResult; +using ::mediapipe::tasks::c::components::processors:: + CppConvertToClassifierOptions; +using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; +using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::core::RunningMode; +using ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizer; +typedef ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult + CppGestureRecognizerResult; + +int CppProcessError(absl::Status status, char** error_msg) { + if (error_msg) { + *error_msg = strdup(status.ToString().c_str()); + } + return status.raw_code(); +} + +} // namespace + +void CppConvertToGestureRecognizerOptions( + const GestureRecognizerOptions& in, + mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerOptions* + out) { + out->num_hands = in.num_hands; + out->min_hand_detection_confidence = in.min_hand_detection_confidence; + out->min_hand_presence_confidence = in.min_hand_presence_confidence; + out->min_tracking_confidence = in.min_tracking_confidence; + CppConvertToClassifierOptions(in.canned_gestures_classifier_options, + &out->canned_gestures_classifier_options); + CppConvertToClassifierOptions(in.custom_gestures_classifier_options, + &out->custom_gestures_classifier_options); +} + +GestureRecognizer* CppGestureRecognizerCreate( + const GestureRecognizerOptions& options, char** error_msg) { + auto cpp_options = + std::make_unique<::mediapipe::tasks::vision::gesture_recognizer:: + GestureRecognizerOptions>(); + + CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); + CppConvertToGestureRecognizerOptions(options, cpp_options.get()); + cpp_options->running_mode = static_cast(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM. 
+ if (cpp_options->running_mode == RunningMode::LIVE_STREAM) { + if (options.result_callback == nullptr) { + const absl::Status status = absl::InvalidArgumentError( + "Provided null pointer to callback function."); + ABSL_LOG(ERROR) << "Failed to create GestureRecognizer: " << status; + CppProcessError(status, error_msg); + return nullptr; + } + + GestureRecognizerOptions::result_callback_fn result_callback = + options.result_callback; + cpp_options->result_callback = + [result_callback](absl::StatusOr cpp_result, + const Image& image, int64_t timestamp) { + char* error_msg = nullptr; + + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + CppProcessError(cpp_result.status(), &error_msg); + result_callback(nullptr, MpImage(), timestamp, error_msg); + free(error_msg); + return; + } + + // Result is valid for the lifetime of the callback function. + GestureRecognizerResult result; + CppConvertToGestureRecognizerResult(*cpp_result, &result); + + const auto& image_frame = image.GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast<::ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + result_callback(&result, mp_image, timestamp, + /* error_msg= */ nullptr); + + CppCloseGestureRecognizerResult(&result); + }; + } + + auto recognizer = GestureRecognizer::Create(std::move(cpp_options)); + if (!recognizer.ok()) { + ABSL_LOG(ERROR) << "Failed to create GestureRecognizer: " + << recognizer.status(); + CppProcessError(recognizer.status(), error_msg); + return nullptr; + } + return recognizer->release(); +} + +int CppGestureRecognizerRecognize(void* recognizer, const MpImage* image, + GestureRecognizerResult* result, + char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + const absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet."); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_recognizer = static_cast(recognizer); + auto cpp_result = cpp_recognizer->Recognize(*img); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToGestureRecognizerResult(*cpp_result, result); + return 0; +} + +int CppGestureRecognizerRecognizeForVideo(void* recognizer, + const MpImage* image, + int64_t timestamp_ms, + GestureRecognizerResult* result, + char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_recognizer = 
static_cast(recognizer); + auto cpp_result = cpp_recognizer->RecognizeForVideo(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToGestureRecognizerResult(*cpp_result, result); + return 0; +} + +int CppGestureRecognizerRecognizeAsync(void* recognizer, const MpImage* image, + int64_t timestamp_ms, char** error_msg) { + if (image->type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image->image_frame.format), + image->image_frame.image_buffer, image->image_frame.width, + image->image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_recognizer = static_cast(recognizer); + auto cpp_result = cpp_recognizer->RecognizeAsync(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Data preparation for the image classification failed: " + << cpp_result; + return CppProcessError(cpp_result, error_msg); + } + return 0; +} + +void CppGestureRecognizerCloseResult(GestureRecognizerResult* result) { + CppCloseGestureRecognizerResult(result); +} + +int CppGestureRecognizerClose(void* recognizer, char** error_msg) { + auto cpp_recognizer = static_cast(recognizer); + auto result = cpp_recognizer->Close(); + if (!result.ok()) { + ABSL_LOG(ERROR) << "Failed to close GestureRecognizer: " << result; + return CppProcessError(result, error_msg); + } + delete cpp_recognizer; + return 0; +} + +} // namespace mediapipe::tasks::c::vision::gesture_recognizer + +extern "C" { + +void* gesture_recognizer_create(struct GestureRecognizerOptions* options, + char** error_msg) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerCreate(*options, error_msg); +} + +int gesture_recognizer_recognize_image(void* recognizer, const MpImage* image, + GestureRecognizerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerRecognize(recognizer, image, result, error_msg); +} + +int gesture_recognizer_recognize_for_video(void* recognizer, + const MpImage* image, + int64_t timestamp_ms, + GestureRecognizerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerRecognizeForVideo(recognizer, image, timestamp_ms, + result, error_msg); +} + +int gesture_recognizer_recognize_async(void* recognizer, const MpImage* image, + int64_t timestamp_ms, char** error_msg) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerRecognizeAsync(recognizer, image, timestamp_ms, + error_msg); +} + +void gesture_recognizer_close_result(GestureRecognizerResult* result) { + mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerCloseResult(result); +} + +int gesture_recognizer_close(void* recognizer, char** error_ms) { + return mediapipe::tasks::c::vision::gesture_recognizer:: + CppGestureRecognizerClose(recognizer, error_ms); +} + +} // extern "C" diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h new file mode 100644 index 000000000..1c2b65112 --- /dev/null +++ 
b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h @@ -0,0 +1,156 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_ +#define MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_ + +#include "mediapipe/tasks/c/components/processors/classifier_options.h" +#include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The options for configuring a MediaPipe gesture recognizer task. +struct GestureRecognizerOptions { + // Base options for configuring MediaPipe Tasks, such as specifying the model + // file with metadata, accelerator options, op resolver, etc. + struct BaseOptions base_options; + + // The running mode of the task. Default to the image mode. + // GestureRecognizer has three running modes: + // 1) The image mode for recognizing hand gestures on single image inputs. + // 2) The video mode for recognizing hand gestures on the decoded frames of a + // video. + // 3) The live stream mode for recognizing hand gestures on the live stream of + // input data, such as from camera. In this mode, the "result_callback" + // below must be specified to receive the detection results asynchronously. + RunningMode running_mode; + + // The maximum number of hands can be detected by the GestureRecognizer. + int num_hands = 1; + + // The minimum confidence score for the hand detection to be considered + // successful. + float min_hand_detection_confidence = 0.5; + + // The minimum confidence score of hand presence score in the hand landmark + // detection. + float min_hand_presence_confidence = 0.5; + + // The minimum confidence score for the hand tracking to be considered + // successful. + float min_tracking_confidence = 0.5; + + // TODO Note this option is subject to change. + // Options for configuring the canned gestures classifier, such as score + // threshold, allow list and deny list of gestures. The categories for canned + // gesture classifier are: ["None", "Closed_Fist", "Open_Palm", + // "Pointing_Up", "Thumb_Down", "Thumb_Up", "Victory", "ILoveYou"] + struct ClassifierOptions canned_gestures_classifier_options; + + // TODO Note this option is subject to change. + // Options for configuring the custom gestures classifier, such as score + // threshold, allow list and deny list of gestures. + struct ClassifierOptions custom_gestures_classifier_options; + + // The user-defined result callback for processing live stream data. + // The result callback should only be specified when the running mode is set + // to RunningMode::LIVE_STREAM. 
Arguments of the callback function include: + // the pointer to recognition result, the image that result was obtained + // on, the timestamp relevant to recognition results and pointer to error + // message in case of any failure. The validity of the passed arguments is + // true for the lifetime of the callback function. + // + // A caller is responsible for closing gesture recognizer result. + typedef void (*result_callback_fn)(GestureRecognizerResult* result, + const MpImage image, int64_t timestamp_ms, + char* error_msg); + result_callback_fn result_callback; +}; + +// Creates an GestureRecognizer from provided `options`. +// Returns a pointer to the gesture recognizer on success. +// If an error occurs, returns `nullptr` and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT void* gesture_recognizer_create( + struct GestureRecognizerOptions* options, char** error_msg); + +// Performs gesture recognition on the input `image`. Returns `0` on success. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int gesture_recognizer_recognize_image( + void* recognizer, const MpImage* image, GestureRecognizerResult* result, + char** error_msg); + +// Performs gesture recognition on the provided video frame. +// Only use this method when the GestureRecognizer is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int gesture_recognizer_recognize_for_video( + void* recognizer, const MpImage* image, int64_t timestamp_ms, + GestureRecognizerResult* result, char** error_msg); + +// Sends live image data to gesture recognition, and the results will be +// available via the `result_callback` provided in the GestureRecognizerOptions. +// Only use this method when the GestureRecognizer is created with the live +// stream running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the gesture recognizer. The input timestamps must be monotonically +// increasing. +// The `result_callback` provides: +// - The recognition results as an GestureRecognizerResult object. +// - The const reference to the corresponding input image that the gesture +// recognizer runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int gesture_recognizer_recognize_async(void* recognizer, + const MpImage* image, + int64_t timestamp_ms, + char** error_msg); + +// Frees the memory allocated inside a GestureRecognizerResult result. +// Does not free the result pointer itself. 
+MP_EXPORT void gesture_recognizer_close_result(GestureRecognizerResult* result); + +// Frees gesture recognizer. +// If an error occurs, returns an error code and sets the error parameter to an +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int gesture_recognizer_close(void* recognizer, char** error_msg); + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_ diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h new file mode 100644 index 000000000..e546fa509 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h @@ -0,0 +1,70 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_ +#define MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_ + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/core/common.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The gesture recognition result from GestureRecognizer, where each vector +// element represents a single hand detected in the image. +struct GestureRecognizerResult { + // Recognized hand gestures with sorted order such that the winning label is + // the first item in the list. + struct Category** gestures; + + // The number of elements in the gestures array. + uint32_t gestures_count; + + // The number of elements in the gestures categories array. + uint32_t* gestures_categories_counts; + + // Classification of handedness. + struct Category** handedness; + + // The number of elements in the handedness array. + uint32_t handedness_count; + + // The number of elements in the handedness categories array. + uint32_t* handedness_categories_counts; + + // Detected hand landmarks in normalized image coordinates. + struct NormalizedLandmarks* hand_landmarks; + + // The number of elements in the hand_landmarks array. + uint32_t hand_landmarks_count; + + // Detected hand landmarks in world coordinates. + struct Landmarks* hand_world_landmarks; + + // The number of elements in the hand_world_landmarks array. 
+ uint32_t hand_world_landmarks_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_ diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc new file mode 100644 index 000000000..723ff9838 --- /dev/null +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc @@ -0,0 +1,123 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h" + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace { + +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::vision::DecodeImageFromFile; +using testing::HasSubstr; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kModelName[] = "gesture_recognizer.task"; +constexpr float kPrecision = 1e-4; +constexpr float kLandmarkPrecision = 1e-3; +constexpr int kIterations = 100; + +std::string GetFullPath(absl::string_view file_name) { + return JoinPath("./", kTestDataDirectory, file_name); +} + +TEST(GestureRecognizerTest, ImageModeTest) { + const auto image = DecodeImageFromFile(GetFullPath("fist.jpg")); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}}; + + void* recognizer = + gesture_recognizer_create(&options, /* error_msg */ nullptr); + EXPECT_NE(recognizer, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = 
image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + GestureRecognizerResult result; + gesture_recognizer_recognize_image(recognizer, &mp_image, &result, + /* error_msg */ nullptr); + + // Expects to have the same number of hands detected. + EXPECT_EQ(result.gestures_count, 1); + EXPECT_EQ(result.handedness_count, 1); + // Actual gesture with top score matches expected gesture. + EXPECT_EQ(std::string{result.gestures[0][0].category_name}, "Closed_Fist"); + EXPECT_NEAR(result.gestures[0][0].score, 0.9000f, kPrecision); + + // Actual handedness matches expected handedness. + EXPECT_EQ(std::string{result.handedness[0][0].category_name}, "Right"); + EXPECT_NEAR(result.handedness[0][0].score, 0.9893f, kPrecision); + + // Actual landmarks match expected landmarks. + EXPECT_NEAR(result.hand_landmarks[0].landmarks[0].x, 0.477f, + kLandmarkPrecision); + EXPECT_NEAR(result.hand_landmarks[0].landmarks[0].y, 0.661f, + kLandmarkPrecision); + EXPECT_NEAR(result.hand_landmarks[0].landmarks[0].z, 0.0f, + kLandmarkPrecision); + EXPECT_NEAR(result.hand_world_landmarks[0].landmarks[0].x, -0.009f, + kLandmarkPrecision); + EXPECT_NEAR(result.hand_world_landmarks[0].landmarks[0].y, 0.082f, + kLandmarkPrecision); + EXPECT_NEAR(result.hand_world_landmarks[0].landmarks[0].z, 0.006f, + kLandmarkPrecision); + + gesture_recognizer_close_result(&result); + gesture_recognizer_close(recognizer, /* error_msg */ nullptr); +} + +// TODO other tests + +} // namespace diff --git a/mediapipe/tasks/testdata/vision/BUILD b/mediapipe/tasks/testdata/vision/BUILD index 3f83118b0..e35e04a97 100644 --- a/mediapipe/tasks/testdata/vision/BUILD +++ b/mediapipe/tasks/testdata/vision/BUILD @@ -185,6 +185,7 @@ filegroup( "face_landmarker.task", "face_landmarker_v2.task", "face_stylizer_color_ink.task", + "gesture_recognizer.task", "hair_segmentation.tflite", "hand_landmark_full.tflite", "hand_landmark_lite.tflite", From 3c655e23345d93c78afb3f97229b70bdf25d44ed Mon Sep 17 00:00:00 2001 From: Kinar Date: Wed, 29 Nov 2023 03:08:09 -0800 Subject: [PATCH 2/4] Revised Gesture Recognizer API implementation and associated tests --- .../gesture_recognizer_result_converter.cc | 1 - ...esture_recognizer_result_converter_test.cc | 98 +++++++ .../containers/landmark_converter.cc | 26 +- .../containers/landmark_converter.h | 16 +- .../containers/landmark_converter_test.cc | 121 ++++++++ .../gesture_recognizer/gesture_recognizer.h | 4 +- .../gesture_recognizer_test.cc | 258 ++++++++++++++++-- 7 files changed, 467 insertions(+), 57 deletions(-) diff --git a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc index 3b2da8eab..6ac8b1370 100644 --- a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc +++ b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc @@ -16,7 +16,6 @@ limitations under the License. 
#include "mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.h" #include -#include #include "mediapipe/tasks/c/components/containers/category_converter.h" #include "mediapipe/tasks/c/components/containers/landmark_converter.h" diff --git a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc index 551498e12..6c1f2f798 100644 --- a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc +++ b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc @@ -21,4 +21,102 @@ limitations under the License. namespace mediapipe::tasks::c::components::containers { +TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { + ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult + cpp_result; + + // Initialize gestures + Classification classification_for_gestures; + classification_for_gestures.set_index(0); + classification_for_gestures.set_score(0.9f); + classification_for_gestures.set_label("gesture_label_1"); + classification_for_gestures.set_display_name("gesture_display_name_1"); + ClassificationList gestures_list; + *gestures_list.add_classification() = classification_for_gestures; + cpp_result.gestures.push_back(gestures_list); + + // Initialize handedness + Classification classification_for_handedness; + classification_for_handedness.set_index(1); + classification_for_handedness.set_score(0.8f); + classification_for_handedness.set_label("handeness_label_1"); + classification_for_handedness.set_display_name("handeness_display_name_1"); + ClassificationList handedness_list; + *handedness_list.add_classification() = classification_for_handedness; + cpp_result.handedness.push_back(handedness_list); + + // Initialize hand_landmarks + NormalizedLandmark normalized_landmark; + normalized_landmark.set_x(0.1f); + normalized_landmark.set_y(0.2f); + normalized_landmark.set_z(0.3f); + NormalizedLandmarkList normalized_landmark_list; + *normalized_landmark_list.add_landmark() = normalized_landmark; + cpp_result.hand_landmarks.push_back(normalized_landmark_list); + + // Initialize hand_world_landmarks + Landmark landmark; + landmark.set_x(1.0f); + landmark.set_y(1.1f); + landmark.set_z(1.2f); + + LandmarkList landmark_list; + *landmark_list.add_landmark() = landmark; + cpp_result.hand_world_landmarks.push_back(landmark_list); + + GestureRecognizerResult c_result; + CppConvertToGestureRecognizerResult(cpp_result, &c_result); + + // Verify conversion of gestures + EXPECT_NE(c_result.gestures, nullptr); + EXPECT_EQ(c_result.gestures_count, cpp_result.gestures.size()); + + for (uint32_t i = 0; i < c_result.gestures_count; ++i) { + EXPECT_EQ(c_result.gestures_categories_counts[i], + cpp_result.gestures[i].classification_size()); + for (uint32_t j = 0; j < c_result.gestures_categories_counts[i]; ++j) { + auto gesture = cpp_result.gestures[i].classification(j); + EXPECT_EQ(std::string(c_result.gestures[i][j].category_name), + gesture.label()); + EXPECT_FLOAT_EQ(c_result.gestures[i][j].score, gesture.score()); + } + } + + // Verify conversion of hand_landmarks + EXPECT_NE(c_result.hand_landmarks, nullptr); + EXPECT_EQ(c_result.hand_landmarks_count, cpp_result.hand_landmarks.size()); + + for (uint32_t i = 0; i < c_result.hand_landmarks_count; ++i) { + EXPECT_EQ(c_result.hand_landmarks[i].landmarks_count, + cpp_result.hand_landmarks[i].landmark_size()); + for (uint32_t j = 0; j < 
c_result.hand_landmarks[i].landmarks_count; ++j) { + const auto& landmark = cpp_result.hand_landmarks[i].landmark(j); + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].x, landmark.x()); + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].y, landmark.y()); + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].z, landmark.z()); + } + } + + // Verify conversion of hand_world_landmarks + EXPECT_NE(c_result.hand_world_landmarks, nullptr); + EXPECT_EQ(c_result.hand_world_landmarks_count, + cpp_result.hand_world_landmarks.size()); + for (uint32_t i = 0; i < c_result.hand_world_landmarks_count; ++i) { + EXPECT_EQ(c_result.hand_world_landmarks[i].landmarks_count, + cpp_result.hand_world_landmarks[i].landmark_size()); + for (uint32_t j = 0; j < c_result.hand_world_landmarks[i].landmarks_count; + ++j) { + const auto& landmark = cpp_result.hand_world_landmarks[i].landmark(j); + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].x, + landmark.x()); + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].y, + landmark.y()); + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].z, + landmark.z()); + } + } + + CppCloseGestureRecognizerResult(&c_result); +} + } // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/landmark_converter.cc b/mediapipe/tasks/c/components/containers/landmark_converter.cc index 5f4ab5ef2..f5643ffaa 100644 --- a/mediapipe/tasks/c/components/containers/landmark_converter.cc +++ b/mediapipe/tasks/c/components/containers/landmark_converter.cc @@ -18,19 +18,13 @@ limitations under the License. #include #include "mediapipe/tasks/c/components/containers/landmark.h" - -typedef Landmark LandmarkC; -typedef NormalizedLandmark NormalizedLandmarkC; -typedef Landmarks LandmarksC; -typedef NormalizedLandmarks NormalizedLandmarksC; - #include "mediapipe/tasks/cc/components/containers/landmark.h" namespace mediapipe::tasks::c::components::containers { void CppConvertToLandmark( const mediapipe::tasks::components::containers::Landmark& in, - LandmarkC* out) { + ::Landmark* out) { out->x = in.x; out->y = in.y; out->z = in.z; @@ -54,7 +48,7 @@ void CppConvertToLandmark( void CppConvertToNormalizedLandmark( const mediapipe::tasks::components::containers::NormalizedLandmark& in, - NormalizedLandmarkC* out) { + ::NormalizedLandmark* out) { out->x = in.x; out->y = in.y; out->z = in.z; @@ -78,9 +72,9 @@ void CppConvertToNormalizedLandmark( void CppConvertToLandmarks( const std::vector& in, - LandmarksC* out) { + ::Landmarks* out) { out->landmarks_count = in.size(); - out->landmarks = new LandmarkC[out->landmarks_count]; + out->landmarks = new ::Landmark[out->landmarks_count]; for (uint32_t i = 0; i < out->landmarks_count; ++i) { CppConvertToLandmark(in[i], &out->landmarks[i]); } @@ -89,22 +83,22 @@ void CppConvertToLandmarks( void CppConvertToNormalizedLandmarks( const std::vector< mediapipe::tasks::components::containers::NormalizedLandmark>& in, - NormalizedLandmarksC* out) { + ::NormalizedLandmarks* out) { out->landmarks_count = in.size(); - out->landmarks = new NormalizedLandmarkC[out->landmarks_count]; + out->landmarks = new ::NormalizedLandmark[out->landmarks_count]; for (uint32_t i = 0; i < out->landmarks_count; ++i) { CppConvertToNormalizedLandmark(in[i], &out->landmarks[i]); } } -void CppCloseLandmark(LandmarkC* in) { +void CppCloseLandmark(::Landmark* in) { if (in && in->name) { free(in->name); in->name = nullptr; } } -void CppCloseLandmarks(LandmarksC* in) { +void 
CppCloseLandmarks(::Landmarks* in) { for (uint32_t i = 0; i < in->landmarks_count; ++i) { CppCloseLandmark(&in->landmarks[i]); } @@ -113,14 +107,14 @@ void CppCloseLandmarks(LandmarksC* in) { in->landmarks_count = 0; } -void CppCloseNormalizedLandmark(NormalizedLandmarkC* in) { +void CppCloseNormalizedLandmark(::NormalizedLandmark* in) { if (in && in->name) { free(in->name); in->name = nullptr; } } -void CppCloseNormalizedLandmarks(NormalizedLandmarksC* in) { +void CppCloseNormalizedLandmarks(::NormalizedLandmarks* in) { for (uint32_t i = 0; i < in->landmarks_count; ++i) { CppCloseNormalizedLandmark(&in->landmarks[i]); } diff --git a/mediapipe/tasks/c/components/containers/landmark_converter.h b/mediapipe/tasks/c/components/containers/landmark_converter.h index f59158112..1b3626386 100644 --- a/mediapipe/tasks/c/components/containers/landmark_converter.h +++ b/mediapipe/tasks/c/components/containers/landmark_converter.h @@ -23,28 +23,28 @@ namespace mediapipe::tasks::c::components::containers { void CppConvertToLandmark( const mediapipe::tasks::components::containers::Landmark& in, - Landmark* out); + ::Landmark* out); void CppConvertToNormalizedLandmark( const mediapipe::tasks::components::containers::NormalizedLandmark& in, - NormalizedLandmark* out); + ::NormalizedLandmark* out); void CppConvertToLandmarks( const std::vector& in, - Landmarks* out); + ::Landmarks* out); void CppConvertToNormalizedLandmarks( const std::vector< mediapipe::tasks::components::containers::NormalizedLandmark>& in, - NormalizedLandmarks* out); + ::NormalizedLandmarks* out); -void CppCloseLandmark(struct Landmark* in); +void CppCloseLandmark(struct ::Landmark* in); -void CppCloseLandmarks(struct Landmarks* in); +void CppCloseLandmarks(struct ::Landmarks* in); -void CppCloseNormalizedLandmark(struct NormalizedLandmark* in); +void CppCloseNormalizedLandmark(struct ::NormalizedLandmark* in); -void CppCloseNormalizedLandmarks(struct NormalizedLandmarks* in); +void CppCloseNormalizedLandmarks(struct ::NormalizedLandmarks* in); } // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/landmark_converter_test.cc b/mediapipe/tasks/c/components/containers/landmark_converter_test.cc index cf2b5a0e9..d6cbe3e85 100644 --- a/mediapipe/tasks/c/components/containers/landmark_converter_test.cc +++ b/mediapipe/tasks/c/components/containers/landmark_converter_test.cc @@ -25,4 +25,125 @@ limitations under the License. 
namespace mediapipe::tasks::c::components::containers { +TEST(LandmarkConverterTest, ConvertsCustomLandmark) { + mediapipe::tasks::components::containers::Landmark cpp_landmark = {0.1f, 0.2f, + 0.3f}; + + ::Landmark c_landmark; + CppConvertToLandmark(cpp_landmark, &c_landmark); + EXPECT_FLOAT_EQ(c_landmark.x, cpp_landmark.x); + EXPECT_FLOAT_EQ(c_landmark.y, cpp_landmark.y); + EXPECT_FLOAT_EQ(c_landmark.z, cpp_landmark.z); + CppCloseLandmark(&c_landmark); +} + +TEST(LandmarksConverterTest, ConvertsCustomLandmarks) { + std::vector + cpp_landmarks = { + {0.1f, 0.2f, 0.3f}, // First Landmark + {0.4f, 0.5f, 0.6f} // Second Landmark + }; + + ::Landmarks c_landmarks; + CppConvertToLandmarks(cpp_landmarks, &c_landmarks); + + EXPECT_EQ(c_landmarks.landmarks_count, cpp_landmarks.size()); + for (size_t i = 0; i < c_landmarks.landmarks_count; ++i) { + EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].x, cpp_landmarks[i].x); + EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].y, cpp_landmarks[i].y); + EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].z, cpp_landmarks[i].z); + } + + CppCloseLandmarks(&c_landmarks); +} + +TEST(NormalizedLandmarkConverterTest, ConvertsCustomNormalizedLandmark) { + mediapipe::tasks::components::containers::NormalizedLandmark + cpp_normalized_landmark = {0.7f, 0.8f, 0.9f}; + + ::NormalizedLandmark c_normalized_landmark; + CppConvertToNormalizedLandmark(cpp_normalized_landmark, + &c_normalized_landmark); + + EXPECT_FLOAT_EQ(c_normalized_landmark.x, cpp_normalized_landmark.x); + EXPECT_FLOAT_EQ(c_normalized_landmark.y, cpp_normalized_landmark.y); + EXPECT_FLOAT_EQ(c_normalized_landmark.z, cpp_normalized_landmark.z); + + CppCloseNormalizedLandmark(&c_normalized_landmark); +} + +TEST(NormalizedLandmarksConverterTest, ConvertsCustomNormalizedLandmarks) { + std::vector + cpp_normalized_landmarks = { + {0.1f, 0.2f, 0.3f}, // First NormalizedLandmark + {0.4f, 0.5f, 0.6f} // Second NormalizedLandmark + }; + + ::NormalizedLandmarks c_normalized_landmarks; + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &c_normalized_landmarks); + + EXPECT_EQ(c_normalized_landmarks.landmarks_count, + cpp_normalized_landmarks.size()); + for (size_t i = 0; i < c_normalized_landmarks.landmarks_count; ++i) { + EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].x, + cpp_normalized_landmarks[i].x); + EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].y, + cpp_normalized_landmarks[i].y); + EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].z, + cpp_normalized_landmarks[i].z); + } + + CppCloseNormalizedLandmarks(&c_normalized_landmarks); +} + +TEST(LandmarkConverterTest, FreesMemory) { + mediapipe::tasks::components::containers::Landmark cpp_landmark = { + 0.1f, 0.2f, 0.3f, 0.0f, 0.0f, "foo"}; + + ::Landmark c_landmark; + CppConvertToLandmark(cpp_landmark, &c_landmark); + EXPECT_NE(c_landmark.name, nullptr); + + CppCloseLandmark(&c_landmark); + EXPECT_EQ(c_landmark.name, nullptr); +} + +TEST(NormalizedLandmarkConverterTest, FreesMemory) { + mediapipe::tasks::components::containers::NormalizedLandmark cpp_landmark = { + 0.1f, 0.2f, 0.3f, 0.0f, 0.0f, "foo"}; + + ::NormalizedLandmark c_landmark; + CppConvertToNormalizedLandmark(cpp_landmark, &c_landmark); + EXPECT_NE(c_landmark.name, nullptr); + + CppCloseNormalizedLandmark(&c_landmark); + EXPECT_EQ(c_landmark.name, nullptr); +} + +TEST(LandmarksConverterTest, FreesMemory) { + std::vector + cpp_landmarks = {{0.1f, 0.2f, 0.3f}, {0.4f, 0.5f, 0.6f}}; + + ::Landmarks c_landmarks; + CppConvertToLandmarks(cpp_landmarks, &c_landmarks); + EXPECT_NE(c_landmarks.landmarks, 
nullptr); + + CppCloseLandmarks(&c_landmarks); + EXPECT_EQ(c_landmarks.landmarks, nullptr); +} + +TEST(NormalizedLandmarksConverterTest, FreesMemory) { + std::vector + cpp_normalized_landmarks = {{0.1f, 0.2f, 0.3f}, {0.4f, 0.5f, 0.6f}}; + + ::NormalizedLandmarks c_normalized_landmarks; + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &c_normalized_landmarks); + EXPECT_NE(c_normalized_landmarks.landmarks, nullptr); + + CppCloseNormalizedLandmarks(&c_normalized_landmarks); + EXPECT_EQ(c_normalized_landmarks.landmarks, nullptr); +} + } // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h index 1c2b65112..39f4a1734 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h @@ -82,12 +82,12 @@ struct GestureRecognizerOptions { // // A caller is responsible for closing gesture recognizer result. typedef void (*result_callback_fn)(GestureRecognizerResult* result, - const MpImage image, int64_t timestamp_ms, + const MpImage& image, int64_t timestamp_ms, char* error_msg); result_callback_fn result_callback; }; -// Creates an GestureRecognizer from provided `options`. +// Creates a GestureRecognizer from the provided `options`. // Returns a pointer to the gesture recognizer on success. // If an error occurs, returns `nullptr` and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc index 723ff9838..ce3f5df5a 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc @@ -36,16 +36,46 @@ using testing::HasSubstr; constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; constexpr char kModelName[] = "gesture_recognizer.task"; -constexpr float kPrecision = 1e-4; -constexpr float kLandmarkPrecision = 1e-3; +constexpr char kImageFile[] = "fist.jpg"; +constexpr float kScorePrecision = 1e-2; +constexpr float kLandmarkPrecision = 1e-1; constexpr int kIterations = 100; std::string GetFullPath(absl::string_view file_name) { return JoinPath("./", kTestDataDirectory, file_name); } +void MatchesGestureRecognizerResult(GestureRecognizerResult* result, + const float score_precision, + const float landmark_precision) { + // Expects to have the same number of hands detected. + EXPECT_EQ(result->gestures_count, 1); + EXPECT_EQ(result->handedness_count, 1); + // Actual gesture with top score matches expected gesture. + EXPECT_EQ(std::string{result->gestures[0][0].category_name}, "Closed_Fist"); + EXPECT_NEAR(result->gestures[0][0].score, 0.91f, score_precision); + + // Actual handedness matches expected handedness. + EXPECT_EQ(std::string{result->handedness[0][0].category_name}, "Right"); + EXPECT_NEAR(result->handedness[0][0].score, 0.9893f, score_precision); + + // Actual landmarks match expected landmarks.
+ EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].x, 0.477f, + landmark_precision); + EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].y, 0.661f, + landmark_precision); + EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].z, 0.0f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].x, -0.009f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].y, 0.082f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].z, 0.006f, + landmark_precision); +} + TEST(GestureRecognizerTest, ImageModeTest) { - const auto image = DecodeImageFromFile(GetFullPath("fist.jpg")); + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); ASSERT_TRUE(image.ok()); const std::string model_path = GetFullPath(kModelName); @@ -88,36 +118,204 @@ TEST(GestureRecognizerTest, ImageModeTest) { GestureRecognizerResult result; gesture_recognizer_recognize_image(recognizer, &mp_image, &result, /* error_msg */ nullptr); - - // Expects to have the same number of hands detected. - EXPECT_EQ(result.gestures_count, 1); - EXPECT_EQ(result.handedness_count, 1); - // Actual gesture with top score matches expected gesture. - EXPECT_EQ(std::string{result.gestures[0][0].category_name}, "Closed_Fist"); - EXPECT_NEAR(result.gestures[0][0].score, 0.9000f, kPrecision); - - // Actual handedness matches expected handedness. - EXPECT_EQ(std::string{result.handedness[0][0].category_name}, "Right"); - EXPECT_NEAR(result.handedness[0][0].score, 0.9893f, kPrecision); - - // Actual landmarks match expected landmarks. - EXPECT_NEAR(result.hand_landmarks[0].landmarks[0].x, 0.477f, - kLandmarkPrecision); - EXPECT_NEAR(result.hand_landmarks[0].landmarks[0].y, 0.661f, - kLandmarkPrecision); - EXPECT_NEAR(result.hand_landmarks[0].landmarks[0].z, 0.0f, - kLandmarkPrecision); - EXPECT_NEAR(result.hand_world_landmarks[0].landmarks[0].x, -0.009f, - kLandmarkPrecision); - EXPECT_NEAR(result.hand_world_landmarks[0].landmarks[0].y, 0.082f, - kLandmarkPrecision); - EXPECT_NEAR(result.hand_world_landmarks[0].landmarks[0].z, 0.006f, - kLandmarkPrecision); - + MatchesGestureRecognizerResult(&result, kScorePrecision, kLandmarkPrecision); gesture_recognizer_close_result(&result); gesture_recognizer_close(recognizer, /* error_msg */ nullptr); } -// TODO other tests +TEST(GestureRecognizerTest, VideoModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::VIDEO, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}}; + + void* recognizer = + gesture_recognizer_create(&options, /* error_msg */ nullptr); + EXPECT_NE(recognizer, nullptr); + + const auto& image_frame = 
image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + GestureRecognizerResult result; + gesture_recognizer_recognize_for_video(recognizer, &mp_image, i, &result, + /* error_msg */ nullptr); + + MatchesGestureRecognizerResult(&result, kScorePrecision, + kLandmarkPrecision); + gesture_recognizer_close_result(&result); + } + gesture_recognizer_close(recognizer, /* error_msg */ nullptr); +} + +// A structure to support LiveStreamModeTest below. This structure holds a +// static method `Fn` for a callback function of the C API. The `static` +// qualifier allows taking the address of the method to match the API style. +// Another static struct member, `last_timestamp`, is used to verify that the +// current timestamp is greater than the previous one. +struct LiveStreamModeCallback { + static int64_t last_timestamp; + static void Fn(GestureRecognizerResult* recognizer_result, + const MpImage& image, int64_t timestamp, char* error_msg) { + ASSERT_NE(recognizer_result, nullptr); + ASSERT_EQ(error_msg, nullptr); + MatchesGestureRecognizerResult(recognizer_result, kScorePrecision, + kLandmarkPrecision); + EXPECT_GT(image.image_frame.width, 0); + EXPECT_GT(image.image_frame.height, 0); + EXPECT_GT(timestamp, last_timestamp); + last_timestamp++; + } +}; +int64_t LiveStreamModeCallback::last_timestamp = -1; + +TEST(GestureRecognizerTest, LiveStreamModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::LIVE_STREAM, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + /* result_callback= */ LiveStreamModeCallback::Fn, + }; + + void* recognizer = + gesture_recognizer_create(&options, /* error_msg */ nullptr); + EXPECT_NE(recognizer, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + EXPECT_GE(gesture_recognizer_recognize_async(recognizer, &mp_image, i, + /* error_msg */ nullptr), + 0); + } + gesture_recognizer_close(recognizer, /* error_msg */ nullptr); + + // Due to the flow limiter, the total number of outputs might be smaller + // than the number of iterations.
+ EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations); + EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0); +} + +TEST(GestureRecognizerTest, InvalidArgumentHandling) { + // It is an error to set neither the asset buffer nor the path. + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ nullptr}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {}, + {}}; + + char* error_msg; + void* recognizer = gesture_recognizer_create(&options, &error_msg); + EXPECT_EQ(recognizer, nullptr); + + EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify")); + + free(error_msg); +} + +TEST(GestureRecognizerTest, FailedRecognitionHandling) { + const std::string model_path = GetFullPath(kModelName); + GestureRecognizerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + {/* display_names_locale= */ nullptr, + /* max_results= */ -1, + /* score_threshold= */ 0.0, + /* category_allowlist= */ nullptr, + /* category_allowlist_count= */ 0, + /* category_denylist= */ nullptr, + /* category_denylist_count= */ 0}, + }; + + void* recognizer = gesture_recognizer_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(recognizer, nullptr); + + const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; + GestureRecognizerResult result; + char* error_msg; + gesture_recognizer_recognize_image(recognizer, &mp_image, &result, + &error_msg); + EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); + free(error_msg); + gesture_recognizer_close(recognizer, /* error_msg */ nullptr); +} } // namespace From d19d5a50be08c5569e5d8f8087313fbd239a14b6 Mon Sep 17 00:00:00 2001 From: Kinar Date: Wed, 29 Nov 2023 03:17:35 -0800 Subject: [PATCH 3/4] Added FreeMemory test for GestureRecognizerResult --- ...esture_recognizer_result_converter_test.cc | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc index 6c1f2f798..75c8645ce 100644 --- a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc +++ b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter_test.cc @@ -21,10 +21,9 @@ limitations under the License. 
namespace mediapipe::tasks::c::components::containers { -TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { - ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult - cpp_result; - +void InitGestureRecognizerResult( + ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult* + cpp_result) { // Initialize gestures Classification classification_for_gestures; classification_for_gestures.set_index(0); @@ -33,7 +32,7 @@ TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { classification_for_gestures.set_display_name("gesture_display_name_1"); ClassificationList gestures_list; *gestures_list.add_classification() = classification_for_gestures; - cpp_result.gestures.push_back(gestures_list); + cpp_result->gestures.push_back(gestures_list); // Initialize handedness Classification classification_for_handedness; @@ -43,7 +42,7 @@ TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { classification_for_handedness.set_display_name("handeness_display_name_1"); ClassificationList handedness_list; *handedness_list.add_classification() = classification_for_handedness; - cpp_result.handedness.push_back(handedness_list); + cpp_result->handedness.push_back(handedness_list); // Initialize hand_landmarks NormalizedLandmark normalized_landmark; @@ -52,7 +51,7 @@ TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { normalized_landmark.set_z(0.3f); NormalizedLandmarkList normalized_landmark_list; *normalized_landmark_list.add_landmark() = normalized_landmark; - cpp_result.hand_landmarks.push_back(normalized_landmark_list); + cpp_result->hand_landmarks.push_back(normalized_landmark_list); // Initialize hand_world_landmarks Landmark landmark; @@ -62,7 +61,13 @@ TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { LandmarkList landmark_list; *landmark_list.add_landmark() = landmark; - cpp_result.hand_world_landmarks.push_back(landmark_list); + cpp_result->hand_world_landmarks.push_back(landmark_list); +} + +TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { + ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult + cpp_result; + InitGestureRecognizerResult(&cpp_result); GestureRecognizerResult c_result; CppConvertToGestureRecognizerResult(cpp_result, &c_result); @@ -119,4 +124,25 @@ TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { CppCloseGestureRecognizerResult(&c_result); } +TEST(GestureRecognizerResultConverterTest, FreesMemory) { + ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult + cpp_result; + InitGestureRecognizerResult(&cpp_result); + + GestureRecognizerResult c_result; + CppConvertToGestureRecognizerResult(cpp_result, &c_result); + + EXPECT_NE(c_result.gestures, nullptr); + EXPECT_NE(c_result.handedness, nullptr); + EXPECT_NE(c_result.hand_landmarks, nullptr); + EXPECT_NE(c_result.hand_world_landmarks, nullptr); + + CppCloseGestureRecognizerResult(&c_result); + + EXPECT_EQ(c_result.gestures, nullptr); + EXPECT_EQ(c_result.handedness, nullptr); + EXPECT_EQ(c_result.hand_landmarks, nullptr); + EXPECT_EQ(c_result.hand_world_landmarks, nullptr); +} + } // namespace mediapipe::tasks::c::components::containers From 9a5aa1b360e9f42a365a8154bcf561c686d7fbf6 Mon Sep 17 00:00:00 2001 From: Kinar Date: Thu, 30 Nov 2023 09:13:10 -0800 Subject: [PATCH 4/4] Refactor GestureRecognizerResult conversion for default initialization --- .../gesture_recognizer_result_converter.cc | 42 ++++++++----------- .../gesture_recognizer/gesture_recognizer.cc | 36 
++++++++-------- .../gesture_recognizer/gesture_recognizer.h | 6 +-- .../gesture_recognizer_test.cc | 9 ++-- 4 files changed, 43 insertions(+), 50 deletions(-) diff --git a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc index 6ac8b1370..5aca374fc 100644 --- a/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc +++ b/mediapipe/tasks/c/components/containers/gesture_recognizer_result_converter.cc @@ -47,18 +47,15 @@ void CppConvertToGestureRecognizerResult( CppCategory cpp_category; // Set fields from the Classification protobuf - if (classification.has_index()) { - cpp_category.index = classification.index(); - } - if (classification.has_score()) { - cpp_category.score = classification.score(); - } - if (classification.has_label()) { - cpp_category.category_name = classification.label(); - } - if (classification.has_display_name()) { - cpp_category.display_name = classification.display_name(); - } + cpp_category.index = + classification.has_index() ? classification.index() : 0; + cpp_category.score = + classification.has_score() ? classification.score() : 0.0f; + cpp_category.category_name = + classification.has_label() ? classification.label() : ""; + cpp_category.display_name = classification.has_display_name() + ? classification.display_name() + : ""; CppConvertToCategory(cpp_category, &out->gestures[i][j]); } @@ -78,18 +75,15 @@ void CppConvertToGestureRecognizerResult( CppCategory cpp_category; // Set fields from the Classification protobuf - if (classification.has_index()) { - cpp_category.index = classification.index(); - } - if (classification.has_score()) { - cpp_category.score = classification.score(); - } - if (classification.has_label()) { - cpp_category.category_name = classification.label(); - } - if (classification.has_display_name()) { - cpp_category.display_name = classification.display_name(); - } + cpp_category.index = + classification.has_index() ? classification.index() : 0; + cpp_category.score = + classification.has_score() ? classification.score() : 0.0f; + cpp_category.category_name = + classification.has_label() ? classification.label() : ""; + cpp_category.display_name = classification.has_display_name() + ? 
classification.display_name() + : ""; CppConvertToCategory(cpp_category, &out->handedness[i][j]); } diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc index f8c42dcaf..692e3776b 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc @@ -138,10 +138,10 @@ GestureRecognizer* CppGestureRecognizerCreate( return recognizer->release(); } -int CppGestureRecognizerRecognize(void* recognizer, const MpImage* image, +int CppGestureRecognizerRecognize(void* recognizer, const MpImage& image, GestureRecognizerResult* result, char** error_msg) { - if (image->type == MpImage::GPU_BUFFER) { + if (image.type == MpImage::GPU_BUFFER) { const absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet."); @@ -150,9 +150,9 @@ int CppGestureRecognizerRecognize(void* recognizer, const MpImage* image, } const auto img = CreateImageFromBuffer( - static_cast(image->image_frame.format), - image->image_frame.image_buffer, image->image_frame.width, - image->image_frame.height); + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); if (!img.ok()) { ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); @@ -170,11 +170,11 @@ int CppGestureRecognizerRecognize(void* recognizer, const MpImage* image, } int CppGestureRecognizerRecognizeForVideo(void* recognizer, - const MpImage* image, + const MpImage& image, int64_t timestamp_ms, GestureRecognizerResult* result, char** error_msg) { - if (image->type == MpImage::GPU_BUFFER) { + if (image.type == MpImage::GPU_BUFFER) { absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet"); @@ -183,9 +183,9 @@ int CppGestureRecognizerRecognizeForVideo(void* recognizer, } const auto img = CreateImageFromBuffer( - static_cast(image->image_frame.format), - image->image_frame.image_buffer, image->image_frame.width, - image->image_frame.height); + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); if (!img.ok()) { ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); @@ -202,9 +202,9 @@ int CppGestureRecognizerRecognizeForVideo(void* recognizer, return 0; } -int CppGestureRecognizerRecognizeAsync(void* recognizer, const MpImage* image, +int CppGestureRecognizerRecognizeAsync(void* recognizer, const MpImage& image, int64_t timestamp_ms, char** error_msg) { - if (image->type == MpImage::GPU_BUFFER) { + if (image.type == MpImage::GPU_BUFFER) { absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet"); @@ -213,9 +213,9 @@ int CppGestureRecognizerRecognizeAsync(void* recognizer, const MpImage* image, } const auto img = CreateImageFromBuffer( - static_cast(image->image_frame.format), - image->image_frame.image_buffer, image->image_frame.width, - image->image_frame.height); + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); if (!img.ok()) { ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); @@ -257,7 +257,7 @@ void* gesture_recognizer_create(struct GestureRecognizerOptions* options, CppGestureRecognizerCreate(*options, error_msg); } -int gesture_recognizer_recognize_image(void* recognizer, const MpImage* image, +int gesture_recognizer_recognize_image(void* recognizer, const MpImage& image, 
GestureRecognizerResult* result, char** error_msg) { return mediapipe::tasks::c::vision::gesture_recognizer:: @@ -265,7 +265,7 @@ int gesture_recognizer_recognize_image(void* recognizer, const MpImage* image, } int gesture_recognizer_recognize_for_video(void* recognizer, - const MpImage* image, + const MpImage& image, int64_t timestamp_ms, GestureRecognizerResult* result, char** error_msg) { @@ -274,7 +274,7 @@ int gesture_recognizer_recognize_for_video(void* recognizer, result, error_msg); } -int gesture_recognizer_recognize_async(void* recognizer, const MpImage* image, +int gesture_recognizer_recognize_async(void* recognizer, const MpImage& image, int64_t timestamp_ms, char** error_msg) { return mediapipe::tasks::c::vision::gesture_recognizer:: CppGestureRecognizerRecognizeAsync(recognizer, image, timestamp_ms, diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h index 39f4a1734..4d59df62d 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h @@ -100,7 +100,7 @@ MP_EXPORT void* gesture_recognizer_create( // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. MP_EXPORT int gesture_recognizer_recognize_image( - void* recognizer, const MpImage* image, GestureRecognizerResult* result, + void* recognizer, const MpImage& image, GestureRecognizerResult* result, char** error_msg); // Performs gesture recognition on the provided video frame. @@ -113,7 +113,7 @@ MP_EXPORT int gesture_recognizer_recognize_image( // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. MP_EXPORT int gesture_recognizer_recognize_for_video( - void* recognizer, const MpImage* image, int64_t timestamp_ms, + void* recognizer, const MpImage& image, int64_t timestamp_ms, GestureRecognizerResult* result, char** error_msg); // Sends live image data to gesture recognition, and the results will be @@ -135,7 +135,7 @@ MP_EXPORT int gesture_recognizer_recognize_for_video( // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. 
MP_EXPORT int gesture_recognizer_recognize_async(void* recognizer, - const MpImage* image, + const MpImage& image, int64_t timestamp_ms, char** error_msg); diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc index ce3f5df5a..bd95d7e52 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc @@ -116,7 +116,7 @@ TEST(GestureRecognizerTest, ImageModeTest) { .height = image_frame->Height()}}; GestureRecognizerResult result; - gesture_recognizer_recognize_image(recognizer, &mp_image, &result, + gesture_recognizer_recognize_image(recognizer, mp_image, &result, /* error_msg */ nullptr); MatchesGestureRecognizerResult(&result, kScorePrecision, kLandmarkPrecision); gesture_recognizer_close_result(&result); @@ -166,7 +166,7 @@ TEST(GestureRecognizerTest, VideoModeTest) { for (int i = 0; i < kIterations; ++i) { GestureRecognizerResult result; - gesture_recognizer_recognize_for_video(recognizer, &mp_image, i, &result, + gesture_recognizer_recognize_for_video(recognizer, mp_image, i, &result, /* error_msg */ nullptr); MatchesGestureRecognizerResult(&result, kScorePrecision, @@ -242,7 +242,7 @@ TEST(GestureRecognizerTest, LiveStreamModeTest) { .height = image_frame->Height()}}; for (int i = 0; i < kIterations; ++i) { - EXPECT_GE(gesture_recognizer_recognize_async(recognizer, &mp_image, i, + EXPECT_GE(gesture_recognizer_recognize_async(recognizer, mp_image, i, /* error_msg */ nullptr), 0); } @@ -311,8 +311,7 @@ TEST(GestureRecognizerTest, FailedRecognitionHandling) { const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; GestureRecognizerResult result; char* error_msg; - gesture_recognizer_recognize_image(recognizer, &mp_image, &result, - &error_msg); + gesture_recognizer_recognize_image(recognizer, mp_image, &result, &error_msg); EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); free(error_msg); gesture_recognizer_close(recognizer, /* error_msg */ nullptr);