Merge pull request #4993 from kinaryml:c-landmarker-apis

PiperOrigin-RevId: 586804764
This commit is contained in:
Copybara-Service 2023-11-30 15:32:12 -08:00
commit 7d73a3e1fd
17 changed files with 1807 additions and 1 deletions

View File

@ -43,6 +43,33 @@ cc_test(
],
)
# Header-only C API container: plain C structs for (normalized) landmarks.
cc_library(
    name = "landmark",
    hdrs = ["landmark.h"],
)
# Converts between the C++ landmark containers and the C structs above,
# and owns the matching Close/free helpers.
cc_library(
    name = "landmark_converter",
    srcs = ["landmark_converter.cc"],
    hdrs = ["landmark_converter.h"],
    deps = [
        ":landmark",
        "//mediapipe/tasks/cc/components/containers:landmark",
    ],
)
# Unit tests for the C <-> C++ landmark conversion round trip and for the
# memory released by the CppClose* helpers.
cc_test(
    name = "landmark_converter_test",
    srcs = ["landmark_converter_test.cc"],
    deps = [
        ":landmark",
        ":landmark_converter",
        "//mediapipe/framework/port:gtest",
        "//mediapipe/tasks/cc/components/containers:landmark",
        "@com_google_googletest//:gtest_main",
    ],
)
cc_library(
name = "rect",
hdrs = ["rect.h"],
@ -142,7 +169,6 @@ cc_library(
":detection_result",
":keypoint",
":keypoint_converter",
":rect",
":rect_converter",
"//mediapipe/tasks/cc/components/containers:detection_result",
],

View File

@ -16,6 +16,8 @@ limitations under the License.
#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_CATEGORY_H_
#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_CATEGORY_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
@ -43,6 +45,12 @@ struct Category {
char* display_name;
};
// A list of categories.
struct Categories {
  // Owned array of `categories_count` elements; released by the matching
  // CppCloseCategories() helper.
  struct Category* categories;
  // Number of valid entries in `categories`.
  uint32_t categories_count;
};
#ifdef __cplusplus
} // extern C
#endif

View File

@ -41,4 +41,14 @@ void CppCloseCategory(Category* in) {
in->display_name = nullptr;
}
// Releases all memory held by a converted Categories list: closes every
// element, deletes the owned array, and resets the struct to an empty state.
// `in` must be non-null.
void CppCloseCategories(Categories* in) {
  // Use an unsigned index to match categories_count (uint32_t); the previous
  // `int` index caused a signed/unsigned comparison and was inconsistent with
  // the landmark converter's loops.
  for (uint32_t i = 0; i < in->categories_count; ++i) {
    CppCloseCategory(&in->categories[i]);
  }
  delete[] in->categories;
  in->categories = nullptr;
  in->categories_count = 0;
}
} // namespace mediapipe::tasks::c::components::containers

View File

@ -27,6 +27,8 @@ void CppConvertToCategory(
void CppCloseCategory(Category* in);
void CppCloseCategories(Categories* in);
} // namespace mediapipe::tasks::c::components::containers
#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_CATEGORY_CONVERTER_H_

View File

@ -0,0 +1,90 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_
#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Landmark represents a point in 3D space with x, y, z coordinates. The
// landmark coordinates are in meters. z represents the landmark depth, and the
// smaller the value the closer the world landmark is to the camera.
//
// NOTE(review): this header only includes <stdint.h> but the struct uses
// `bool`; pre-C23 C translation units also need <stdbool.h> — confirm the
// include list for pure-C consumers.
struct Landmark {
  float x;
  float y;
  float z;

  // For optional visibility.
  bool has_visibility;

  // Landmark visibility. Should stay unset if not supported.
  // Float score of whether landmark is visible or occluded by other objects.
  // Landmark considered as invisible also if it is not present on the screen
  // (out of scene bounds). Depending on the model, visibility value is either
  // a sigmoid or an argument of sigmoid.
  float visibility;

  // For optional presence.
  bool has_presence;

  // Landmark presence. Should stay unset if not supported.
  // Float score of whether landmark is present on the scene (located within
  // scene bounds). Depending on the model, presence value is either a result
  // of sigmoid or an argument of sigmoid function to get landmark presence
  // probability.
  float presence;

  // Landmark name. Should stay unset if not supported.
  // Defaults to nullptr. Owned (heap-allocated by the converter).
  char* name;
};
// A normalized version of above Landmark struct. All coordinates should be
// within [0, 1]. Field semantics mirror struct Landmark exactly.
struct NormalizedLandmark {
  float x;
  float y;
  float z;
  // True iff `visibility` carries a valid value.
  bool has_visibility;
  float visibility;
  // True iff `presence` carries a valid value.
  bool has_presence;
  float presence;
  // Optional landmark name; nullptr when unset. Owned by the struct.
  char* name;
};
// A list of Landmarks.
struct Landmarks {
  // Owned array of `landmarks_count` elements; freed by CppCloseLandmarks().
  struct Landmark* landmarks;
  uint32_t landmarks_count;
};
// A list of NormalizedLandmarks.
struct NormalizedLandmarks {
  // Owned array of `landmarks_count` elements; freed by
  // CppCloseNormalizedLandmarks().
  struct NormalizedLandmark* landmarks;
  uint32_t landmarks_count;
};
#ifdef __cplusplus
} // extern C
#endif
#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_H_

View File

@ -0,0 +1,128 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/c/components/containers/landmark_converter.h"

#include <cstdint>
#include <cstdlib>
#include <cstring>  // strdup
#include <vector>

#include "mediapipe/tasks/c/components/containers/landmark.h"
#include "mediapipe/tasks/cc/components/containers/landmark.h"
namespace mediapipe::tasks::c::components::containers {
// Copies a C++ Landmark into its C counterpart. Optional visibility/presence
// are mirrored through the has_* flags; `name` is heap-duplicated and must be
// released with CppCloseLandmark().
void CppConvertToLandmark(
    const mediapipe::tasks::components::containers::Landmark& in,
    ::Landmark* out) {
  out->x = in.x;
  out->y = in.y;
  out->z = in.z;

  out->has_visibility = in.visibility.has_value();
  if (out->has_visibility) {
    out->visibility = *in.visibility;
  }

  out->has_presence = in.presence.has_value();
  if (out->has_presence) {
    out->presence = *in.presence;
  }

  out->name = in.name.has_value() ? strdup(in.name->c_str()) : nullptr;
}
// Copies a C++ NormalizedLandmark into its C counterpart. Optional fields are
// mirrored through the has_* flags; `name` is heap-duplicated and must be
// released with CppCloseNormalizedLandmark().
void CppConvertToNormalizedLandmark(
    const mediapipe::tasks::components::containers::NormalizedLandmark& in,
    ::NormalizedLandmark* out) {
  out->x = in.x;
  out->y = in.y;
  out->z = in.z;

  out->has_visibility = in.visibility.has_value();
  if (out->has_visibility) {
    out->visibility = *in.visibility;
  }

  out->has_presence = in.presence.has_value();
  if (out->has_presence) {
    out->presence = *in.presence;
  }

  out->name = in.name.has_value() ? strdup(in.name->c_str()) : nullptr;
}
// Converts a vector of C++ Landmarks into a newly allocated C array; the
// caller releases it with CppCloseLandmarks().
void CppConvertToLandmarks(
    const std::vector<mediapipe::tasks::components::containers::Landmark>& in,
    ::Landmarks* out) {
  const uint32_t count = static_cast<uint32_t>(in.size());
  out->landmarks_count = count;
  out->landmarks = new ::Landmark[count];
  for (uint32_t idx = 0; idx < count; ++idx) {
    CppConvertToLandmark(in[idx], &out->landmarks[idx]);
  }
}
// Converts a vector of C++ NormalizedLandmarks into a newly allocated C
// array; the caller releases it with CppCloseNormalizedLandmarks().
void CppConvertToNormalizedLandmarks(
    const std::vector<
        mediapipe::tasks::components::containers::NormalizedLandmark>& in,
    ::NormalizedLandmarks* out) {
  const uint32_t count = static_cast<uint32_t>(in.size());
  out->landmarks_count = count;
  out->landmarks = new ::NormalizedLandmark[count];
  for (uint32_t idx = 0; idx < count; ++idx) {
    CppConvertToNormalizedLandmark(in[idx], &out->landmarks[idx]);
  }
}
// Frees the heap-duplicated `name` of a single C Landmark, if any.
// Null-tolerant and idempotent.
void CppCloseLandmark(::Landmark* in) {
  if (in == nullptr || in->name == nullptr) {
    return;
  }
  free(in->name);
  in->name = nullptr;
}
// Releases a converted Landmarks list: closes each element, deletes the owned
// array, and resets the struct to an empty state. Idempotent.
void CppCloseLandmarks(::Landmarks* in) {
  // Tolerate a null pointer for consistency with CppCloseLandmark, which
  // already guards against it.
  if (in == nullptr) {
    return;
  }
  for (uint32_t i = 0; i < in->landmarks_count; ++i) {
    CppCloseLandmark(&in->landmarks[i]);
  }
  delete[] in->landmarks;
  in->landmarks = nullptr;
  in->landmarks_count = 0;
}
// Frees the heap-duplicated `name` of a single C NormalizedLandmark, if any.
// Null-tolerant and idempotent.
void CppCloseNormalizedLandmark(::NormalizedLandmark* in) {
  if (in == nullptr || in->name == nullptr) {
    return;
  }
  free(in->name);
  in->name = nullptr;
}
// Releases a converted NormalizedLandmarks list: closes each element, deletes
// the owned array, and resets the struct to an empty state. Idempotent.
void CppCloseNormalizedLandmarks(::NormalizedLandmarks* in) {
  // Tolerate a null pointer for consistency with CppCloseNormalizedLandmark,
  // which already guards against it.
  if (in == nullptr) {
    return;
  }
  for (uint32_t i = 0; i < in->landmarks_count; ++i) {
    CppCloseNormalizedLandmark(&in->landmarks[i]);
  }
  delete[] in->landmarks;
  in->landmarks = nullptr;
  in->landmarks_count = 0;
}
} // namespace mediapipe::tasks::c::components::containers

View File

@ -0,0 +1,51 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_
#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_
#include "mediapipe/tasks/c/components/containers/landmark.h"
#include "mediapipe/tasks/cc/components/containers/landmark.h"
namespace mediapipe::tasks::c::components::containers {

// NOTE(review): these declarations use std::vector but the header does not
// include <vector>; it currently compiles only via a transitive include —
// confirm and add the include.

// Copies a single C++ (Normalized)Landmark into its C counterpart.
// `name` is heap-duplicated; release with the matching CppClose* function.
void CppConvertToLandmark(
    const mediapipe::tasks::components::containers::Landmark& in,
    ::Landmark* out);

void CppConvertToNormalizedLandmark(
    const mediapipe::tasks::components::containers::NormalizedLandmark& in,
    ::NormalizedLandmark* out);

// Converts a vector of landmarks into a newly allocated C array.
void CppConvertToLandmarks(
    const std::vector<mediapipe::tasks::components::containers::Landmark>& in,
    ::Landmarks* out);

void CppConvertToNormalizedLandmarks(
    const std::vector<
        mediapipe::tasks::components::containers::NormalizedLandmark>& in,
    ::NormalizedLandmarks* out);

// Release the memory allocated by the conversion functions above.
void CppCloseLandmark(struct ::Landmark* in);
void CppCloseLandmarks(struct ::Landmarks* in);
void CppCloseNormalizedLandmark(struct ::NormalizedLandmark* in);
void CppCloseNormalizedLandmarks(struct ::NormalizedLandmarks* in);

}  // namespace mediapipe::tasks::c::components::containers
#endif  // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_LANDMARK_CONVERTER_H_

View File

@ -0,0 +1,148 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/c/components/containers/landmark_converter.h"
#include <cstdlib>
#include <vector>
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/tasks/c/components/containers/landmark.h"
#include "mediapipe/tasks/cc/components/containers/landmark.h"
namespace mediapipe::tasks::c::components::containers {
// Converts a minimal C++ Landmark (x/y/z only) and verifies the coordinates
// are copied into the C struct.
TEST(LandmarkConverterTest, ConvertsCustomLandmark) {
  mediapipe::tasks::components::containers::Landmark cpp_landmark = {0.1f, 0.2f,
                                                                     0.3f};

  ::Landmark c_landmark;
  CppConvertToLandmark(cpp_landmark, &c_landmark);
  EXPECT_FLOAT_EQ(c_landmark.x, cpp_landmark.x);
  EXPECT_FLOAT_EQ(c_landmark.y, cpp_landmark.y);
  EXPECT_FLOAT_EQ(c_landmark.z, cpp_landmark.z);
  CppCloseLandmark(&c_landmark);
}
// Converts a vector of C++ Landmarks and verifies count and per-element
// coordinates in the resulting C array.
TEST(LandmarksConverterTest, ConvertsCustomLandmarks) {
  std::vector<mediapipe::tasks::components::containers::Landmark>
      cpp_landmarks = {
          {0.1f, 0.2f, 0.3f},  // First Landmark
          {0.4f, 0.5f, 0.6f}   // Second Landmark
      };

  ::Landmarks c_landmarks;
  CppConvertToLandmarks(cpp_landmarks, &c_landmarks);

  EXPECT_EQ(c_landmarks.landmarks_count, cpp_landmarks.size());
  for (size_t i = 0; i < c_landmarks.landmarks_count; ++i) {
    EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].x, cpp_landmarks[i].x);
    EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].y, cpp_landmarks[i].y);
    EXPECT_FLOAT_EQ(c_landmarks.landmarks[i].z, cpp_landmarks[i].z);
  }

  CppCloseLandmarks(&c_landmarks);
}
// Converts a minimal C++ NormalizedLandmark and verifies the coordinates are
// copied into the C struct.
TEST(NormalizedLandmarkConverterTest, ConvertsCustomNormalizedLandmark) {
  mediapipe::tasks::components::containers::NormalizedLandmark
      cpp_normalized_landmark = {0.7f, 0.8f, 0.9f};

  ::NormalizedLandmark c_normalized_landmark;
  CppConvertToNormalizedLandmark(cpp_normalized_landmark,
                                 &c_normalized_landmark);

  EXPECT_FLOAT_EQ(c_normalized_landmark.x, cpp_normalized_landmark.x);
  EXPECT_FLOAT_EQ(c_normalized_landmark.y, cpp_normalized_landmark.y);
  EXPECT_FLOAT_EQ(c_normalized_landmark.z, cpp_normalized_landmark.z);

  CppCloseNormalizedLandmark(&c_normalized_landmark);
}
// Converts a vector of C++ NormalizedLandmarks and verifies count and
// per-element coordinates in the resulting C array.
TEST(NormalizedLandmarksConverterTest, ConvertsCustomNormalizedLandmarks) {
  std::vector<mediapipe::tasks::components::containers::NormalizedLandmark>
      cpp_normalized_landmarks = {
          {0.1f, 0.2f, 0.3f},  // First NormalizedLandmark
          {0.4f, 0.5f, 0.6f}   // Second NormalizedLandmark
      };

  ::NormalizedLandmarks c_normalized_landmarks;
  CppConvertToNormalizedLandmarks(cpp_normalized_landmarks,
                                  &c_normalized_landmarks);

  EXPECT_EQ(c_normalized_landmarks.landmarks_count,
            cpp_normalized_landmarks.size());
  for (size_t i = 0; i < c_normalized_landmarks.landmarks_count; ++i) {
    EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].x,
                    cpp_normalized_landmarks[i].x);
    EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].y,
                    cpp_normalized_landmarks[i].y);
    EXPECT_FLOAT_EQ(c_normalized_landmarks.landmarks[i].z,
                    cpp_normalized_landmarks[i].z);
  }

  CppCloseNormalizedLandmarks(&c_normalized_landmarks);
}
// Verifies that CppCloseLandmark releases the strdup'ed `name` and resets the
// pointer to nullptr.
TEST(LandmarkConverterTest, FreesMemory) {
  mediapipe::tasks::components::containers::Landmark cpp_landmark = {
      0.1f, 0.2f, 0.3f, 0.0f, 0.0f, "foo"};

  ::Landmark c_landmark;
  CppConvertToLandmark(cpp_landmark, &c_landmark);
  EXPECT_NE(c_landmark.name, nullptr);

  CppCloseLandmark(&c_landmark);
  EXPECT_EQ(c_landmark.name, nullptr);
}
// Verifies that CppCloseNormalizedLandmark releases the strdup'ed `name` and
// resets the pointer to nullptr.
TEST(NormalizedLandmarkConverterTest, FreesMemory) {
  mediapipe::tasks::components::containers::NormalizedLandmark cpp_landmark = {
      0.1f, 0.2f, 0.3f, 0.0f, 0.0f, "foo"};

  ::NormalizedLandmark c_landmark;
  CppConvertToNormalizedLandmark(cpp_landmark, &c_landmark);
  EXPECT_NE(c_landmark.name, nullptr);

  CppCloseNormalizedLandmark(&c_landmark);
  EXPECT_EQ(c_landmark.name, nullptr);
}
// Verifies that CppCloseLandmarks deletes the owned array and nulls the
// pointer.
TEST(LandmarksConverterTest, FreesMemory) {
  std::vector<mediapipe::tasks::components::containers::Landmark>
      cpp_landmarks = {{0.1f, 0.2f, 0.3f}, {0.4f, 0.5f, 0.6f}};

  ::Landmarks c_landmarks;
  CppConvertToLandmarks(cpp_landmarks, &c_landmarks);
  EXPECT_NE(c_landmarks.landmarks, nullptr);

  CppCloseLandmarks(&c_landmarks);
  EXPECT_EQ(c_landmarks.landmarks, nullptr);
}
// Verifies that CppCloseNormalizedLandmarks deletes the owned array and nulls
// the pointer.
TEST(NormalizedLandmarksConverterTest, FreesMemory) {
  std::vector<mediapipe::tasks::components::containers::NormalizedLandmark>
      cpp_normalized_landmarks = {{0.1f, 0.2f, 0.3f}, {0.4f, 0.5f, 0.6f}};

  ::NormalizedLandmarks c_normalized_landmarks;
  CppConvertToNormalizedLandmarks(cpp_normalized_landmarks,
                                  &c_normalized_landmarks);
  EXPECT_NE(c_normalized_landmarks.landmarks, nullptr);

  CppCloseNormalizedLandmarks(&c_normalized_landmarks);
  EXPECT_EQ(c_normalized_landmarks.landmarks, nullptr);
}
} // namespace mediapipe::tasks::c::components::containers

View File

@ -0,0 +1,143 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
package(default_visibility = ["//mediapipe/tasks:internal"])
licenses(["notice"])
# Public C struct for gesture recognition results (header only).
cc_library(
    name = "gesture_recognizer_result",
    hdrs = ["gesture_recognizer_result.h"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/tasks/c/components/containers:category",
        "//mediapipe/tasks/c/components/containers:landmark",
    ],
)
# Converts the C++ GestureRecognizerResult into the public C struct and back
# out (close/free helpers).
cc_library(
    name = "gesture_recognizer_result_converter",
    srcs = ["gesture_recognizer_result_converter.cc"],
    hdrs = ["gesture_recognizer_result_converter.h"],
    deps = [
        ":gesture_recognizer_result",
        "//mediapipe/tasks/c/components/containers:category",
        "//mediapipe/tasks/c/components/containers:category_converter",
        "//mediapipe/tasks/c/components/containers:landmark",
        "//mediapipe/tasks/c/components/containers:landmark_converter",
        "//mediapipe/tasks/cc/components/containers:category",
        "//mediapipe/tasks/cc/components/containers:landmark",
        "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_result",
    ],
)
# Unit tests for the result converter above.
cc_test(
    name = "gesture_recognizer_result_converter_test",
    srcs = ["gesture_recognizer_result_converter_test.cc"],
    linkstatic = 1,
    deps = [
        ":gesture_recognizer_result",
        ":gesture_recognizer_result_converter",
        "//mediapipe/framework/port:gtest",
        "//mediapipe/tasks/c/components/containers:landmark",
        "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_result",
        "@com_google_googletest//:gtest_main",
    ],
)
# The C API implementation wrapping the C++ GestureRecognizer task.
# alwayslink so the extern "C" symbols survive static linking.
cc_library(
    name = "gesture_recognizer_lib",
    srcs = ["gesture_recognizer.cc"],
    hdrs = ["gesture_recognizer.h"],
    visibility = ["//visibility:public"],
    deps = [
        ":gesture_recognizer_result",
        ":gesture_recognizer_result_converter",
        "//mediapipe/framework/formats:image",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/tasks/c/components/processors:classifier_options",
        "//mediapipe/tasks/c/components/processors:classifier_options_converter",
        "//mediapipe/tasks/c/core:base_options",
        "//mediapipe/tasks/c/core:base_options_converter",
        "//mediapipe/tasks/c/vision/core:common",
        "//mediapipe/tasks/cc/vision/core:running_mode",
        "//mediapipe/tasks/cc/vision/gesture_recognizer",
        "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_result",
        "//mediapipe/tasks/cc/vision/utils:image_utils",
        "@com_google_absl//absl/log:absl_log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
    ],
    alwayslink = 1,
)
# End-to-end test of the C API against bundled test images/models.
# NOTE(review): the opencv/image_frame cc targets are listed in `data`
# rather than `deps` — confirm this is intentional (data is for runtime
# files, not libraries to link against).
cc_test(
    name = "gesture_recognizer_test",
    srcs = ["gesture_recognizer_test.cc"],
    data = [
        "//mediapipe/framework/formats:image_frame_opencv",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/tasks/testdata/vision:test_images",
        "//mediapipe/tasks/testdata/vision:test_models",
    ],
    linkstatic = 1,
    deps = [
        ":gesture_recognizer_lib",
        ":gesture_recognizer_result",
        "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/formats:image",
        "//mediapipe/framework/port:gtest",
        "//mediapipe/tasks/c/components/containers:landmark",
        "//mediapipe/tasks/c/vision/core:common",
        "//mediapipe/tasks/cc/vision/utils:image_utils",
        "@com_google_absl//absl/flags:flag",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
    ],
)
# Shared library for Linux consumers of the C API.
# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \
#   //mediapipe/tasks/c/vision/gesture_recognizer:libgesture_recognizer.so
cc_binary(
    name = "libgesture_recognizer.so",
    linkopts = [
        "-Wl,-soname=libgesture_recognizer.so",
        "-fvisibility=hidden",
    ],
    linkshared = True,
    tags = [
        "manual",
        "nobuilder",
        "notap",
    ],
    deps = [":gesture_recognizer_lib"],
)
# Shared library for macOS consumers of the C API.
# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \
#   //mediapipe/tasks/c/vision/gesture_recognizer:libgesture_recognizer.dylib
cc_binary(
    name = "libgesture_recognizer.dylib",
    linkopts = [
        "-Wl,-install_name,libgesture_recognizer.dylib",
        "-fvisibility=hidden",
    ],
    linkshared = True,
    tags = [
        "manual",
        "nobuilder",
        "notap",
    ],
    deps = [":gesture_recognizer_lib"],
)

View File

@ -0,0 +1,297 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h"
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <utility>
#include "absl/log/absl_log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/tasks/c/components/processors/classifier_options_converter.h"
#include "mediapipe/tasks/c/core/base_options_converter.h"
#include "mediapipe/tasks/c/vision/core/common.h"
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h"
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h"
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
namespace mediapipe::tasks::c::vision::gesture_recognizer {
namespace {
using ::mediapipe::tasks::c::components::containers::
CppCloseGestureRecognizerResult;
using ::mediapipe::tasks::c::components::containers::
CppConvertToGestureRecognizerResult;
using ::mediapipe::tasks::c::components::processors::
CppConvertToClassifierOptions;
using ::mediapipe::tasks::c::core::CppConvertToBaseOptions;
using ::mediapipe::tasks::vision::CreateImageFromBuffer;
using ::mediapipe::tasks::vision::core::RunningMode;
using ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizer;
typedef ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult
CppGestureRecognizerResult;
// Copies `status`'s message into `*error_msg` (heap string, caller frees) when
// the out-param is provided, and returns the status's raw error code.
// NOTE(review): strdup needs <cstring>; this file only includes <cstdlib> —
// confirm the include list.
int CppProcessError(absl::Status status, char** error_msg) {
  if (error_msg != nullptr) {
    *error_msg = strdup(status.ToString().c_str());
  }
  return status.raw_code();
}
} // namespace
// Copies the scalar fields and the two classifier-option sub-structs from the
// C options struct into the C++ task options. base_options, running_mode and
// result_callback are handled separately by CppGestureRecognizerCreate.
void CppConvertToGestureRecognizerOptions(
    const GestureRecognizerOptions& in,
    mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerOptions*
        out) {
  out->num_hands = in.num_hands;
  out->min_hand_detection_confidence = in.min_hand_detection_confidence;
  out->min_hand_presence_confidence = in.min_hand_presence_confidence;
  out->min_tracking_confidence = in.min_tracking_confidence;
  CppConvertToClassifierOptions(in.canned_gestures_classifier_options,
                                &out->canned_gestures_classifier_options);
  CppConvertToClassifierOptions(in.custom_gestures_classifier_options,
                                &out->custom_gestures_classifier_options);
}
// Creates a C++ GestureRecognizer from the C options struct.
// Returns an owning raw pointer on success (released by
// CppGestureRecognizerClose); on failure returns nullptr and, if `error_msg`
// is non-null, stores a heap-allocated error string the caller must free.
GestureRecognizer* CppGestureRecognizerCreate(
    const GestureRecognizerOptions& options, char** error_msg) {
  auto cpp_options =
      std::make_unique<::mediapipe::tasks::vision::gesture_recognizer::
                           GestureRecognizerOptions>();

  CppConvertToBaseOptions(options.base_options, &cpp_options->base_options);
  CppConvertToGestureRecognizerOptions(options, cpp_options.get());
  cpp_options->running_mode = static_cast<RunningMode>(options.running_mode);

  // Enable callback for processing live stream data when the running mode is
  // set to RunningMode::LIVE_STREAM.
  if (cpp_options->running_mode == RunningMode::LIVE_STREAM) {
    // A live-stream recognizer without a callback would silently drop all
    // results, so treat it as an argument error.
    if (options.result_callback == nullptr) {
      const absl::Status status = absl::InvalidArgumentError(
          "Provided null pointer to callback function.");
      ABSL_LOG(ERROR) << "Failed to create GestureRecognizer: " << status;
      CppProcessError(status, error_msg);
      return nullptr;
    }

    // Copy the function pointer into the lambda; `options` itself must not be
    // captured because it may not outlive the recognizer.
    GestureRecognizerOptions::result_callback_fn result_callback =
        options.result_callback;
    cpp_options->result_callback =
        [result_callback](absl::StatusOr<CppGestureRecognizerResult> cpp_result,
                          const Image& image, int64_t timestamp) {
          char* error_msg = nullptr;

          if (!cpp_result.ok()) {
            ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status();
            CppProcessError(cpp_result.status(), &error_msg);
            result_callback(nullptr, MpImage(), timestamp, error_msg);
            free(error_msg);
            return;
          }

          // Result is valid for the lifetime of the callback function.
          GestureRecognizerResult result;
          CppConvertToGestureRecognizerResult(*cpp_result, &result);

          // Wrap the frame as a non-owning MpImage view; the underlying
          // pixels are only valid for the duration of the callback.
          const auto& image_frame = image.GetImageFrameSharedPtr();
          const MpImage mp_image = {
              .type = MpImage::IMAGE_FRAME,
              .image_frame = {
                  .format = static_cast<::ImageFormat>(image_frame->Format()),
                  .image_buffer = image_frame->PixelData(),
                  .width = image_frame->Width(),
                  .height = image_frame->Height()}};

          result_callback(&result, mp_image, timestamp,
                          /* error_msg= */ nullptr);

          // The C result only lives for the callback; release it here.
          CppCloseGestureRecognizerResult(&result);
        };
  }

  auto recognizer = GestureRecognizer::Create(std::move(cpp_options));
  if (!recognizer.ok()) {
    ABSL_LOG(ERROR) << "Failed to create GestureRecognizer: "
                    << recognizer.status();
    CppProcessError(recognizer.status(), error_msg);
    return nullptr;
  }
  return recognizer->release();
}
int CppGestureRecognizerRecognize(void* recognizer, const MpImage& image,
GestureRecognizerResult* result,
char** error_msg) {
if (image.type == MpImage::GPU_BUFFER) {
const absl::Status status =
absl::InvalidArgumentError("GPU Buffer not supported yet.");
ABSL_LOG(ERROR) << "Recognition failed: " << status.message();
return CppProcessError(status, error_msg);
}
const auto img = CreateImageFromBuffer(
static_cast<ImageFormat::Format>(image.image_frame.format),
image.image_frame.image_buffer, image.image_frame.width,
image.image_frame.height);
if (!img.ok()) {
ABSL_LOG(ERROR) << "Failed to create Image: " << img.status();
return CppProcessError(img.status(), error_msg);
}
auto cpp_recognizer = static_cast<GestureRecognizer*>(recognizer);
auto cpp_result = cpp_recognizer->Recognize(*img);
if (!cpp_result.ok()) {
ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status();
return CppProcessError(cpp_result.status(), error_msg);
}
CppConvertToGestureRecognizerResult(*cpp_result, result);
return 0;
}
// Runs gesture recognition on one decoded video frame at `timestamp_ms`
// (video running mode). On success fills `*result` and returns 0; on failure
// returns the status code and optionally stores an error string.
int CppGestureRecognizerRecognizeForVideo(void* recognizer,
                                          const MpImage& image,
                                          int64_t timestamp_ms,
                                          GestureRecognizerResult* result,
                                          char** error_msg) {
  // Only CPU image frames are supported by the C layer so far.
  if (image.type == MpImage::GPU_BUFFER) {
    absl::Status status =
        absl::InvalidArgumentError("GPU Buffer not supported yet");
    ABSL_LOG(ERROR) << "Recognition failed: " << status.message();
    return CppProcessError(status, error_msg);
  }

  const auto img = CreateImageFromBuffer(
      static_cast<ImageFormat::Format>(image.image_frame.format),
      image.image_frame.image_buffer, image.image_frame.width,
      image.image_frame.height);

  if (!img.ok()) {
    ABSL_LOG(ERROR) << "Failed to create Image: " << img.status();
    return CppProcessError(img.status(), error_msg);
  }

  auto cpp_recognizer = static_cast<GestureRecognizer*>(recognizer);
  auto cpp_result = cpp_recognizer->RecognizeForVideo(*img, timestamp_ms);
  if (!cpp_result.ok()) {
    ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status();
    return CppProcessError(cpp_result.status(), error_msg);
  }
  CppConvertToGestureRecognizerResult(*cpp_result, result);
  return 0;
}
int CppGestureRecognizerRecognizeAsync(void* recognizer, const MpImage& image,
int64_t timestamp_ms, char** error_msg) {
if (image.type == MpImage::GPU_BUFFER) {
absl::Status status =
absl::InvalidArgumentError("GPU Buffer not supported yet");
ABSL_LOG(ERROR) << "Recognition failed: " << status.message();
return CppProcessError(status, error_msg);
}
const auto img = CreateImageFromBuffer(
static_cast<ImageFormat::Format>(image.image_frame.format),
image.image_frame.image_buffer, image.image_frame.width,
image.image_frame.height);
if (!img.ok()) {
ABSL_LOG(ERROR) << "Failed to create Image: " << img.status();
return CppProcessError(img.status(), error_msg);
}
auto cpp_recognizer = static_cast<GestureRecognizer*>(recognizer);
auto cpp_result = cpp_recognizer->RecognizeAsync(*img, timestamp_ms);
if (!cpp_result.ok()) {
ABSL_LOG(ERROR) << "Data preparation for the image classification failed: "
<< cpp_result;
return CppProcessError(cpp_result, error_msg);
}
return 0;
}
// Releases all memory owned by a GestureRecognizerResult previously filled by
// one of the recognize calls above.
void CppGestureRecognizerCloseResult(GestureRecognizerResult* result) {
  CppCloseGestureRecognizerResult(result);
}
// Shuts down and destroys the recognizer. Returns 0 on success.
// NOTE(review): if Close() fails, the recognizer object is NOT deleted and
// the handle remains live — callers may retry; confirm this is the intended
// contract (otherwise the object leaks on a failed close).
int CppGestureRecognizerClose(void* recognizer, char** error_msg) {
  auto cpp_recognizer = static_cast<GestureRecognizer*>(recognizer);
  auto result = cpp_recognizer->Close();
  if (!result.ok()) {
    ABSL_LOG(ERROR) << "Failed to close GestureRecognizer: " << result;
    return CppProcessError(result, error_msg);
  }
  delete cpp_recognizer;
  return 0;
}
} // namespace mediapipe::tasks::c::vision::gesture_recognizer
// Thin extern "C" shims exposing the Cpp* implementations with unmangled
// names for C and FFI consumers. Each simply forwards its arguments.
extern "C" {

void* gesture_recognizer_create(struct GestureRecognizerOptions* options,
                                char** error_msg) {
  return mediapipe::tasks::c::vision::gesture_recognizer::
      CppGestureRecognizerCreate(*options, error_msg);
}

int gesture_recognizer_recognize_image(void* recognizer, const MpImage& image,
                                       GestureRecognizerResult* result,
                                       char** error_msg) {
  return mediapipe::tasks::c::vision::gesture_recognizer::
      CppGestureRecognizerRecognize(recognizer, image, result, error_msg);
}

int gesture_recognizer_recognize_for_video(void* recognizer,
                                           const MpImage& image,
                                           int64_t timestamp_ms,
                                           GestureRecognizerResult* result,
                                           char** error_msg) {
  return mediapipe::tasks::c::vision::gesture_recognizer::
      CppGestureRecognizerRecognizeForVideo(recognizer, image, timestamp_ms,
                                            result, error_msg);
}

int gesture_recognizer_recognize_async(void* recognizer, const MpImage& image,
                                       int64_t timestamp_ms, char** error_msg) {
  return mediapipe::tasks::c::vision::gesture_recognizer::
      CppGestureRecognizerRecognizeAsync(recognizer, image, timestamp_ms,
                                         error_msg);
}

void gesture_recognizer_close_result(GestureRecognizerResult* result) {
  mediapipe::tasks::c::vision::gesture_recognizer::
      CppGestureRecognizerCloseResult(result);
}

// Fixed parameter-name typo (`error_ms` -> `error_msg`) for consistency with
// every other entry point; parameter names do not affect the C ABI.
int gesture_recognizer_close(void* recognizer, char** error_msg) {
  return mediapipe::tasks::c::vision::gesture_recognizer::
      CppGestureRecognizerClose(recognizer, error_msg);
}

}  // extern "C"

View File

@ -0,0 +1,154 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_
#define MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_
#include "mediapipe/tasks/c/components/processors/classifier_options.h"
#include "mediapipe/tasks/c/core/base_options.h"
#include "mediapipe/tasks/c/vision/core/common.h"
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h"
#ifndef MP_EXPORT
#define MP_EXPORT __attribute__((visibility("default")))
#endif // MP_EXPORT
#ifdef __cplusplus
extern "C" {
#endif
// The options for configuring a MediaPipe gesture recognizer task.
//
// NOTE(review): the default member initializers (e.g. `int num_hands = 1;`)
// are C++-only syntax; despite the extern "C" wrapper this struct will not
// compile in a pure C translation unit — confirm intended consumers.
struct GestureRecognizerOptions {
  // Base options for configuring MediaPipe Tasks, such as specifying the model
  // file with metadata, accelerator options, op resolver, etc.
  struct BaseOptions base_options;

  // The running mode of the task. Default to the image mode.
  // GestureRecognizer has three running modes:
  // 1) The image mode for recognizing hand gestures on single image inputs.
  // 2) The video mode for recognizing hand gestures on the decoded frames of a
  //    video.
  // 3) The live stream mode for recognizing hand gestures on the live stream of
  //    input data, such as from camera. In this mode, the "result_callback"
  //    below must be specified to receive the detection results asynchronously.
  RunningMode running_mode;

  // The maximum number of hands can be detected by the GestureRecognizer.
  int num_hands = 1;

  // The minimum confidence score for the hand detection to be considered
  // successful.
  float min_hand_detection_confidence = 0.5;

  // The minimum confidence score of hand presence score in the hand landmark
  // detection.
  float min_hand_presence_confidence = 0.5;

  // The minimum confidence score for the hand tracking to be considered
  // successful.
  float min_tracking_confidence = 0.5;

  // Options for configuring the canned gestures classifier, such as score
  // threshold, allow list and deny list of gestures. The categories for canned
  // gesture classifier are: ["None", "Closed_Fist", "Open_Palm",
  // "Pointing_Up", "Thumb_Down", "Thumb_Up", "Victory", "ILoveYou"]
  struct ClassifierOptions canned_gestures_classifier_options;

  // Options for configuring the custom gestures classifier, such as score
  // threshold, allow list and deny list of gestures.
  struct ClassifierOptions custom_gestures_classifier_options;

  // The user-defined result callback for processing live stream data.
  // The result callback should only be specified when the running mode is set
  // to RunningMode::LIVE_STREAM. Arguments of the callback function include:
  // the pointer to recognition result, the image that result was obtained
  // on, the timestamp relevant to recognition results and pointer to error
  // message in case of any failure. The validity of the passed arguments is
  // true for the lifetime of the callback function.
  //
  // A caller is responsible for closing gesture recognizer result.
  typedef void (*result_callback_fn)(GestureRecognizerResult* result,
                                     const MpImage& image, int64_t timestamp_ms,
                                     char* error_msg);
  result_callback_fn result_callback;
};
// Creates a GestureRecognizer from the provided `options`.
// Returns an opaque pointer to the gesture recognizer on success.
// If an error occurs, returns `nullptr` and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT void* gesture_recognizer_create(
    struct GestureRecognizerOptions* options, char** error_msg);
// Performs gesture recognition on the input `image`. Returns `0` on success.
// On success the caller owns the data inside `result` and must release it with
// `gesture_recognizer_close_result`.
// If an error occurs, returns an error code and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT int gesture_recognizer_recognize_image(
    void* recognizer, const MpImage& image, GestureRecognizerResult* result,
    char** error_msg);
// Performs gesture recognition on the provided video frame.
// Only use this method when the GestureRecognizer is created with the video
// running mode.
// The image can be of any size with format RGB or RGBA. It's required to
// provide the video frame's timestamp (in milliseconds). The input timestamps
// must be monotonically increasing.
// If an error occurs, returns an error code and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT int gesture_recognizer_recognize_for_video(
    void* recognizer, const MpImage& image, int64_t timestamp_ms,
    GestureRecognizerResult* result, char** error_msg);
// Sends live image data to gesture recognition, and the results will be
// available via the `result_callback` provided in the GestureRecognizerOptions.
// Only use this method when the GestureRecognizer is created with the live
// stream running mode.
// The image can be of any size with format RGB or RGBA. It's required to
// provide a timestamp (in milliseconds) to indicate when the input image is
// sent to the gesture recognizer. The input timestamps must be monotonically
// increasing.
// The `result_callback` provides:
//   - The recognition results as a GestureRecognizerResult object.
//   - The const reference to the corresponding input image that the gesture
//     recognizer runs on. Note that the const reference to the image will no
//     longer be valid when the callback returns. To access the image data
//     outside of the callback, callers need to make a copy of the image.
//   - The input timestamp in milliseconds.
// If an error occurs, returns an error code and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT int gesture_recognizer_recognize_async(void* recognizer,
                                                 const MpImage& image,
                                                 int64_t timestamp_ms,
                                                 char** error_msg);
// Frees the memory allocated inside a GestureRecognizerResult result.
// Does not free the result pointer itself.
MP_EXPORT void gesture_recognizer_close_result(GestureRecognizerResult* result);
// Frees the gesture recognizer created by `gesture_recognizer_create`.
// If an error occurs, returns an error code and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT int gesture_recognizer_close(void* recognizer, char** error_msg);
#ifdef __cplusplus
} // extern C
#endif
#endif // MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_

View File

@ -0,0 +1,65 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_
#define MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_
#include <cstdint>
#include "mediapipe/tasks/c/components/containers/category.h"
#include "mediapipe/tasks/c/components/containers/landmark.h"
#ifndef MP_EXPORT
#define MP_EXPORT __attribute__((visibility("default")))
#endif // MP_EXPORT
#ifdef __cplusplus
extern "C" {
#endif
// The gesture recognition result from GestureRecognizer, where each vector
// element represents a single hand detected in the image. All arrays are
// heap-allocated by the converter and must be released with
// `gesture_recognizer_close_result`.
struct GestureRecognizerResult {
  // Recognized hand gestures with sorted order such that the winning label is
  // the first item in the list. One `Categories` entry per detected hand.
  struct Categories* gestures;
  // The number of elements in the gestures array.
  uint32_t gestures_count;
  // Classification of handedness (e.g. "Right"), one entry per detected hand.
  struct Categories* handedness;
  // The number of elements in the handedness array.
  uint32_t handedness_count;
  // Detected hand landmarks in normalized image coordinates, one landmark
  // list per detected hand.
  struct NormalizedLandmarks* hand_landmarks;
  // The number of elements in the hand_landmarks array.
  uint32_t hand_landmarks_count;
  // Detected hand landmarks in world coordinates, one landmark list per
  // detected hand.
  struct Landmarks* hand_world_landmarks;
  // The number of elements in the hand_world_landmarks array.
  uint32_t hand_world_landmarks_count;
};
#ifdef __cplusplus
} // extern C
#endif
#endif // MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_RESULT_GESTURE_RECOGNIZER_RESULT_H_

View File

@ -0,0 +1,169 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h"
#include <cstdint>
#include <vector>
#include "mediapipe/tasks/c/components/containers/category.h"
#include "mediapipe/tasks/c/components/containers/category_converter.h"
#include "mediapipe/tasks/c/components/containers/landmark.h"
#include "mediapipe/tasks/c/components/containers/landmark_converter.h"
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h"
#include "mediapipe/tasks/cc/components/containers/category.h"
#include "mediapipe/tasks/cc/components/containers/landmark.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h"
namespace mediapipe::tasks::c::components::containers {
using CppCategory = ::mediapipe::tasks::components::containers::Category;
using CppLandmark = ::mediapipe::tasks::components::containers::Landmark;
using CppNormalizedLandmark =
::mediapipe::tasks::components::containers::NormalizedLandmark;
void CppConvertToGestureRecognizerResult(
const mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult&
in,
GestureRecognizerResult* out) {
out->gestures_count = in.gestures.size();
out->gestures = new Categories[out->gestures_count];
for (uint32_t i = 0; i < out->gestures_count; ++i) {
uint32_t categories_count = in.gestures[i].classification_size();
out->gestures[i].categories_count = categories_count;
out->gestures[i].categories = new Category[categories_count];
for (uint32_t j = 0; j < categories_count; ++j) {
const auto& classification = in.gestures[i].classification(j);
CppCategory cpp_category;
// Set fields from the Classification protobuf
cpp_category.index = classification.index();
cpp_category.score = classification.score();
if (classification.has_label()) {
cpp_category.category_name = classification.label();
}
if (classification.has_display_name()) {
cpp_category.display_name = classification.display_name();
}
CppConvertToCategory(cpp_category, &out->gestures[i].categories[j]);
}
}
out->handedness_count = in.handedness.size();
out->handedness = new Categories[out->handedness_count];
for (uint32_t i = 0; i < out->handedness_count; ++i) {
uint32_t categories_count = in.handedness[i].classification_size();
out->handedness[i].categories_count = categories_count;
out->handedness[i].categories = new Category[categories_count];
for (uint32_t j = 0; j < categories_count; ++j) {
const auto& classification = in.handedness[i].classification(j);
CppCategory cpp_category;
// Set fields from the Classification protobuf
cpp_category.index = classification.index();
cpp_category.score = classification.score();
if (classification.has_label()) {
cpp_category.category_name = classification.label();
}
if (classification.has_display_name()) {
cpp_category.display_name = classification.display_name();
}
CppConvertToCategory(cpp_category, &out->handedness[i].categories[j]);
}
}
out->hand_landmarks_count = in.hand_landmarks.size();
out->hand_landmarks = new NormalizedLandmarks[out->hand_landmarks_count];
for (uint32_t i = 0; i < out->hand_landmarks_count; ++i) {
std::vector<CppNormalizedLandmark> cpp_normalized_landmarks;
for (uint32_t j = 0; j < in.hand_landmarks[i].landmark_size(); ++j) {
const auto& landmark = in.hand_landmarks[i].landmark(j);
CppNormalizedLandmark cpp_landmark;
cpp_landmark.x = landmark.x();
cpp_landmark.y = landmark.y();
cpp_landmark.z = landmark.z();
if (landmark.has_presence()) {
cpp_landmark.presence = landmark.presence();
}
if (landmark.has_visibility()) {
cpp_landmark.visibility = landmark.visibility();
}
cpp_normalized_landmarks.push_back(cpp_landmark);
}
CppConvertToNormalizedLandmarks(cpp_normalized_landmarks,
&out->hand_landmarks[i]);
}
out->hand_world_landmarks_count = in.hand_world_landmarks.size();
out->hand_world_landmarks = new Landmarks[out->hand_world_landmarks_count];
for (uint32_t i = 0; i < out->hand_world_landmarks_count; ++i) {
std::vector<CppLandmark> cpp_landmarks;
for (uint32_t j = 0; j < in.hand_world_landmarks[i].landmark_size(); ++j) {
const auto& landmark = in.hand_world_landmarks[i].landmark(j);
CppLandmark cpp_landmark;
cpp_landmark.x = landmark.x();
cpp_landmark.y = landmark.y();
cpp_landmark.z = landmark.z();
if (landmark.has_presence()) {
cpp_landmark.presence = landmark.presence();
}
if (landmark.has_visibility()) {
cpp_landmark.visibility = landmark.visibility();
}
cpp_landmarks.push_back(cpp_landmark);
}
CppConvertToLandmarks(cpp_landmarks, &out->hand_world_landmarks[i]);
}
}
// Releases every array owned by `result` and resets each pointer/count pair,
// so a second close is a harmless no-op. Does not free `result` itself.
void CppCloseGestureRecognizerResult(GestureRecognizerResult* result) {
  // Handle each field as a unit: free nested data, free the array, then
  // clear the pointer and count together.
  for (uint32_t i = 0; i < result->gestures_count; ++i) {
    CppCloseCategories(&result->gestures[i]);
  }
  delete[] result->gestures;
  result->gestures = nullptr;
  result->gestures_count = 0;

  for (uint32_t i = 0; i < result->handedness_count; ++i) {
    CppCloseCategories(&result->handedness[i]);
  }
  delete[] result->handedness;
  result->handedness = nullptr;
  result->handedness_count = 0;

  for (uint32_t i = 0; i < result->hand_landmarks_count; ++i) {
    CppCloseNormalizedLandmarks(&result->hand_landmarks[i]);
  }
  delete[] result->hand_landmarks;
  result->hand_landmarks = nullptr;
  result->hand_landmarks_count = 0;

  for (uint32_t i = 0; i < result->hand_world_landmarks_count; ++i) {
    CppCloseLandmarks(&result->hand_world_landmarks[i]);
  }
  delete[] result->hand_world_landmarks;
  result->hand_world_landmarks = nullptr;
  result->hand_world_landmarks_count = 0;
}
} // namespace mediapipe::tasks::c::components::containers

View File

@ -0,0 +1,33 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Include guard renamed to match the file's actual path
// (mediapipe/tasks/c/vision/gesture_recognizer/...); the previous
// C_COMPONENTS_CONTAINERS guard was a copy-paste from another directory and
// risked colliding with a header that makes the same mistake.
#ifndef MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_
#define MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h"
namespace mediapipe::tasks::c::components::containers {
// Deep-copies the C++ task result `in` into the C struct `out`, allocating
// all arrays on the heap.
void CppConvertToGestureRecognizerResult(
    const mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult&
        in,
    GestureRecognizerResult* out);
// Frees the arrays allocated by CppConvertToGestureRecognizerResult and
// resets `result`'s pointers and counts. Does not free `result` itself.
void CppCloseGestureRecognizerResult(GestureRecognizerResult* result);
}  // namespace mediapipe::tasks::c::components::containers
#endif  // MEDIAPIPE_TASKS_C_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_RESULT_CONVERTER_H_

View File

@ -0,0 +1,156 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter.h"
#include <cstdint>
#include <string>
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/tasks/c/components/containers/landmark.h"
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_result.h"
namespace mediapipe::tasks::c::components::containers {
using mediapipe::ClassificationList;
using mediapipe::LandmarkList;
using mediapipe::NormalizedLandmarkList;
// Fills `cpp_result` with deterministic single-hand test data: one gesture
// classification, one handedness classification, one normalized landmark and
// one world landmark.
void InitGestureRecognizerResult(
    ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult*
        cpp_result) {
  // One recognized gesture, built in place inside its ClassificationList.
  ClassificationList gesture_classifications;
  auto& gesture = *gesture_classifications.add_classification();
  gesture.set_index(0);
  gesture.set_score(0.9f);
  gesture.set_label("gesture_label_1");
  gesture.set_display_name("gesture_display_name_1");
  cpp_result->gestures.push_back(gesture_classifications);

  // One handedness classification.
  ClassificationList handedness_classifications;
  auto& handedness = *handedness_classifications.add_classification();
  handedness.set_index(1);
  handedness.set_score(0.8f);
  handedness.set_label("handeness_label_1");
  handedness.set_display_name("handeness_display_name_1");
  cpp_result->handedness.push_back(handedness_classifications);

  // One hand landmark in normalized image coordinates.
  NormalizedLandmarkList image_landmarks;
  auto& image_landmark = *image_landmarks.add_landmark();
  image_landmark.set_x(0.1f);
  image_landmark.set_y(0.2f);
  image_landmark.set_z(0.3f);
  cpp_result->hand_landmarks.push_back(image_landmarks);

  // One hand landmark in world coordinates.
  LandmarkList world_landmarks;
  auto& world_landmark = *world_landmarks.add_landmark();
  world_landmark.set_x(1.0f);
  world_landmark.set_y(1.1f);
  world_landmark.set_z(1.2f);
  cpp_result->hand_world_landmarks.push_back(world_landmarks);
}
// Converts a fully-populated C++ result and checks every section of the C
// struct against the source protos. Previously handedness was initialized by
// InitGestureRecognizerResult but never verified; it is now checked the same
// way as gestures.
TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) {
  ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult
      cpp_result;
  InitGestureRecognizerResult(&cpp_result);
  GestureRecognizerResult c_result;
  CppConvertToGestureRecognizerResult(cpp_result, &c_result);
  // Verify conversion of gestures
  EXPECT_NE(c_result.gestures, nullptr);
  EXPECT_EQ(c_result.gestures_count, cpp_result.gestures.size());
  for (uint32_t i = 0; i < c_result.gestures_count; ++i) {
    EXPECT_EQ(c_result.gestures[i].categories_count,
              cpp_result.gestures[i].classification_size());
    for (uint32_t j = 0; j < c_result.gestures[i].categories_count; ++j) {
      auto gesture = cpp_result.gestures[i].classification(j);
      EXPECT_EQ(std::string(c_result.gestures[i].categories[j].category_name),
                gesture.label());
      EXPECT_FLOAT_EQ(c_result.gestures[i].categories[j].score,
                      gesture.score());
    }
  }
  // Verify conversion of handedness
  EXPECT_NE(c_result.handedness, nullptr);
  EXPECT_EQ(c_result.handedness_count, cpp_result.handedness.size());
  for (uint32_t i = 0; i < c_result.handedness_count; ++i) {
    EXPECT_EQ(c_result.handedness[i].categories_count,
              cpp_result.handedness[i].classification_size());
    for (uint32_t j = 0; j < c_result.handedness[i].categories_count; ++j) {
      auto handedness = cpp_result.handedness[i].classification(j);
      EXPECT_EQ(
          std::string(c_result.handedness[i].categories[j].category_name),
          handedness.label());
      EXPECT_FLOAT_EQ(c_result.handedness[i].categories[j].score,
                      handedness.score());
    }
  }
  // Verify conversion of hand_landmarks
  EXPECT_NE(c_result.hand_landmarks, nullptr);
  EXPECT_EQ(c_result.hand_landmarks_count, cpp_result.hand_landmarks.size());
  for (uint32_t i = 0; i < c_result.hand_landmarks_count; ++i) {
    EXPECT_EQ(c_result.hand_landmarks[i].landmarks_count,
              cpp_result.hand_landmarks[i].landmark_size());
    for (uint32_t j = 0; j < c_result.hand_landmarks[i].landmarks_count; ++j) {
      const auto& landmark = cpp_result.hand_landmarks[i].landmark(j);
      EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].x, landmark.x());
      EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].y, landmark.y());
      EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].z, landmark.z());
    }
  }
  // Verify conversion of hand_world_landmarks
  EXPECT_NE(c_result.hand_world_landmarks, nullptr);
  EXPECT_EQ(c_result.hand_world_landmarks_count,
            cpp_result.hand_world_landmarks.size());
  for (uint32_t i = 0; i < c_result.hand_world_landmarks_count; ++i) {
    EXPECT_EQ(c_result.hand_world_landmarks[i].landmarks_count,
              cpp_result.hand_world_landmarks[i].landmark_size());
    for (uint32_t j = 0; j < c_result.hand_world_landmarks[i].landmarks_count;
         ++j) {
      const auto& landmark = cpp_result.hand_world_landmarks[i].landmark(j);
      EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].x,
                      landmark.x());
      EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].y,
                      landmark.y());
      EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].z,
                      landmark.z());
    }
  }
  CppCloseGestureRecognizerResult(&c_result);
}
// Verifies that CppCloseGestureRecognizerResult releases every array the
// converter allocated and nulls out the pointers afterwards.
TEST(GestureRecognizerResultConverterTest, FreesMemory) {
  ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerResult
      cpp_result;
  InitGestureRecognizerResult(&cpp_result);
  GestureRecognizerResult c_result;
  CppConvertToGestureRecognizerResult(cpp_result, &c_result);
  // The converter must have allocated all four arrays.
  EXPECT_NE(c_result.gestures, nullptr);
  EXPECT_NE(c_result.handedness, nullptr);
  EXPECT_NE(c_result.hand_landmarks, nullptr);
  EXPECT_NE(c_result.hand_world_landmarks, nullptr);
  CppCloseGestureRecognizerResult(&c_result);
  // After closing, every pointer is reset to nullptr.
  EXPECT_EQ(c_result.gestures, nullptr);
  EXPECT_EQ(c_result.handedness, nullptr);
  EXPECT_EQ(c_result.hand_landmarks, nullptr);
  EXPECT_EQ(c_result.hand_world_landmarks, nullptr);
}
} // namespace mediapipe::tasks::c::components::containers

View File

@ -0,0 +1,325 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h"
#include <cstdint>
#include <cstdlib>
#include <string>
#include "absl/flags/flag.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/tasks/c/components/containers/landmark.h"
#include "mediapipe/tasks/c/vision/core/common.h"
#include "mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result.h"
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
namespace {
using ::mediapipe::file::JoinPath;
using ::mediapipe::tasks::vision::DecodeImageFromFile;
using testing::HasSubstr;
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
constexpr char kModelName[] = "gesture_recognizer.task";
constexpr char kImageFile[] = "fist.jpg";
constexpr float kScorePrecision = 1e-2;
constexpr float kLandmarkPrecision = 1e-1;
constexpr int kIterations = 100;
// Resolves `file_name` inside the vision test data directory, relative to the
// test's working directory.
std::string GetFullPath(absl::string_view file_name) {
  return JoinPath("./", kTestDataDirectory, file_name);
}
// Asserts that `result` matches the expected recognition for the fist.jpg
// test image: one right hand making a closed-fist gesture. `score_precision`
// and `landmark_precision` bound the allowed numeric deviation.
void MatchesGestureRecognizerResult(GestureRecognizerResult* result,
                                    const float score_precision,
                                    const float landmark_precision) {
  // Expects to have the same number of hands detected.
  EXPECT_EQ(result->gestures_count, 1);
  EXPECT_EQ(result->handedness_count, 1);
  // Actual gesture with top score matches expected gesture.
  EXPECT_EQ(std::string{result->gestures[0].categories[0].category_name},
            "Closed_Fist");
  EXPECT_NEAR(result->gestures[0].categories[0].score, 0.91f, score_precision);
  // Actual handedness matches expected handedness.
  EXPECT_EQ(std::string{result->handedness[0].categories[0].category_name},
            "Right");
  EXPECT_NEAR(result->handedness[0].categories[0].score, 0.9893f,
              score_precision);
  // Actual landmarks match expected landmarks (wrist landmark of the first
  // hand, in both normalized image and world coordinates).
  EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].x, 0.477f,
              landmark_precision);
  EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].y, 0.661f,
              landmark_precision);
  EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].z, 0.0f,
              landmark_precision);
  EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].x, -0.009f,
              landmark_precision);
  EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].y, 0.082f,
              landmark_precision);
  EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].z, 0.006f,
              landmark_precision);
}
// Recognizes a single image in IMAGE mode and verifies the result.
// Uses ASSERT_NE on the recognizer handle: the original EXPECT_NE would let
// the test continue and dereference a null recognizer on creation failure.
// Also checks the recognize call's return code, which was previously ignored.
TEST(GestureRecognizerTest, ImageModeTest) {
  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
  ASSERT_TRUE(image.ok());
  const std::string model_path = GetFullPath(kModelName);
  GestureRecognizerOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ model_path.c_str()},
      /* running_mode= */ RunningMode::IMAGE,
      /* num_hands= */ 1,
      /* min_hand_detection_confidence= */ 0.5,
      /* min_hand_presence_confidence= */ 0.5,
      /* min_tracking_confidence= */ 0.5,
      {/* display_names_locale= */ nullptr,
       /* max_results= */ -1,
       /* score_threshold= */ 0.0,
       /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0,
       /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0},
      {/* display_names_locale= */ nullptr,
       /* max_results= */ -1,
       /* score_threshold= */ 0.0,
       /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0,
       /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0}};
  void* recognizer =
      gesture_recognizer_create(&options, /* error_msg */ nullptr);
  // Fatal assertion: subsequent calls dereference the recognizer.
  ASSERT_NE(recognizer, nullptr);
  const auto& image_frame = image->GetImageFrameSharedPtr();
  const MpImage mp_image = {
      .type = MpImage::IMAGE_FRAME,
      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
                      .image_buffer = image_frame->PixelData(),
                      .width = image_frame->Width(),
                      .height = image_frame->Height()}};
  GestureRecognizerResult result;
  // Recognition must succeed (0 == success per the C API contract).
  EXPECT_EQ(gesture_recognizer_recognize_image(recognizer, mp_image, &result,
                                               /* error_msg */ nullptr),
            0);
  MatchesGestureRecognizerResult(&result, kScorePrecision, kLandmarkPrecision);
  gesture_recognizer_close_result(&result);
  EXPECT_EQ(gesture_recognizer_close(recognizer, /* error_msg */ nullptr), 0);
}
// Recognizes the same frame repeatedly in VIDEO mode with increasing
// timestamps. Uses ASSERT_NE on the recognizer handle (EXPECT_NE would
// dereference nullptr on creation failure) and checks each recognize call's
// previously-ignored return code.
TEST(GestureRecognizerTest, VideoModeTest) {
  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
  ASSERT_TRUE(image.ok());
  const std::string model_path = GetFullPath(kModelName);
  GestureRecognizerOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ model_path.c_str()},
      /* running_mode= */ RunningMode::VIDEO,
      /* num_hands= */ 1,
      /* min_hand_detection_confidence= */ 0.5,
      /* min_hand_presence_confidence= */ 0.5,
      /* min_tracking_confidence= */ 0.5,
      {/* display_names_locale= */ nullptr,
       /* max_results= */ -1,
       /* score_threshold= */ 0.0,
       /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0,
       /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0},
      {/* display_names_locale= */ nullptr,
       /* max_results= */ -1,
       /* score_threshold= */ 0.0,
       /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0,
       /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0}};
  void* recognizer =
      gesture_recognizer_create(&options, /* error_msg */ nullptr);
  // Fatal assertion: subsequent calls dereference the recognizer.
  ASSERT_NE(recognizer, nullptr);
  const auto& image_frame = image->GetImageFrameSharedPtr();
  const MpImage mp_image = {
      .type = MpImage::IMAGE_FRAME,
      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
                      .image_buffer = image_frame->PixelData(),
                      .width = image_frame->Width(),
                      .height = image_frame->Height()}};
  for (int i = 0; i < kIterations; ++i) {
    GestureRecognizerResult result;
    // Timestamps must be monotonically increasing; `i` satisfies that.
    EXPECT_EQ(gesture_recognizer_recognize_for_video(recognizer, mp_image, i,
                                                     &result,
                                                     /* error_msg */ nullptr),
              0);
    MatchesGestureRecognizerResult(&result, kScorePrecision,
                                   kLandmarkPrecision);
    gesture_recognizer_close_result(&result);
  }
  EXPECT_EQ(gesture_recognizer_close(recognizer, /* error_msg */ nullptr), 0);
}
// A structure to support LiveStreamModeTest below. This structure holds a
// static method `Fn` for a callback function of C API. A `static` qualifier
// allows to take an address of the method to follow API style. Another static
// struct member is `last_timestamp` that is used to verify that current
// timestamp is greater than the previous one.
struct LiveStreamModeCallback {
  // Doubles as (a) a floor that each incoming timestamp must exceed and
  // (b) a count of invocations: it is incremented once per callback, so after
  // the test it equals the number of results delivered minus the initial -1.
  static int64_t last_timestamp;
  // C-API result callback; must match
  // GestureRecognizerOptions::result_callback_fn.
  static void Fn(GestureRecognizerResult* recognizer_result,
                 const MpImage& image, int64_t timestamp, char* error_msg) {
    ASSERT_NE(recognizer_result, nullptr);
    ASSERT_EQ(error_msg, nullptr);
    MatchesGestureRecognizerResult(recognizer_result, kScorePrecision,
                                   kLandmarkPrecision);
    EXPECT_GT(image.image_frame.width, 0);
    EXPECT_GT(image.image_frame.height, 0);
    // Timestamps are strictly increasing; `last_timestamp` only advances by 1
    // per delivered result, so this holds even when the flow limiter drops
    // intermediate frames.
    EXPECT_GT(timestamp, last_timestamp);
    last_timestamp++;
  }
};
// Start below the first valid timestamp (0).
int64_t LiveStreamModeCallback::last_timestamp = -1;
// Streams frames in LIVE_STREAM mode; results arrive asynchronously via
// LiveStreamModeCallback::Fn. Uses ASSERT_NE on the recognizer handle: the
// original EXPECT_NE would continue and dereference nullptr on creation
// failure.
TEST(GestureRecognizerTest, LiveStreamModeTest) {
  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
  ASSERT_TRUE(image.ok());
  const std::string model_path = GetFullPath(kModelName);
  GestureRecognizerOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ model_path.c_str()},
      /* running_mode= */ RunningMode::LIVE_STREAM,
      /* num_hands= */ 1,
      /* min_hand_detection_confidence= */ 0.5,
      /* min_hand_presence_confidence= */ 0.5,
      /* min_tracking_confidence= */ 0.5,
      {/* display_names_locale= */ nullptr,
       /* max_results= */ -1,
       /* score_threshold= */ 0.0,
       /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0,
       /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0},
      {/* display_names_locale= */ nullptr,
       /* max_results= */ -1,
       /* score_threshold= */ 0.0,
       /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0,
       /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0},
      /* result_callback= */ LiveStreamModeCallback::Fn,
  };
  void* recognizer =
      gesture_recognizer_create(&options, /* error_msg */ nullptr);
  // Fatal assertion: subsequent calls dereference the recognizer.
  ASSERT_NE(recognizer, nullptr);
  const auto& image_frame = image->GetImageFrameSharedPtr();
  const MpImage mp_image = {
      .type = MpImage::IMAGE_FRAME,
      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
                      .image_buffer = image_frame->PixelData(),
                      .width = image_frame->Width(),
                      .height = image_frame->Height()}};
  for (int i = 0; i < kIterations; ++i) {
    // A non-negative return code means the frame was accepted for processing.
    EXPECT_GE(gesture_recognizer_recognize_async(recognizer, mp_image, i,
                                                 /* error_msg */ nullptr),
              0);
  }
  // Closing blocks until pending callbacks are delivered.
  EXPECT_EQ(gesture_recognizer_close(recognizer, /* error_msg */ nullptr), 0);
  // Due to the flow limiter, the total of outputs might be smaller than the
  // number of iterations.
  EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations);
  EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0);
}
// Verifies that creation fails with a descriptive error when neither a model
// buffer nor a model path is supplied.
TEST(GestureRecognizerTest, InvalidArgumentHandling) {
  // It is an error to set neither the asset buffer nor the path.
  GestureRecognizerOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ nullptr},
      /* running_mode= */ RunningMode::IMAGE,
      /* num_hands= */ 1,
      /* min_hand_detection_confidence= */ 0.5,
      /* min_hand_presence_confidence= */ 0.5,
      /* min_tracking_confidence= */ 0.5,
      {},
      {}};
  char* error_msg;
  void* recognizer = gesture_recognizer_create(&options, &error_msg);
  // Creation must fail and populate the heap-allocated error message, which
  // the caller owns and must free.
  EXPECT_EQ(recognizer, nullptr);
  EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify"));
  free(error_msg);
}
// Verifies error reporting when recognition is attempted on an unsupported
// GPU-buffer image. Uses ASSERT_NE on the recognizer handle (EXPECT_NE would
// dereference nullptr on failure) and additionally asserts that the failing
// recognize call returns a non-zero error code, which was previously ignored.
TEST(GestureRecognizerTest, FailedRecognitionHandling) {
  const std::string model_path = GetFullPath(kModelName);
  GestureRecognizerOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_buffer_count= */ 0,
                           /* model_asset_path= */ model_path.c_str()},
      /* running_mode= */ RunningMode::IMAGE,
      /* num_hands= */ 1,
      /* min_hand_detection_confidence= */ 0.5,
      /* min_hand_presence_confidence= */ 0.5,
      /* min_tracking_confidence= */ 0.5,
      {/* display_names_locale= */ nullptr,
       /* max_results= */ -1,
       /* score_threshold= */ 0.0,
       /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0,
       /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0},
      {/* display_names_locale= */ nullptr,
       /* max_results= */ -1,
       /* score_threshold= */ 0.0,
       /* category_allowlist= */ nullptr,
       /* category_allowlist_count= */ 0,
       /* category_denylist= */ nullptr,
       /* category_denylist_count= */ 0},
  };
  void* recognizer = gesture_recognizer_create(&options, /* error_msg */
                                               nullptr);
  // Fatal assertion: the recognize call below dereferences the recognizer.
  ASSERT_NE(recognizer, nullptr);
  // GPU buffers are not supported; recognition must fail.
  const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}};
  GestureRecognizerResult result;
  char* error_msg;
  const int error_code = gesture_recognizer_recognize_image(
      recognizer, mp_image, &result, &error_msg);
  EXPECT_NE(error_code, 0);
  EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet"));
  free(error_msg);
  EXPECT_EQ(gesture_recognizer_close(recognizer, /* error_msg */ nullptr), 0);
}
} // namespace

View File

@ -186,6 +186,7 @@ filegroup(
"face_landmarker.task",
"face_landmarker_v2.task",
"face_stylizer_color_ink.task",
"gesture_recognizer.task",
"hair_segmentation.tflite",
"hand_landmark_full.tflite",
"hand_landmark_lite.tflite",