Add support for rotations in GestureRecognizer C++ API.

PiperOrigin-RevId: 482533599
commit 4b5df1cb96 (parent e71638cf67)
Authored by MediaPipe Team on 2022-10-20 10:40:56 -07:00; committed by Copybara-Service.
21 changed files with 1048 additions and 63 deletions.

File: mediapipe/tasks/cc/vision/gesture_recognizer/BUILD

@@ -56,6 +56,7 @@ cc_library(
         "//mediapipe/framework/formats:classification_cc_proto",
         "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/formats:matrix",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/framework/formats:tensor",
         "//mediapipe/tasks/cc:common",
         "//mediapipe/tasks/cc/components:image_preprocessing",
@@ -91,6 +92,7 @@ cc_library(
         "//mediapipe/framework/formats:classification_cc_proto",
         "//mediapipe/framework/formats:image",
         "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/tasks/cc:common",
         "//mediapipe/tasks/cc/components/processors/proto:classifier_options_cc_proto",
         "//mediapipe/tasks/cc/core:model_task_graph",
@@ -123,6 +125,7 @@ cc_library(
         "//mediapipe/framework/formats:classification_cc_proto",
         "//mediapipe/framework/formats:image",
         "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/tasks/cc:common",
         "//mediapipe/tasks/cc/components:image_preprocessing",
         "//mediapipe/tasks/cc/components/containers:gesture_recognition_result",

File: mediapipe/tasks/cc/vision/gesture_recognizer/calculators/BUILD

@@ -69,6 +69,7 @@ cc_library(
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/formats:matrix",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/framework/port:ret_check",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",
@@ -86,6 +87,7 @@ cc_test(
         "//mediapipe/framework:calculator_runner",
         "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/formats:matrix",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/framework/port:gtest_main",
         "//mediapipe/framework/port:parse_text_proto",
         "@com_google_absl//absl/strings",

File: mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc

@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include <algorithm>
+#include <cmath>
 #include <limits>
 #include <memory>
 #include <string>
@@ -26,6 +27,7 @@ limitations under the License.
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/matrix.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h"
@@ -38,6 +40,7 @@ namespace {
 constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kImageSizeTag[] = "IMAGE_SIZE";
+constexpr char kNormRectTag[] = "NORM_RECT";
 constexpr char kLandmarksMatrixTag[] = "LANDMARKS_MATRIX";
 constexpr int kFeaturesPerLandmark = 3;
@@ -62,6 +65,25 @@ absl::StatusOr<LandmarkListT> NormalizeLandmarkAspectRatio(
   return normalized_landmarks;
 }

+template <class LandmarkListT>
+absl::StatusOr<LandmarkListT> RotateLandmarks(const LandmarkListT& landmarks,
+                                              float rotation) {
+  float cos = std::cos(rotation);
+  // Negate because Y-axis points down and not up.
+  float sin = std::sin(-rotation);
+  LandmarkListT rotated_landmarks;
+  for (int i = 0; i < landmarks.landmark_size(); ++i) {
+    const auto& old_landmark = landmarks.landmark(i);
+    float x = old_landmark.x() - 0.5;
+    float y = old_landmark.y() - 0.5;
+    auto* new_landmark = rotated_landmarks.add_landmark();
+    new_landmark->set_x(x * cos - y * sin + 0.5);
+    new_landmark->set_y(y * cos + x * sin + 0.5);
+    new_landmark->set_z(old_landmark.z());
+  }
+  return rotated_landmarks;
+}
+
 template <class LandmarkListT>
 absl::StatusOr<LandmarkListT> NormalizeObject(const LandmarkListT& landmarks,
                                               int origin_offset) {
@@ -134,6 +156,13 @@ absl::Status ProcessLandmarks(LandmarkListT landmarks, CalculatorContext* cc) {
                      NormalizeLandmarkAspectRatio(landmarks, width, height));
   }
+  if (cc->Inputs().HasTag(kNormRectTag)) {
+    RET_CHECK(!cc->Inputs().Tag(kNormRectTag).IsEmpty());
+    const auto rotation =
+        cc->Inputs().Tag(kNormRectTag).Get<NormalizedRect>().rotation();
+    ASSIGN_OR_RETURN(landmarks, RotateLandmarks(landmarks, rotation));
+  }
   const auto& options = cc->Options<LandmarksToMatrixCalculatorOptions>();
   if (options.object_normalization()) {
     ASSIGN_OR_RETURN(
@@ -163,6 +192,8 @@ absl::Status ProcessLandmarks(LandmarkListT landmarks, CalculatorContext* cc) {
 //   WORLD_LANDMARKS - World 3d landmarks of one object. Use *either*
 //                     LANDMARKS or WORLD_LANDMARKS.
 //   IMAGE_SIZE - (width, height) of the image
+//   NORM_RECT - Optional NormalizedRect object whose 'rotation' field is used
+//               to rotate the landmarks.
 // Output:
 //   LANDMARKS_MATRIX - Matrix for the landmarks.
 //
@@ -185,6 +216,7 @@ class LandmarksToMatrixCalculator : public CalculatorBase {
     cc->Inputs().Tag(kLandmarksTag).Set<NormalizedLandmarkList>().Optional();
     cc->Inputs().Tag(kWorldLandmarksTag).Set<LandmarkList>().Optional();
     cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>().Optional();
+    cc->Inputs().Tag(kNormRectTag).Set<NormalizedRect>().Optional();
     cc->Outputs().Tag(kLandmarksMatrixTag).Set<Matrix>();
     return absl::OkStatus();
   }
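Aside: the new RotateLandmarks helper rotates normalized landmarks about the image center (0.5, 0.5), and the sine term is negated because the image Y-axis points down, so a positive 'rotation' is anti-clockwise on screen. A minimal self-contained sketch (not part of the commit) reproducing the math for one point:

#include <cmath>
#include <cstdio>

int main() {
  // Mirror RotateLandmarks for the normalized point (1.0, 0.5) and a +pi/2
  // rotation: cos(rotation) and sin(-rotation), centered on (0.5, 0.5).
  const float rotation = M_PI / 2.0f;
  const float cos_r = std::cos(rotation);
  const float sin_r = std::sin(-rotation);
  const float x = 1.0f - 0.5f;
  const float y = 0.5f - 0.5f;
  // Prints "(0.50, 0.00)": the midpoint of the right edge maps to the
  // midpoint of the top edge, an anti-clockwise quarter turn on screen.
  std::printf("(%.2f, %.2f)\n", x * cos_r - y * sin_r + 0.5f,
              y * cos_r + x * sin_r + 0.5f);
  return 0;
}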

File: mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <cmath>
 #include <memory>
 #include <string>
 #include <utility>
@@ -23,6 +24,7 @@ limitations under the License.
 #include "mediapipe/framework/calculator_runner.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/matrix.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/framework/port/gtest.h"
 #include "mediapipe/framework/port/parse_text_proto.h"
 #include "mediapipe/framework/port/status_matchers.h"
@@ -35,6 +37,7 @@ constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kImageSizeTag[] = "IMAGE_SIZE";
 constexpr char kLandmarksMatrixTag[] = "LANDMARKS_MATRIX";
+constexpr char kNormRectTag[] = "NORM_RECT";

 template <class LandmarkListT>
 LandmarkListT BuildPseudoLandmarks(int num_landmarks, int offset = 0) {
@@ -54,6 +57,7 @@ struct Landmarks2dToMatrixCalculatorTestCase {
   int object_normalization_origin_offset = -1;
   float expected_cell_0_2;
   float expected_cell_1_5;
+  float rotation;
 };

 using Landmarks2dToMatrixCalculatorTest =
@@ -68,6 +72,7 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
         calculator: "LandmarksToMatrixCalculator"
         input_stream: "LANDMARKS:landmarks"
         input_stream: "IMAGE_SIZE:image_size"
+        input_stream: "NORM_RECT:norm_rect"
         output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
         options {
           [mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
@@ -91,6 +96,11 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
   runner.MutableInputs()
       ->Tag(kImageSizeTag)
       .packets.push_back(Adopt(image_size.release()).At(Timestamp(0)));
+  auto norm_rect = std::make_unique<NormalizedRect>();
+  norm_rect->set_rotation(test_case.rotation);
+  runner.MutableInputs()
+      ->Tag(kNormRectTag)
+      .packets.push_back(Adopt(norm_rect.release()).At(Timestamp(0)));

   MP_ASSERT_OK(runner.Run()) << "Calculator execution failed.";
@@ -109,12 +119,20 @@ INSTANTIATE_TEST_CASE_P(
         .base_offset = 0,
         .object_normalization_origin_offset = 0,
         .expected_cell_0_2 = 0.1f,
-        .expected_cell_1_5 = 0.1875f},
+        .expected_cell_1_5 = 0.1875f,
+        .rotation = 0},
        {.test_name = "TestWithOffset21",
         .base_offset = 21,
         .object_normalization_origin_offset = 0,
         .expected_cell_0_2 = 0.1f,
-        .expected_cell_1_5 = 0.1875f}}),
+        .expected_cell_1_5 = 0.1875f,
+        .rotation = 0},
+       {.test_name = "TestWithRotation",
+        .base_offset = 0,
+        .object_normalization_origin_offset = 0,
+        .expected_cell_0_2 = 0.075f,
+        .expected_cell_1_5 = -0.25f,
+        .rotation = M_PI / 2.0}}),
    [](const testing::TestParamInfo<
        Landmarks2dToMatrixCalculatorTest::ParamType>& info) {
      return info.param.test_name;
@@ -126,6 +144,7 @@ struct LandmarksWorld3dToMatrixCalculatorTestCase {
   int object_normalization_origin_offset = -1;
   float expected_cell_0_2;
   float expected_cell_1_5;
+  float rotation;
 };

 using LandmarksWorld3dToMatrixCalculatorTest =
@@ -140,6 +159,7 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
         calculator: "LandmarksToMatrixCalculator"
         input_stream: "WORLD_LANDMARKS:landmarks"
         input_stream: "IMAGE_SIZE:image_size"
+        input_stream: "NORM_RECT:norm_rect"
         output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
         options {
           [mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
@@ -162,6 +182,11 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
   runner.MutableInputs()
       ->Tag(kImageSizeTag)
       .packets.push_back(Adopt(image_size.release()).At(Timestamp(0)));
+  auto norm_rect = std::make_unique<NormalizedRect>();
+  norm_rect->set_rotation(test_case.rotation);
+  runner.MutableInputs()
+      ->Tag(kNormRectTag)
+      .packets.push_back(Adopt(norm_rect.release()).At(Timestamp(0)));

   MP_ASSERT_OK(runner.Run()) << "Calculator execution failed.";
@@ -180,17 +205,26 @@ INSTANTIATE_TEST_CASE_P(
         .base_offset = 0,
         .object_normalization_origin_offset = 0,
         .expected_cell_0_2 = 0.1f,
-        .expected_cell_1_5 = 0.25},
+        .expected_cell_1_5 = 0.25,
+        .rotation = 0},
        {.test_name = "TestWithOffset21",
         .base_offset = 21,
         .object_normalization_origin_offset = 0,
         .expected_cell_0_2 = 0.1f,
-        .expected_cell_1_5 = 0.25},
+        .expected_cell_1_5 = 0.25,
+        .rotation = 0},
        {.test_name = "NoObjectNormalization",
         .base_offset = 0,
         .object_normalization_origin_offset = -1,
         .expected_cell_0_2 = 0.021f,
-        .expected_cell_1_5 = 0.052f}}),
+        .expected_cell_1_5 = 0.052f,
+        .rotation = 0},
+       {.test_name = "TestWithRotation",
+        .base_offset = 0,
+        .object_normalization_origin_offset = 0,
+        .expected_cell_0_2 = 0.1f,
+        .expected_cell_1_5 = -0.25f,
+        .rotation = M_PI / 2.0}}),
    [](const testing::TestParamInfo<
        LandmarksWorld3dToMatrixCalculatorTest::ParamType>& info) {
      return info.param.test_name;

File: mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc

@@ -17,6 +17,7 @@ limitations under the License.
 #include <memory>
 #include <type_traits>
+#include <utility>
 #include <vector>

 #include "absl/memory/memory.h"
@@ -27,6 +28,7 @@ limitations under the License.
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/image.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/framework/packet.h"
 #include "mediapipe/tasks/cc/common.h"
 #include "mediapipe/tasks/cc/components/image_preprocessing.h"
@@ -62,6 +64,8 @@ constexpr char kHandGestureSubgraphTypeName[] =
 constexpr char kImageTag[] = "IMAGE";
 constexpr char kImageInStreamName[] = "image_in";
 constexpr char kImageOutStreamName[] = "image_out";
+constexpr char kNormRectTag[] = "NORM_RECT";
+constexpr char kNormRectStreamName[] = "norm_rect_in";
 constexpr char kHandGesturesTag[] = "HAND_GESTURES";
 constexpr char kHandGesturesStreamName[] = "hand_gestures";
 constexpr char kHandednessTag[] = "HANDEDNESS";
@@ -72,6 +76,31 @@ constexpr char kHandWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kHandWorldLandmarksStreamName[] = "world_landmarks";
 constexpr int kMicroSecondsPerMilliSecond = 1000;

+// Returns a NormalizedRect filling the whole image. If input is present, its
+// rotation is set in the returned NormalizedRect and a check is performed to
+// make sure no region-of-interest was provided. Otherwise, rotation is set to
+// 0.
+absl::StatusOr<NormalizedRect> FillNormalizedRect(
+    std::optional<NormalizedRect> normalized_rect) {
+  NormalizedRect result;
+  if (normalized_rect.has_value()) {
+    result = *normalized_rect;
+  }
+  bool has_coordinates = result.has_x_center() || result.has_y_center() ||
+                         result.has_width() || result.has_height();
+  if (has_coordinates) {
+    return CreateStatusWithPayload(
+        absl::StatusCode::kInvalidArgument,
+        "GestureRecognizer does not support region-of-interest.",
+        MediaPipeTasksStatus::kInvalidArgumentError);
+  }
+  result.set_x_center(0.5);
+  result.set_y_center(0.5);
+  result.set_width(1);
+  result.set_height(1);
+  return result;
+}
+
 // Creates a MediaPipe graph config that contains a subgraph node of
 // "mediapipe.tasks.vision.GestureRecognizerGraph". If the task is running
 // in the live stream mode, a "FlowLimiterCalculator" will be added to limit the
@@ -83,6 +112,7 @@ CalculatorGraphConfig CreateGraphConfig(
   auto& subgraph = graph.AddNode(kHandGestureSubgraphTypeName);
   subgraph.GetOptions<GestureRecognizerGraphOptionsProto>().Swap(options.get());
   graph.In(kImageTag).SetName(kImageInStreamName);
+  graph.In(kNormRectTag).SetName(kNormRectStreamName);
   subgraph.Out(kHandGesturesTag).SetName(kHandGesturesStreamName) >>
       graph.Out(kHandGesturesTag);
   subgraph.Out(kHandednessTag).SetName(kHandednessStreamName) >>
@@ -93,10 +123,11 @@ CalculatorGraphConfig CreateGraphConfig(
       graph.Out(kHandWorldLandmarksTag);
   subgraph.Out(kImageTag).SetName(kImageOutStreamName) >> graph.Out(kImageTag);
   if (enable_flow_limiting) {
-    return tasks::core::AddFlowLimiterCalculator(graph, subgraph, {kImageTag},
-                                                 kHandGesturesTag);
+    return tasks::core::AddFlowLimiterCalculator(
+        graph, subgraph, {kImageTag, kNormRectTag}, kHandGesturesTag);
   }
   graph.In(kImageTag) >> subgraph.In(kImageTag);
+  graph.In(kNormRectTag) >> subgraph.In(kNormRectTag);
   return graph.GetConfig();
 }
@@ -216,16 +247,22 @@ absl::StatusOr<std::unique_ptr<GestureRecognizer>> GestureRecognizer::Create(
 }

 absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
-    mediapipe::Image image) {
+    mediapipe::Image image,
+    std::optional<mediapipe::NormalizedRect> image_processing_options) {
   if (image.UsesGpu()) {
     return CreateStatusWithPayload(
         absl::StatusCode::kInvalidArgument,
         "GPU input images are currently not supported.",
         MediaPipeTasksStatus::kRunnerUnexpectedInputError);
   }
-  ASSIGN_OR_RETURN(auto output_packets,
-                   ProcessImageData({{kImageInStreamName,
-                                      MakePacket<Image>(std::move(image))}}));
+  ASSIGN_OR_RETURN(NormalizedRect norm_rect,
+                   FillNormalizedRect(image_processing_options));
+  ASSIGN_OR_RETURN(
+      auto output_packets,
+      ProcessImageData(
+          {{kImageInStreamName, MakePacket<Image>(std::move(image))},
+           {kNormRectStreamName,
+            MakePacket<NormalizedRect>(std::move(norm_rect))}}));
   if (output_packets[kHandGesturesStreamName].IsEmpty()) {
     return {{{}, {}, {}, {}}};
   }
@@ -245,18 +282,24 @@ absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
 }

 absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
-    mediapipe::Image image, int64 timestamp_ms) {
+    mediapipe::Image image, int64 timestamp_ms,
+    std::optional<mediapipe::NormalizedRect> image_processing_options) {
   if (image.UsesGpu()) {
     return CreateStatusWithPayload(
         absl::StatusCode::kInvalidArgument,
         absl::StrCat("GPU input images are currently not supported."),
         MediaPipeTasksStatus::kRunnerUnexpectedInputError);
   }
+  ASSIGN_OR_RETURN(NormalizedRect norm_rect,
+                   FillNormalizedRect(image_processing_options));
   ASSIGN_OR_RETURN(
       auto output_packets,
       ProcessVideoData(
           {{kImageInStreamName,
             MakePacket<Image>(std::move(image))
+                .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))},
+           {kNormRectStreamName,
+            MakePacket<NormalizedRect>(std::move(norm_rect))
                 .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}));
   if (output_packets[kHandGesturesStreamName].IsEmpty()) {
     return {{{}, {}, {}, {}}};
@@ -276,17 +319,23 @@ absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
   };
 }

-absl::Status GestureRecognizer::RecognizeAsync(mediapipe::Image image,
-                                               int64 timestamp_ms) {
+absl::Status GestureRecognizer::RecognizeAsync(
+    mediapipe::Image image, int64 timestamp_ms,
+    std::optional<mediapipe::NormalizedRect> image_processing_options) {
   if (image.UsesGpu()) {
     return CreateStatusWithPayload(
         absl::StatusCode::kInvalidArgument,
         absl::StrCat("GPU input images are currently not supported."),
         MediaPipeTasksStatus::kRunnerUnexpectedInputError);
   }
+  ASSIGN_OR_RETURN(NormalizedRect norm_rect,
+                   FillNormalizedRect(image_processing_options));
   return SendLiveStreamData(
       {{kImageInStreamName,
         MakePacket<Image>(std::move(image))
+            .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))},
+       {kNormRectStreamName,
+        MakePacket<NormalizedRect>(std::move(norm_rect))
             .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
 }
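In short, FillNormalizedRect accepts rotation-only options and rejects any region-of-interest. A hedged sketch of the resulting behavior (using the names defined in this file):

NormalizedRect options;
options.set_rotation(M_PI / 2);            // Rotation only: accepted; the
auto rect = FillNormalizedRect(options);   // result is a full-image rect
                                           // (center (0.5, 0.5), size 1x1)
                                           // with the rotation preserved.

options.set_width(0.5);                    // Any of x_center / y_center /
auto status = FillNormalizedRect(options); // width / height set: returns a
                                           // kInvalidArgument error instead.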

File: mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h

@@ -17,11 +17,13 @@ limitations under the License.
 #define MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZRER_GESTURE_RECOGNIZER_H_

 #include <memory>
+#include <optional>

 #include "absl/status/statusor.h"
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/image.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/tasks/cc/components/containers/gesture_recognition_result.h"
 #include "mediapipe/tasks/cc/core/base_options.h"
 #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
@@ -93,6 +95,13 @@ struct GestureRecognizerOptions {
 // Inputs:
 //   Image
 //     - The image that gesture recognition runs on.
+//   std::optional<NormalizedRect>
+//     - If provided, can be used to specify the rotation to apply to the image
+//       before performing gesture recognition, by setting its 'rotation' field
+//       in radians (e.g. 'M_PI / 2' for a 90° anti-clockwise rotation). Note
+//       that specifying a region-of-interest using the 'x_center', 'y_center',
+//       'width' and 'height' fields is NOT supported and will result in an
+//       invalid argument error being returned.
 // Outputs:
 //   GestureRecognitionResult
 //     - The hand gesture recognition results.
@@ -122,12 +131,23 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
   //
   // image - mediapipe::Image
   //   Image to perform hand gesture recognition on.
+  // image_processing_options - std::optional<NormalizedRect>
+  //   If provided, can be used to specify the rotation to apply to the image
+  //   before performing gesture recognition, by setting its 'rotation' field
+  //   in radians (e.g. 'M_PI / 2' for a 90° anti-clockwise rotation). Note
+  //   that specifying a region-of-interest using the 'x_center', 'y_center',
+  //   'width' and 'height' fields is NOT supported and will result in an
+  //   invalid argument error being returned.
   //
   // The image can be of any size with format RGB or RGBA.
   // TODO: Describes how the input image will be preprocessed
   // after the yuv support is implemented.
+  // TODO: use an ImageProcessingOptions struct instead of
+  // NormalizedRect.
   absl::StatusOr<components::containers::GestureRecognitionResult> Recognize(
-      Image image);
+      Image image,
+      std::optional<mediapipe::NormalizedRect> image_processing_options =
+          std::nullopt);

   // Performs gesture recognition on the provided video frame.
   // Only use this method when the GestureRecognizer is created with the video
@@ -137,7 +157,9 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
   // provide the video frame's timestamp (in milliseconds). The input timestamps
   // must be monotonically increasing.
   absl::StatusOr<components::containers::GestureRecognitionResult>
-  RecognizeForVideo(Image image, int64 timestamp_ms);
+  RecognizeForVideo(Image image, int64 timestamp_ms,
+                    std::optional<mediapipe::NormalizedRect>
+                        image_processing_options = std::nullopt);

   // Sends live image data to perform gesture recognition, and the results will
   // be available via the "result_callback" provided in the
@@ -157,7 +179,9 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
   //     longer be valid when the callback returns. To access the image data
   //     outside of the callback, callers need to make a copy of the image.
   //   - The input timestamp in milliseconds.
-  absl::Status RecognizeAsync(Image image, int64 timestamp_ms);
+  absl::Status RecognizeAsync(Image image, int64 timestamp_ms,
+                              std::optional<mediapipe::NormalizedRect>
+                                  image_processing_options = std::nullopt);

   // Shuts down the GestureRecognizer when all works are done.
   absl::Status Close() { return runner_->Close(); }
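Putting the new API surface together, a caller could pass a rotation as follows. This is an illustrative sketch, not code from the commit; it assumes a recognizer already obtained from GestureRecognizer::Create and a mediapipe::Image named image:

mediapipe::NormalizedRect image_processing_options;
image_processing_options.set_rotation(M_PI / 2);  // 90° anti-clockwise.
auto result = recognizer->Recognize(image, image_processing_options);

// The argument defaults to std::nullopt, so existing call sites keep
// compiling and behave as before (no rotation):
auto result_unrotated = recognizer->Recognize(image);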

File: mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_graph.cc

@@ -24,6 +24,7 @@ limitations under the License.
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/image.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/tasks/cc/common.h"
 #include "mediapipe/tasks/cc/core/model_task_graph.h"
 #include "mediapipe/tasks/cc/core/utils.h"
@@ -53,6 +54,7 @@ using ::mediapipe::tasks::vision::hand_landmarker::proto::
     HandLandmarkerGraphOptions;

 constexpr char kImageTag[] = "IMAGE";
+constexpr char kNormRectTag[] = "NORM_RECT";
 constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kHandednessTag[] = "HANDEDNESS";
@@ -76,6 +78,9 @@ struct GestureRecognizerOutputs {
 // Inputs:
 //   IMAGE - Image
 //     Image to perform hand gesture recognition on.
+//   NORM_RECT - NormalizedRect
+//     Describes image rotation and region of image to perform landmarks
+//     detection on.
 //
 // Outputs:
 //   HAND_GESTURES - std::vector<ClassificationList>
@@ -93,13 +98,15 @@ struct GestureRecognizerOutputs {
 //   IMAGE - mediapipe::Image
 //     The image that gesture recognizer runs on and has the pixel data stored
 //     on the target storage (CPU vs GPU).
-//
+// All returned coordinates are in the unrotated and uncropped input image
+// coordinates system.
 //
 // Example:
 // node {
 //   calculator:
 //     "mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph"
 //   input_stream: "IMAGE:image_in"
+//   input_stream: "NORM_RECT:norm_rect"
 //   output_stream: "HAND_GESTURES:hand_gestures"
 //   output_stream: "LANDMARKS:hand_landmarks"
 //   output_stream: "WORLD_LANDMARKS:world_hand_landmarks"
@@ -132,7 +139,8 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
     ASSIGN_OR_RETURN(auto hand_gesture_recognition_output,
                      BuildGestureRecognizerGraph(
                          *sc->MutableOptions<GestureRecognizerGraphOptions>(),
-                         graph[Input<Image>(kImageTag)], graph));
+                         graph[Input<Image>(kImageTag)],
+                         graph[Input<NormalizedRect>(kNormRectTag)], graph));
     hand_gesture_recognition_output.gesture >>
         graph[Output<std::vector<ClassificationList>>(kHandGesturesTag)];
     hand_gesture_recognition_output.handedness >>
@@ -148,7 +156,7 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
  private:
   absl::StatusOr<GestureRecognizerOutputs> BuildGestureRecognizerGraph(
       GestureRecognizerGraphOptions& graph_options, Source<Image> image_in,
-      Graph& graph) {
+      Source<NormalizedRect> norm_rect_in, Graph& graph) {
     auto& image_property = graph.AddNode("ImagePropertiesCalculator");
     image_in >> image_property.In("IMAGE");
     auto image_size = image_property.Out("SIZE");
@@ -162,6 +170,7 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
         graph_options.mutable_hand_landmarker_graph_options());
     image_in >> hand_landmarker_graph.In(kImageTag);
+    norm_rect_in >> hand_landmarker_graph.In(kNormRectTag);
     auto hand_landmarks =
         hand_landmarker_graph[Output<std::vector<NormalizedLandmarkList>>(
             kLandmarksTag)];
@@ -187,6 +196,7 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
     hand_world_landmarks >> hand_gesture_subgraph.In(kWorldLandmarksTag);
     handedness >> hand_gesture_subgraph.In(kHandednessTag);
     image_size >> hand_gesture_subgraph.In(kImageSizeTag);
+    norm_rect_in >> hand_gesture_subgraph.In(kNormRectTag);
     hand_landmarks_id >> hand_gesture_subgraph.In(kHandTrackingIdsTag);
     auto hand_gestures =
         hand_gesture_subgraph[Output<std::vector<ClassificationList>>(

File: mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc

@@ -25,6 +25,7 @@ limitations under the License.
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/matrix.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/framework/formats/tensor.h"
 #include "mediapipe/tasks/cc/common.h"
 #include "mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.h"
@@ -57,6 +58,7 @@ constexpr char kHandednessTag[] = "HANDEDNESS";
 constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kImageSizeTag[] = "IMAGE_SIZE";
+constexpr char kNormRectTag[] = "NORM_RECT";
 constexpr char kHandTrackingIdsTag[] = "HAND_TRACKING_IDS";
 constexpr char kHandGesturesTag[] = "HAND_GESTURES";
 constexpr char kLandmarksMatrixTag[] = "LANDMARKS_MATRIX";
@@ -92,6 +94,9 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
 //     Detected hand landmarks in world coordinates.
 //   IMAGE_SIZE - std::pair<int, int>
 //     The size of image from which the landmarks detected from.
+//   NORM_RECT - NormalizedRect
+//     NormalizedRect whose 'rotation' field is used to rotate the
+//     landmarks before processing them.
 //
 // Outputs:
 //   HAND_GESTURES - ClassificationList
@@ -106,6 +111,7 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
 //   input_stream: "LANDMARKS:landmarks"
 //   input_stream: "WORLD_LANDMARKS:world_landmarks"
 //   input_stream: "IMAGE_SIZE:image_size"
+//   input_stream: "NORM_RECT:norm_rect"
 //   output_stream: "HAND_GESTURES:hand_gestures"
 //   options {
 //     [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext]
@@ -133,7 +139,8 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
             graph[Input<ClassificationList>(kHandednessTag)],
             graph[Input<NormalizedLandmarkList>(kLandmarksTag)],
             graph[Input<LandmarkList>(kWorldLandmarksTag)],
-            graph[Input<std::pair<int, int>>(kImageSizeTag)], graph));
+            graph[Input<std::pair<int, int>>(kImageSizeTag)],
+            graph[Input<NormalizedRect>(kNormRectTag)], graph));
     hand_gestures >> graph[Output<ClassificationList>(kHandGesturesTag)];
     return graph.GetConfig();
   }
@@ -145,7 +152,8 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
       Source<ClassificationList> handedness,
       Source<NormalizedLandmarkList> hand_landmarks,
       Source<LandmarkList> hand_world_landmarks,
-      Source<std::pair<int, int>> image_size, Graph& graph) {
+      Source<std::pair<int, int>> image_size, Source<NormalizedRect> norm_rect,
+      Graph& graph) {
     // Converts the ClassificationList to a matrix.
     auto& handedness_to_matrix = graph.AddNode("HandednessToMatrixCalculator");
     handedness >> handedness_to_matrix.In(kHandednessTag);
@@ -166,6 +174,7 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
         landmarks_options;
     hand_landmarks >> hand_landmarks_to_matrix.In(kLandmarksTag);
     image_size >> hand_landmarks_to_matrix.In(kImageSizeTag);
+    norm_rect >> hand_landmarks_to_matrix.In(kNormRectTag);
     auto hand_landmarks_matrix =
         hand_landmarks_to_matrix[Output<Matrix>(kLandmarksMatrixTag)];
@@ -181,6 +190,7 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
     hand_world_landmarks >>
         hand_world_landmarks_to_matrix.In(kWorldLandmarksTag);
     image_size >> hand_world_landmarks_to_matrix.In(kImageSizeTag);
+    norm_rect >> hand_world_landmarks_to_matrix.In(kNormRectTag);
     auto hand_world_landmarks_matrix =
         hand_world_landmarks_to_matrix[Output<Matrix>(kLandmarksMatrixTag)];
@@ -239,6 +249,9 @@ REGISTER_MEDIAPIPE_GRAPH(
 //     A vector hand landmarks in world coordinates.
 //   IMAGE_SIZE - std::pair<int, int>
 //     The size of image from which the landmarks detected from.
+//   NORM_RECT - NormalizedRect
+//     NormalizedRect whose 'rotation' field is used to rotate the
+//     landmarks before processing them.
 //   HAND_TRACKING_IDS - std::vector<int>
 //     A vector of the tracking ids of the hands. The tracking id is the vector
 //     index corresponding to the same hand if the graph runs multiple times.
@@ -257,6 +270,7 @@ REGISTER_MEDIAPIPE_GRAPH(
 //   input_stream: "LANDMARKS:landmarks"
 //   input_stream: "WORLD_LANDMARKS:world_landmarks"
 //   input_stream: "IMAGE_SIZE:image_size"
+//   input_stream: "NORM_RECT:norm_rect"
 //   input_stream: "HAND_TRACKING_IDS:hand_tracking_ids"
 //   output_stream: "HAND_GESTURES:hand_gestures"
 //   options {
@@ -283,6 +297,7 @@ class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph {
             graph[Input<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
             graph[Input<std::vector<LandmarkList>>(kWorldLandmarksTag)],
             graph[Input<std::pair<int, int>>(kImageSizeTag)],
+            graph[Input<NormalizedRect>(kNormRectTag)],
             graph[Input<std::vector<int>>(kHandTrackingIdsTag)], graph));
     multi_hand_gestures >>
         graph[Output<std::vector<ClassificationList>>(kHandGesturesTag)];
@@ -296,18 +311,20 @@ class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph {
       Source<std::vector<ClassificationList>> multi_handedness,
       Source<std::vector<NormalizedLandmarkList>> multi_hand_landmarks,
       Source<std::vector<LandmarkList>> multi_hand_world_landmarks,
-      Source<std::pair<int, int>> image_size,
+      Source<std::pair<int, int>> image_size, Source<NormalizedRect> norm_rect,
       Source<std::vector<int>> multi_hand_tracking_ids, Graph& graph) {
     auto& begin_loop_int = graph.AddNode("BeginLoopIntCalculator");
     image_size >> begin_loop_int.In(kCloneTag)[0];
-    multi_handedness >> begin_loop_int.In(kCloneTag)[1];
-    multi_hand_landmarks >> begin_loop_int.In(kCloneTag)[2];
-    multi_hand_world_landmarks >> begin_loop_int.In(kCloneTag)[3];
+    norm_rect >> begin_loop_int.In(kCloneTag)[1];
+    multi_handedness >> begin_loop_int.In(kCloneTag)[2];
+    multi_hand_landmarks >> begin_loop_int.In(kCloneTag)[3];
+    multi_hand_world_landmarks >> begin_loop_int.In(kCloneTag)[4];
     multi_hand_tracking_ids >> begin_loop_int.In(kIterableTag);
     auto image_size_clone = begin_loop_int.Out(kCloneTag)[0];
-    auto multi_handedness_clone = begin_loop_int.Out(kCloneTag)[1];
-    auto multi_hand_landmarks_clone = begin_loop_int.Out(kCloneTag)[2];
-    auto multi_hand_world_landmarks_clone = begin_loop_int.Out(kCloneTag)[3];
+    auto norm_rect_clone = begin_loop_int.Out(kCloneTag)[1];
+    auto multi_handedness_clone = begin_loop_int.Out(kCloneTag)[2];
+    auto multi_hand_landmarks_clone = begin_loop_int.Out(kCloneTag)[3];
+    auto multi_hand_world_landmarks_clone = begin_loop_int.Out(kCloneTag)[4];
     auto hand_tracking_id = begin_loop_int.Out(kItemTag);
     auto batch_end = begin_loop_int.Out(kBatchEndTag);
@@ -341,6 +358,7 @@ class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph {
     hand_world_landmarks >>
         hand_gesture_recognizer_graph.In(kWorldLandmarksTag);
     image_size_clone >> hand_gesture_recognizer_graph.In(kImageSizeTag);
+    norm_rect_clone >> hand_gesture_recognizer_graph.In(kNormRectTag);
     auto hand_gestures = hand_gesture_recognizer_graph.Out(kHandGesturesTag);

     auto& end_loop_classification_lists =
File: mediapipe/tasks/cc/vision/hand_detector/BUILD

@@ -32,7 +32,7 @@ cc_library(
         "//mediapipe/calculators/tflite:ssd_anchors_calculator_cc_proto",
         "//mediapipe/calculators/util:detection_label_id_to_text_calculator",
         "//mediapipe/calculators/util:detection_label_id_to_text_calculator_cc_proto",
-        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
+        "//mediapipe/calculators/util:detection_projection_calculator",
        "//mediapipe/calculators/util:detections_to_rects_calculator",
        "//mediapipe/calculators/util:detections_to_rects_calculator_cc_proto",
        "//mediapipe/calculators/util:non_max_suppression_calculator",

File: mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc

@@ -58,6 +58,7 @@ using ::mediapipe::tasks::vision::hand_detector::proto::
     HandDetectorGraphOptions;

 constexpr char kImageTag[] = "IMAGE";
+constexpr char kNormRectTag[] = "NORM_RECT";
 constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
 constexpr char kHandRectsTag[] = "HAND_RECTS";
 constexpr char kPalmRectsTag[] = "PALM_RECTS";
@@ -148,6 +149,9 @@ void ConfigureRectTransformationCalculator(
 // Inputs:
 //   IMAGE - Image
 //     Image to perform detection on.
+//   NORM_RECT - NormalizedRect
+//     Describes image rotation and region of image to perform detection
+//     on.
 //
 // Outputs:
 //   PALM_DETECTIONS - std::vector<Detection>
@@ -159,11 +163,14 @@ void ConfigureRectTransformationCalculator(
 //   IMAGE - Image
 //     The input image that the hand detector runs on and has the pixel data
 //     stored on the target storage (CPU vs GPU).
+// All returned coordinates are in the unrotated and uncropped input image
+// coordinates system.
 //
 // Example:
 // node {
 //   calculator: "mediapipe.tasks.vision.hand_detector.HandDetectorGraph"
 //   input_stream: "IMAGE:image"
+//   input_stream: "NORM_RECT:norm_rect"
 //   output_stream: "PALM_DETECTIONS:palm_detections"
 //   output_stream: "HAND_RECTS:hand_rects_from_palm_detections"
 //   output_stream: "PALM_RECTS:palm_rects"
@@ -189,11 +196,11 @@ class HandDetectorGraph : public core::ModelTaskGraph {
     ASSIGN_OR_RETURN(const auto* model_resources,
                      CreateModelResources<HandDetectorGraphOptions>(sc));
     Graph graph;
-    ASSIGN_OR_RETURN(
-        auto hand_detection_outs,
-        BuildHandDetectionSubgraph(sc->Options<HandDetectorGraphOptions>(),
-                                   *model_resources,
-                                   graph[Input<Image>(kImageTag)], graph));
+    ASSIGN_OR_RETURN(auto hand_detection_outs,
+                     BuildHandDetectionSubgraph(
+                         sc->Options<HandDetectorGraphOptions>(),
+                         *model_resources, graph[Input<Image>(kImageTag)],
+                         graph[Input<NormalizedRect>(kNormRectTag)], graph));
     hand_detection_outs.palm_detections >>
         graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
     hand_detection_outs.hand_rects >>
@@ -216,7 +223,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
   absl::StatusOr<HandDetectionOuts> BuildHandDetectionSubgraph(
       const HandDetectorGraphOptions& subgraph_options,
       const core::ModelResources& model_resources, Source<Image> image_in,
-      Graph& graph) {
+      Source<NormalizedRect> norm_rect_in, Graph& graph) {
     // Add image preprocessing subgraph. The model expects aspect ratio
     // unchanged.
     auto& preprocessing =
@@ -233,8 +240,9 @@ class HandDetectorGraph : public core::ModelTaskGraph {
             &preprocessing
                  .GetOptions<tasks::components::ImagePreprocessingOptions>()));
     image_in >> preprocessing.In("IMAGE");
+    norm_rect_in >> preprocessing.In("NORM_RECT");
     auto preprocessed_tensors = preprocessing.Out("TENSORS");
-    auto letterbox_padding = preprocessing.Out("LETTERBOX_PADDING");
+    auto matrix = preprocessing.Out("MATRIX");
     auto image_size = preprocessing.Out("IMAGE_SIZE");

     // Adds SSD palm detection model.
@@ -278,17 +286,12 @@ class HandDetectorGraph : public core::ModelTaskGraph {
     nms_detections >> detection_label_id_to_text.In("");
     auto detections_with_text = detection_label_id_to_text.Out("");

-    // Adjusts detection locations (already normalized to [0.f, 1.f]) on the
-    // letterboxed image (after image transformation with the FIT scale mode) to
-    // the corresponding locations on the same image with the letterbox removed
-    // (the input image to the graph before image transformation).
-    auto& detection_letterbox_removal =
-        graph.AddNode("DetectionLetterboxRemovalCalculator");
-    detections_with_text >> detection_letterbox_removal.In("DETECTIONS");
-    letterbox_padding >> detection_letterbox_removal.In("LETTERBOX_PADDING");
+    // Projects detections back into the input image coordinates system.
+    auto& detection_projection = graph.AddNode("DetectionProjectionCalculator");
+    detections_with_text >> detection_projection.In("DETECTIONS");
+    matrix >> detection_projection.In("PROJECTION_MATRIX");
     auto palm_detections =
-        detection_letterbox_removal[Output<std::vector<Detection>>(
-            "DETECTIONS")];
+        detection_projection[Output<std::vector<Detection>>("DETECTIONS")];

     // Converts each palm detection into a rectangle (normalized by image size)
     // that encloses the palm and is rotated such that the line connecting
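Note the substitution in the hunk above: once NORM_RECT can carry a rotation, undoing only the letterbox padding is no longer enough to map detections back to the original frame. The graph therefore routes the MATRIX output of the image preprocessing subgraph into DetectionProjectionCalculator, which projects the detections into the unrotated, uncropped input image coordinate system.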

File: mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc

@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==============================================================================*/ ==============================================================================*/
#include <cmath>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <string> #include <string>
@ -75,13 +76,18 @@ using ::testing::proto::Partially;
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite"; constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite";
constexpr char kTestRightHandsImage[] = "right_hands.jpg"; constexpr char kTestRightHandsImage[] = "right_hands.jpg";
constexpr char kTestRightHandsRotatedImage[] = "right_hands_rotated.jpg";
constexpr char kTestModelResourcesTag[] = "test_model_resources"; constexpr char kTestModelResourcesTag[] = "test_model_resources";
constexpr char kOneHandResultFile[] = "hand_detector_result_one_hand.pbtxt"; constexpr char kOneHandResultFile[] = "hand_detector_result_one_hand.pbtxt";
constexpr char kOneHandRotatedResultFile[] =
"hand_detector_result_one_hand_rotated.pbtxt";
constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt"; constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt";
constexpr char kImageTag[] = "IMAGE"; constexpr char kImageTag[] = "IMAGE";
constexpr char kImageName[] = "image"; constexpr char kImageName[] = "image";
constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kNormRectName[] = "norm_rect";
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS"; constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
constexpr char kPalmDetectionsName[] = "palm_detections"; constexpr char kPalmDetectionsName[] = "palm_detections";
constexpr char kHandRectsTag[] = "HAND_RECTS"; constexpr char kHandRectsTag[] = "HAND_RECTS";
@ -117,6 +123,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner(
graph[Input<Image>(kImageTag)].SetName(kImageName) >> graph[Input<Image>(kImageTag)].SetName(kImageName) >>
hand_detection.In(kImageTag); hand_detection.In(kImageTag);
graph[Input<NormalizedRect>(kNormRectTag)].SetName(kNormRectName) >>
hand_detection.In(kNormRectTag);
hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >> hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >>
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)]; graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
@ -142,6 +150,9 @@ struct TestParams {
std::string hand_detection_model_name; std::string hand_detection_model_name;
// The filename of test image. // The filename of test image.
std::string test_image_name; std::string test_image_name;
// The rotation to apply to the test image before processing, in radians
// counter-clockwise.
float rotation;
// The number of maximum detected hands. // The number of maximum detected hands.
int num_hands; int num_hands;
// The expected hand detector result. // The expected hand detector result.
@ -154,14 +165,22 @@ TEST_P(HandDetectionTest, DetectTwoHands) {
MP_ASSERT_OK_AND_ASSIGN(
Image image, DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
GetParam().test_image_name)));
NormalizedRect input_norm_rect;
input_norm_rect.set_rotation(GetParam().rotation);
input_norm_rect.set_x_center(0.5);
input_norm_rect.set_y_center(0.5);
input_norm_rect.set_width(1.0);
input_norm_rect.set_height(1.0);
MP_ASSERT_OK_AND_ASSIGN(
auto model_resources,
CreateModelResourcesForModel(GetParam().hand_detection_model_name));
MP_ASSERT_OK_AND_ASSIGN(
auto task_runner, CreateTaskRunner(*model_resources, kPalmDetectionModel,
GetParam().num_hands));
auto output_packets = task_runner->Process(
{{kImageName, MakePacket<Image>(std::move(image))},
{kNormRectName,
MakePacket<NormalizedRect>(std::move(input_norm_rect))}});
MP_ASSERT_OK(output_packets);
const std::vector<Detection>& palm_detections =
(*output_packets)[kPalmDetectionsName].Get<std::vector<Detection>>();
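The full-image NormalizedRect built above is a recurring pattern in these tests; a small helper along the following lines (hypothetical, not part of this change) captures it. Rotation is in radians, counter-clockwise, about the rect center.

#include "mediapipe/framework/formats/rect.pb.h"

// Hypothetical helper: a NormalizedRect covering the whole image, with an
// optional rotation. All fields are normalized to the [0, 1] image square.
mediapipe::NormalizedRect MakeFullImageNormRect(float rotation = 0.0f) {
  mediapipe::NormalizedRect rect;
  rect.set_x_center(0.5f);
  rect.set_y_center(0.5f);
  rect.set_width(1.0f);
  rect.set_height(1.0f);
  rect.set_rotation(rotation);
  return rect;
}

With such a helper, the DetectOneHandWithRotation case below would reduce to MakeFullImageNormRect(M_PI / 2.0f).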
@ -188,15 +207,24 @@ INSTANTIATE_TEST_SUITE_P(
Values(TestParams{.test_name = "DetectOneHand",
.hand_detection_model_name = kPalmDetectionModel,
.test_image_name = kTestRightHandsImage,
.rotation = 0,
.num_hands = 1,
.expected_result =
GetExpectedHandDetectorResult(kOneHandResultFile)},
TestParams{.test_name = "DetectTwoHands",
.hand_detection_model_name = kPalmDetectionModel,
.test_image_name = kTestRightHandsImage,
.rotation = 0,
.num_hands = 2,
.expected_result =
GetExpectedHandDetectorResult(kTwoHandsResultFile)},
TestParams{.test_name = "DetectOneHandWithRotation",
.hand_detection_model_name = kPalmDetectionModel,
.test_image_name = kTestRightHandsRotatedImage,
.rotation = M_PI / 2.0f,
.num_hands = 1,
.expected_result = GetExpectedHandDetectorResult(
kOneHandRotatedResultFile)}),
[](const TestParamInfo<HandDetectionTest::ParamType>& info) {
return info.param.test_name;
});
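One portability note on the M_PI / 2.0f rotation used above: M_PI comes from POSIX <math.h>/<cmath> (which is why <cmath> is newly included here), not from the C++ standard. On toolchains that omit it, a local fallback works just as well — a sketch:

// M_PI is POSIX, not guaranteed by ISO C++; define a fallback if needed.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// 90 degrees counter-clockwise, expressed in radians.
constexpr float kQuarterTurnCcw = static_cast<float>(M_PI) / 2.0f;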

View File

@ -64,6 +64,7 @@ using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarksDetectorGraphOptions;
constexpr char kImageTag[] = "IMAGE";
constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
@ -122,6 +123,9 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
// Inputs:
//   IMAGE - Image
//     Image to perform hand landmarks detection on.
//   NORM_RECT - NormalizedRect
//     Describes image rotation and region of image to perform landmarks
//     detection on.
//
// Outputs:
//   LANDMARKS: - std::vector<NormalizedLandmarkList>
@ -140,11 +144,14 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
//   IMAGE - Image
//     The input image that the hand landmarker runs on and has the pixel data
//     stored on the target storage (CPU vs GPU).
//     All returned coordinates are in the unrotated and uncropped input image
//     coordinate system.
//
// Example:
// node {
//   calculator: "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"
//   input_stream: "IMAGE:image_in"
//   input_stream: "NORM_RECT:norm_rect"
//   output_stream: "LANDMARKS:hand_landmarks"
//   output_stream: "WORLD_LANDMARKS:world_hand_landmarks"
//   output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
@ -198,10 +205,11 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
!sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService)
.IsAvailable()));
}
ASSIGN_OR_RETURN(auto hand_landmarker_outputs,
BuildHandLandmarkerGraph(
sc->Options<HandLandmarkerGraphOptions>(),
graph[Input<Image>(kImageTag)],
graph[Input<NormalizedRect>(kNormRectTag)], graph));
hand_landmarker_outputs.landmark_lists >>
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
hand_landmarker_outputs.world_landmark_lists >>
@ -240,7 +248,7 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
// graph: the mediapipe graph instance to be updated.
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
Source<NormalizedRect> norm_rect_in, Graph& graph) {
const int max_num_hands =
tasks_options.hand_detector_graph_options().num_hands();
@ -258,12 +266,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
auto image_for_hand_detector =
DisallowIf(image_in, has_enough_hands, graph);
auto norm_rect_in_for_hand_detector =
DisallowIf(norm_rect_in, has_enough_hands, graph);
auto& hand_detector =
graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph");
hand_detector.GetOptions<HandDetectorGraphOptions>().CopyFrom(
tasks_options.hand_detector_graph_options());
image_for_hand_detector >> hand_detector.In("IMAGE");
norm_rect_in_for_hand_detector >> hand_detector.In("NORM_RECT");
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
auto& hand_association = graph.AddNode("HandAssociationCalculator");
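Note that norm_rect_in is gated through DisallowIf on the same has_enough_hands condition as the image. That is what keeps the detector's two inputs in lockstep: gating only the image would let NORM_RECT packets through on frames where detection is skipped. Schematically (a restatement of the pattern above, not new API):

// Every stream feeding HandDetectorGraph must be gated on the same
// condition, or IMAGE and NORM_RECT packets could come from different
// frames once tracking suppresses detection.
auto gated_image = DisallowIf(image_in, has_enough_hands, graph);
auto gated_norm_rect = DisallowIf(norm_rect_in, has_enough_hands, graph);
gated_image >> hand_detector.In("IMAGE");
gated_norm_rect >> hand_detector.In("NORM_RECT");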

View File

@ -13,10 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cmath>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/flags/flag.h"
#include "absl/status/statusor.h"
@ -67,9 +69,12 @@ using ::testing::proto::Partially;
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
constexpr char kHandLandmarkerModelBundle[] = "hand_landmark.task";
constexpr char kLeftHandsImage[] = "left_hands.jpg";
constexpr char kLeftHandsRotatedImage[] = "left_hands_rotated.jpg";
constexpr char kImageTag[] = "IMAGE";
constexpr char kImageName[] = "image_in";
constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kNormRectName[] = "norm_rect_in";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kLandmarksName[] = "landmarks";
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
@ -84,6 +89,11 @@ constexpr char kExpectedLeftUpHandLandmarksFilename[] =
"expected_left_up_hand_landmarks.prototxt"; "expected_left_up_hand_landmarks.prototxt";
constexpr char kExpectedLeftDownHandLandmarksFilename[] = constexpr char kExpectedLeftDownHandLandmarksFilename[] =
"expected_left_down_hand_landmarks.prototxt"; "expected_left_down_hand_landmarks.prototxt";
// Same but for the rotated image.
constexpr char kExpectedLeftUpHandRotatedLandmarksFilename[] =
"expected_left_up_hand_rotated_landmarks.prototxt";
constexpr char kExpectedLeftDownHandRotatedLandmarksFilename[] =
"expected_left_down_hand_rotated_landmarks.prototxt";
constexpr float kFullModelFractionDiff = 0.03; // percentage constexpr float kFullModelFractionDiff = 0.03; // percentage
constexpr float kAbsMargin = 0.03; constexpr float kAbsMargin = 0.03;
@ -111,6 +121,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner() {
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
hand_landmarker_graph.In(kImageTag);
graph[Input<NormalizedRect>(kNormRectTag)].SetName(kNormRectName) >>
hand_landmarker_graph.In(kNormRectTag);
hand_landmarker_graph.Out(kLandmarksTag).SetName(kLandmarksName) >>
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
hand_landmarker_graph.Out(kWorldLandmarksTag).SetName(kWorldLandmarksName) >>
@ -130,9 +142,16 @@ TEST_F(HandLandmarkerTest, Succeeds) {
MP_ASSERT_OK_AND_ASSIGN(
Image image,
DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kLeftHandsImage)));
NormalizedRect input_norm_rect;
input_norm_rect.set_x_center(0.5);
input_norm_rect.set_y_center(0.5);
input_norm_rect.set_width(1.0);
input_norm_rect.set_height(1.0);
MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner());
auto output_packets = task_runner->Process(
{{kImageName, MakePacket<Image>(std::move(image))},
{kNormRectName,
MakePacket<NormalizedRect>(std::move(input_norm_rect))}});
const auto& landmarks = (*output_packets)[kLandmarksName]
.Get<std::vector<NormalizedLandmarkList>>();
ASSERT_EQ(landmarks.size(), kMaxNumHands);
@ -150,6 +169,38 @@ TEST_F(HandLandmarkerTest, Succeeds) {
/*fraction=*/kFullModelFractionDiff));
}
TEST_F(HandLandmarkerTest, SucceedsWithRotation) {
MP_ASSERT_OK_AND_ASSIGN(
Image image, DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
kLeftHandsRotatedImage)));
NormalizedRect input_norm_rect;
input_norm_rect.set_x_center(0.5);
input_norm_rect.set_y_center(0.5);
input_norm_rect.set_width(1.0);
input_norm_rect.set_height(1.0);
input_norm_rect.set_rotation(M_PI / 2.0);
MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner());
auto output_packets = task_runner->Process(
{{kImageName, MakePacket<Image>(std::move(image))},
{kNormRectName,
MakePacket<NormalizedRect>(std::move(input_norm_rect))}});
const auto& landmarks = (*output_packets)[kLandmarksName]
.Get<std::vector<NormalizedLandmarkList>>();
ASSERT_EQ(landmarks.size(), kMaxNumHands);
std::vector<NormalizedLandmarkList> expected_landmarks = {
GetExpectedLandmarkList(kExpectedLeftUpHandRotatedLandmarksFilename),
GetExpectedLandmarkList(kExpectedLeftDownHandRotatedLandmarksFilename)};
EXPECT_THAT(landmarks[0],
Approximately(Partially(EqualsProto(expected_landmarks[0])),
/*margin=*/kAbsMargin,
/*fraction=*/kFullModelFractionDiff));
EXPECT_THAT(landmarks[1],
Approximately(Partially(EqualsProto(expected_landmarks[1])),
/*margin=*/kAbsMargin,
/*fraction=*/kFullModelFractionDiff));
}
}  // namespace
}  // namespace hand_landmarker

View File

@ -15,6 +15,7 @@
package com.google.mediapipe.tasks.vision.gesturerecognizer;
import android.content.Context;
import android.graphics.RectF;
import android.os.ParcelFileDescriptor;
import com.google.auto.value.AutoValue;
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
@ -71,8 +72,10 @@ import java.util.Optional;
public final class GestureRecognizer extends BaseVisionTaskApi {
private static final String TAG = GestureRecognizer.class.getSimpleName();
private static final String IMAGE_IN_STREAM_NAME = "image_in";
private static final String NORM_RECT_IN_STREAM_NAME = "norm_rect_in";
private static final List<String> INPUT_STREAMS =
Collections.unmodifiableList(
Arrays.asList("IMAGE:" + IMAGE_IN_STREAM_NAME, "NORM_RECT:" + NORM_RECT_IN_STREAM_NAME));
private static final List<String> OUTPUT_STREAMS =
Collections.unmodifiableList(
Arrays.asList(
@ -205,7 +208,7 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
* @param runningMode a mediapipe vision task {@link RunningMode}.
*/
private GestureRecognizer(TaskRunner taskRunner, RunningMode runningMode) {
super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME, NORM_RECT_IN_STREAM_NAME);
}
/**
@ -223,7 +226,8 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
* @throws MediaPipeException if there is an internal error.
*/
public GestureRecognitionResult recognize(Image inputImage) {
// TODO: add proper support for rotations.
return (GestureRecognitionResult) processImageData(inputImage, buildFullImageRectF());
}
/**
@ -244,7 +248,9 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
* @throws MediaPipeException if there is an internal error.
*/
public GestureRecognitionResult recognizeForVideo(Image inputImage, long inputTimestampMs) {
// TODO: add proper support for rotations.
return (GestureRecognitionResult)
processVideoData(inputImage, buildFullImageRectF(), inputTimestampMs);
}
/**
@ -266,7 +272,8 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
* @throws MediaPipeException if there is an internal error.
*/
public void recognizeAsync(Image inputImage, long inputTimestampMs) {
// TODO: add proper support for rotations.
sendLiveStreamData(inputImage, buildFullImageRectF(), inputTimestampMs);
}
/** Options for setting up a {@link GestureRecognizer}. */
@ -464,4 +471,9 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
.build();
}
}
/** Creates a RectF covering the full image. */
private static RectF buildFullImageRectF() {
return new RectF(0, 0, 1, 1);
}
}

View File

@ -39,6 +39,7 @@ mediapipe_files(srcs = [
"hand_landmark_full.tflite", "hand_landmark_full.tflite",
"hand_landmark_lite.tflite", "hand_landmark_lite.tflite",
"left_hands.jpg", "left_hands.jpg",
"left_hands_rotated.jpg",
"mobilenet_v1_0.25_192_quantized_1_default_1.tflite", "mobilenet_v1_0.25_192_quantized_1_default_1.tflite",
"mobilenet_v1_0.25_224_1_default_1.tflite", "mobilenet_v1_0.25_224_1_default_1.tflite",
"mobilenet_v1_0.25_224_1_metadata_1.tflite", "mobilenet_v1_0.25_224_1_metadata_1.tflite",
@ -52,7 +53,9 @@ mediapipe_files(srcs = [
"multi_objects_rotated.jpg", "multi_objects_rotated.jpg",
"palm_detection_full.tflite", "palm_detection_full.tflite",
"pointing_up.jpg", "pointing_up.jpg",
"pointing_up_rotated.jpg",
"right_hands.jpg", "right_hands.jpg",
"right_hands_rotated.jpg",
"segmentation_golden_rotation0.png", "segmentation_golden_rotation0.png",
"segmentation_input_rotation0.jpg", "segmentation_input_rotation0.jpg",
"selfie_segm_128_128_3.tflite", "selfie_segm_128_128_3.tflite",
@ -65,7 +68,9 @@ mediapipe_files(srcs = [
exports_files(
srcs = [
"expected_left_down_hand_landmarks.prototxt",
"expected_left_down_hand_rotated_landmarks.prototxt",
"expected_left_up_hand_landmarks.prototxt",
"expected_left_up_hand_rotated_landmarks.prototxt",
"expected_right_down_hand_landmarks.prototxt",
"expected_right_up_hand_landmarks.prototxt",
],
@ -85,11 +90,14 @@ filegroup(
"hand_landmark_full.tflite", "hand_landmark_full.tflite",
"hand_landmark_lite.tflite", "hand_landmark_lite.tflite",
"left_hands.jpg", "left_hands.jpg",
"left_hands_rotated.jpg",
"mozart_square.jpg", "mozart_square.jpg",
"multi_objects.jpg", "multi_objects.jpg",
"multi_objects_rotated.jpg", "multi_objects_rotated.jpg",
"pointing_up.jpg", "pointing_up.jpg",
"pointing_up_rotated.jpg",
"right_hands.jpg", "right_hands.jpg",
"right_hands_rotated.jpg",
"segmentation_golden_rotation0.png", "segmentation_golden_rotation0.png",
"segmentation_input_rotation0.jpg", "segmentation_input_rotation0.jpg",
"selfie_segm_128_128_3_expected_mask.jpg", "selfie_segm_128_128_3_expected_mask.jpg",
@ -131,12 +139,17 @@ filegroup(
name = "test_protos", name = "test_protos",
srcs = [ srcs = [
"expected_left_down_hand_landmarks.prototxt", "expected_left_down_hand_landmarks.prototxt",
"expected_left_down_hand_rotated_landmarks.prototxt",
"expected_left_up_hand_landmarks.prototxt", "expected_left_up_hand_landmarks.prototxt",
"expected_left_up_hand_rotated_landmarks.prototxt",
"expected_right_down_hand_landmarks.prototxt", "expected_right_down_hand_landmarks.prototxt",
"expected_right_up_hand_landmarks.prototxt", "expected_right_up_hand_landmarks.prototxt",
"hand_detector_result_one_hand.pbtxt", "hand_detector_result_one_hand.pbtxt",
"hand_detector_result_one_hand_rotated.pbtxt",
"hand_detector_result_two_hands.pbtxt", "hand_detector_result_two_hands.pbtxt",
"pointing_up_landmarks.pbtxt", "pointing_up_landmarks.pbtxt",
"pointing_up_rotated_landmarks.pbtxt",
"thumb_up_landmarks.pbtxt", "thumb_up_landmarks.pbtxt",
"thumb_up_rotated_landmarks.pbtxt",
], ],
) )

View File

@ -0,0 +1,84 @@
landmark {
x: 0.9259716
y: 0.18969846
}
landmark {
x: 0.88135517
y: 0.28856543
}
landmark {
x: 0.7600651
y: 0.3578236
}
landmark {
x: 0.62631166
y: 0.40490413
}
landmark {
x: 0.5374573
y: 0.45170194
}
landmark {
x: 0.57372385
y: 0.29924914
}
landmark {
x: 0.36731184
y: 0.33081773
}
landmark {
x: 0.24132833
y: 0.34759054
}
landmark {
x: 0.13690609
y: 0.35727677
}
landmark {
x: 0.5535803
y: 0.2398035
}
landmark {
x: 0.31834763
y: 0.24999242
}
landmark {
x: 0.16748133
y: 0.25625145
}
landmark {
x: 0.050747424
y: 0.25991398
}
landmark {
x: 0.56593156
y: 0.1867483
}
landmark {
x: 0.3543046
y: 0.17923892
}
landmark {
x: 0.21360746
y: 0.17454882
}
landmark {
x: 0.11110917
y: 0.17232567
}
landmark {
x: 0.5948908
y: 0.14024714
}
landmark {
x: 0.42692152
y: 0.11949824
}
landmark {
x: 0.32239118
y: 0.106370345
}
landmark {
x: 0.23672739
y: 0.09432885
}

View File

@ -0,0 +1,84 @@
landmark {
x: 0.06676084
y: 0.8095678
}
landmark {
x: 0.11359626
y: 0.71148247
}
landmark {
x: 0.23572624
y: 0.6414506
}
landmark {
x: 0.37323278
y: 0.5959156
}
landmark {
x: 0.46243322
y: 0.55125874
}
landmark {
x: 0.4205411
y: 0.69531494
}
landmark {
x: 0.62798893
y: 0.66715276
}
landmark {
x: 0.7568023
y: 0.65208924
}
landmark {
x: 0.86370826
y: 0.6437276
}
landmark {
x: 0.445136
y: 0.75394773
}
landmark {
x: 0.6787485
y: 0.745853
}
landmark {
x: 0.8290694
y: 0.7412988
}
landmark {
x: 0.94454145
y: 0.7384017
}
landmark {
x: 0.43516788
y: 0.8082166
}
landmark {
x: 0.6459554
y: 0.81768996
}
landmark {
x: 0.7875173
y: 0.825062
}
landmark {
x: 0.89249825
y: 0.82850707
}
landmark {
x: 0.40665048
y: 0.8567925
}
landmark {
x: 0.57228816
y: 0.8802181
}
landmark {
x: 0.6762071
y: 0.8941581
}
landmark {
x: 0.76453924
y: 0.90583205
}

View File

@ -0,0 +1,33 @@
detections {
label: "Palm"
score: 0.97115
location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.5198178
ymin: 0.6467485
width: 0.42467535
height: 0.22546273
}
}
}
detections {
label: "Palm"
score: 0.96701413
location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.024490356
ymin: 0.12620124
width: 0.43832153
height: 0.23269764
}
}
}
hand_rects {
x_center: 0.5760683
y_center: 0.6829921
height: 0.5862031
width: 1.1048855
rotation: -0.8250832
}

View File

@ -0,0 +1,223 @@
classifications {
classification {
score: 1.0
label: "Left"
display_name: "Left"
}
}
landmarks {
landmark {
x: 0.25546086
y: 0.47584262
z: 1.835341e-07
}
landmark {
x: 0.3363011
y: 0.54135
z: -0.041144375
}
landmark {
x: 0.4375146
y: 0.57881975
z: -0.06807727
}
landmark {
x: 0.49603376
y: 0.5263966
z: -0.09387612
}
landmark {
x: 0.5022822
y: 0.4413827
z: -0.1189948
}
landmark {
x: 0.5569452
y: 0.4724485
z: -0.05138246
}
landmark {
x: 0.6687125
y: 0.47918057
z: -0.09121969
}
landmark {
x: 0.73666537
y: 0.48318353
z: -0.11703273
}
landmark {
x: 0.7998315
y: 0.4741413
z: -0.1386424
}
landmark {
x: 0.5244063
y: 0.39292705
z: -0.061040796
}
landmark {
x: 0.57215345
y: 0.41514704
z: -0.11967233
}
landmark {
x: 0.4724468
y: 0.45553637
z: -0.13287684
}
landmark {
x: 0.43794966
y: 0.45210314
z: -0.13210714
}
landmark {
x: 0.47838163
y: 0.33329
z: -0.07421263
}
landmark {
x: 0.51081127
y: 0.35479474
z: -0.13596693
}
landmark {
x: 0.42433846
y: 0.40486792
z: -0.121291734
}
landmark {
x: 0.40280548
y: 0.39977497
z: -0.09928809
}
landmark {
x: 0.42269367
y: 0.2798249
z: -0.09064263
}
landmark {
x: 0.45849988
y: 0.3069861
z: -0.12894689
}
landmark {
x: 0.40754712
y: 0.35153976
z: -0.109160855
}
landmark {
x: 0.38855004
y: 0.3467068
z: -0.08820164
}
}
world_landmarks {
landmark {
x: -0.08568013
y: 0.016593203
z: 0.036527164
}
landmark {
x: -0.0565372
y: 0.041761592
z: 0.019493781
}
landmark {
x: -0.031365488
y: 0.05031186
z: 0.0025481891
}
landmark {
x: -0.008534161
y: 0.04286737
z: -0.024755282
}
landmark {
x: -0.0047254
y: 0.015748458
z: -0.035581928
}
landmark {
x: 0.013083893
y: 0.024668094
z: 0.0035934823
}
landmark {
x: 0.04149521
y: 0.024621274
z: -0.0030611698
}
landmark {
x: 0.06257473
y: 0.025388625
z: -0.010340984
}
landmark {
x: 0.08009179
y: 0.023082614
z: -0.03162942
}
landmark {
x: 0.006135068
y: 0.000696786
z: 0.0048212176
}
landmark {
x: 0.01678449
y: 0.0067061195
z: -0.029920919
}
landmark {
x: -0.008948593
y: 0.016808286
z: -0.03755109
}
landmark {
x: -0.01789449
y: 0.0153161455
z: -0.012059977
}
landmark {
x: -0.0061980113
y: -0.017872887
z: -0.002366997
}
landmark {
x: -0.004643807
y: -0.0108282855
z: -0.034515083
}
landmark {
x: -0.027603384
y: 0.003529715
z: -0.033665676
}
landmark {
x: -0.035679806
y: 0.0038255951
z: -0.008094264
}
landmark {
x: -0.02957782
y: -0.031701155
z: -0.008180461
}
landmark {
x: -0.020741666
y: -0.02506058
z: -0.026839724
}
landmark {
x: -0.0310834
y: -0.009496164
z: -0.032422185
}
landmark {
x: -0.037420202
y: -0.012883307
z: -0.017971724
}
}

View File

@ -0,0 +1,223 @@
classifications {
classification {
score: 1.0
label: "Left"
display_name: "Left"
}
}
landmarks {
landmark {
x: 0.3283601
y: 0.63773525
z: -3.2280354e-07
}
landmark {
x: 0.46280807
y: 0.6339767
z: -0.06408348
}
landmark {
x: 0.5831279
y: 0.57430106
z: -0.08583106
}
landmark {
x: 0.6689471
y: 0.49959752
z: -0.09886064
}
landmark {
x: 0.74378216
y: 0.47357544
z: -0.09680563
}
landmark {
x: 0.5233122
y: 0.41020474
z: -0.038088404
}
landmark {
x: 0.5296913
y: 0.3372598
z: -0.08874837
}
landmark {
x: 0.49039274
y: 0.43994758
z: -0.102315836
}
landmark {
x: 0.4824569
y: 0.47969607
z: -0.1030014
}
landmark {
x: 0.4451338
y: 0.39520803
z: -0.02177739
}
landmark {
x: 0.4410001
y: 0.34107083
z: -0.07294245
}
landmark {
x: 0.4162798
y: 0.46102384
z: -0.07746907
}
landmark {
x: 0.43492994
y: 0.47154287
z: -0.07404131
}
landmark {
x: 0.37671578
y: 0.39535576
z: -0.016277775
}
landmark {
x: 0.36978847
y: 0.34265152
z: -0.07346253
}
landmark {
x: 0.3559884
y: 0.44905427
z: -0.057693005
}
landmark {
x: 0.37711847
y: 0.46414754
z: -0.03662908
}
landmark {
x: 0.3142985
y: 0.3942253
z: -0.0152847925
}
landmark {
x: 0.30000874
y: 0.35543376
z: -0.046002634
}
landmark {
x: 0.30002704
y: 0.42357764
z: -0.032671776
}
landmark {
x: 0.31079838
y: 0.44218025
z: -0.016200554
}
}
world_landmarks {
landmark {
x: -0.030687196
y: 0.0678545
z: 0.051061403
}
landmark {
x: 0.0047719833
y: 0.06330968
z: 0.018945374
}
landmark {
x: 0.039799504
y: 0.054109577
z: 0.007930638
}
landmark {
x: 0.069374144
y: 0.035063196
z: 2.2522348e-05
}
landmark {
x: 0.087818466
y: 0.018390425
z: 0.004055788
}
landmark {
x: 0.02810654
y: 0.0043561812
z: -0.0038672548
}
landmark {
x: 0.025270049
y: -0.0039896416
z: -0.032991238
}
landmark {
x: 0.020414166
y: 0.006768506
z: -0.032724563
}
landmark {
x: 0.016415983
y: 0.024563588
z: -0.0058115427
}
landmark {
x: 0.0038743173
y: -0.0044466974
z: 0.0024876352
}
landmark {
x: 0.0041790796
y: -0.0115309935
z: -0.03532454
}
landmark {
x: -0.0016900161
y: 0.015519895
z: -0.03596156
}
landmark {
x: 0.004309217
y: 0.01917039
z: 0.003907912
}
landmark {
x: -0.016969737
y: -0.005584497
z: 0.0034258277
}
landmark {
x: -0.016737012
y: -0.01159037
z: -0.02876696
}
landmark {
x: -0.018165365
y: 0.01376111
z: -0.026835402
}
landmark {
x: -0.012430167
y: 0.02064222
z: -0.00087265146
}
landmark {
x: -0.043247573
y: 0.0011161827
z: 0.0056269006
}
landmark {
x: -0.038128495
y: -0.011477032
z: -0.016374081
}
landmark {
x: -0.034920715
y: 0.005510211
z: -0.029714659
}
landmark {
x: -0.03815982
y: 0.011989757
z: -0.014853194
}
}

View File

@ -151,7 +151,7 @@ def external_files():
http_file(
name = "com_google_mediapipe_dummy_gesture_recognizer_task",
sha256 = "18e54586bda33300d459ca140cd045f6daf43d897224ba215a16db3423eae18e",
urls = ["https://storage.googleapis.com/mediapipe-assets/dummy_gesture_recognizer.task?generation=1665707319890725"],
)
http_file(
@ -166,12 +166,24 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_down_hand_landmarks.prototxt?generation=1661875720230540"],
)
http_file(
name = "com_google_mediapipe_expected_left_down_hand_rotated_landmarks_prototxt",
sha256 = "a16d6cb8dd07d60f0678ddeb6a7447b73b9b03d4ddde365c8770b472205bb6cf",
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_down_hand_rotated_landmarks.prototxt?generation=1666037061297507"],
)
http_file(
name = "com_google_mediapipe_expected_left_up_hand_landmarks_prototxt",
sha256 = "1353ba617c4f048083618587cd23a8a22115f634521c153d4e1bd1ebd4f49dd7",
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_up_hand_landmarks.prototxt?generation=1661875726008879"],
)
http_file(
name = "com_google_mediapipe_expected_left_up_hand_rotated_landmarks_prototxt",
sha256 = "a9b9789c274d48a7cb9cc10af7bc644eb2512bb934529790d0a5404726daa86a",
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_up_hand_rotated_landmarks.prototxt?generation=1666037063443676"],
)
http_file(
name = "com_google_mediapipe_expected_right_down_hand_landmarks_prototxt",
sha256 = "f281b745175aaa7f458def6cf4c89521fb56302dd61a05642b3b4a4f237ffaa3",
@ -250,6 +262,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/hand_detector_result_one_hand.pbtxt?generation=1662745351291628"],
)
http_file(
name = "com_google_mediapipe_hand_detector_result_one_hand_rotated_pbtxt",
sha256 = "ff5ca0654028d78a3380df90054273cae79abe1b7369b164063fd1d5758ec370",
urls = ["https://storage.googleapis.com/mediapipe-assets/hand_detector_result_one_hand_rotated.pbtxt?generation=1666037065601724"],
)
http_file(
name = "com_google_mediapipe_hand_detector_result_two_hands_pbtxt",
sha256 = "2589cb08b0ee027dc24649fe597adcfa2156a21d12ea2480f83832714ebdf95f",
@ -352,6 +370,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/left_hands.jpg?generation=1661875796949017"],
)
http_file(
name = "com_google_mediapipe_left_hands_rotated_jpg",
sha256 = "8609c6202bca43a99bbf23fa8e687e49fa525e89481152e4c0987f46d60d7931",
urls = ["https://storage.googleapis.com/mediapipe-assets/left_hands_rotated.jpg?generation=1666037068103465"],
)
http_file(
name = "com_google_mediapipe_mobilebert_embedding_with_metadata_tflite",
sha256 = "fa47142dcc6f446168bc672f2df9605b6da5d0c0d6264e9be62870282365b95c",
@ -544,6 +568,18 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_landmarks.pbtxt?generation=1665174976408451"],
)
http_file(
name = "com_google_mediapipe_pointing_up_rotated_jpg",
sha256 = "50ff66f50281207072a038e5bb6648c43f4aacbfb8204a4d2591868756aaeff1",
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_rotated.jpg?generation=1666037072219697"],
)
http_file(
name = "com_google_mediapipe_pointing_up_rotated_landmarks_pbtxt",
sha256 = "ccf67e5867094ffb6c465a4dfbf2ef1eb3f9db2465803fc25a0b84c958e050de",
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_rotated_landmarks.pbtxt?generation=1666037074376515"],
)
http_file(
name = "com_google_mediapipe_pose_detection_tflite",
sha256 = "a63c614bef30d35947f13be361820b1e4e3bec9cfeebf4d11216a18373108e85",
@ -580,6 +616,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/right_hands.jpg?generation=1661875908672404"],
)
http_file(
name = "com_google_mediapipe_right_hands_rotated_jpg",
sha256 = "b3bdf692f0d54b86c8b67e6d1286dd0078fbe6e9dfcd507b187e3bd8b398c0f9",
urls = ["https://storage.googleapis.com/mediapipe-assets/right_hands_rotated.jpg?generation=1666037076873345"],
)
http_file(
name = "com_google_mediapipe_score_calibration_file_meta_json",
sha256 = "6a3c305620371f662419a496f75be5a10caebca7803b1e99d8d5d22ba51cda94",
@ -724,6 +766,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/thumb_up_landmarks.pbtxt?generation=1665174979747784"],
)
http_file(
name = "com_google_mediapipe_thumb_up_rotated_landmarks_pbtxt",
sha256 = "5d0a465959cacbd201ac8dd8fc8a66c5997a172b71809b12d27296db6a28a102",
urls = ["https://storage.googleapis.com/mediapipe-assets/thumb_up_rotated_landmarks.pbtxt?generation=1666037079490527"],
)
http_file(
name = "com_google_mediapipe_two_heads_16000_hz_mono_wav",
sha256 = "a291a9c22c39bba30138a26915e154a96286ba6ca3b413053123c504a58cce3b",