Add HandLandmarkerGraph, which connects HandDetectorGraph and HandLandmarkerSubgraph with landmark tracking.

PiperOrigin-RevId: 478596004
This commit is contained in:
MediaPipe Team 2022-10-03 13:48:47 -07:00 committed by Copybara-Service
parent 65c7fb9004
commit cfd0f3e79f
13 changed files with 600 additions and 99 deletions

View File

@ -51,7 +51,7 @@ cc_library(
"//mediapipe/tasks/cc/core:model_task_graph", "//mediapipe/tasks/cc/core:model_task_graph",
"//mediapipe/tasks/cc/core:utils", "//mediapipe/tasks/cc/core:utils",
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_cc_proto", "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "//mediapipe/tasks/cc/vision/utils:image_tensor_specs",
"@com_google_absl//absl/status", "@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor", "@com_google_absl//absl/status:statusor",

View File

@ -40,7 +40,7 @@ limitations under the License.
#include "mediapipe/tasks/cc/core/model_task_graph.h" #include "mediapipe/tasks/cc/core/model_task_graph.h"
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
#include "mediapipe/tasks/cc/core/utils.h" #include "mediapipe/tasks/cc/core/utils.h"
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h" #include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
namespace mediapipe { namespace mediapipe {
@ -53,18 +53,23 @@ using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output; using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph; using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source; using ::mediapipe::api2::builder::Source;
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions; using ::mediapipe::tasks::vision::hand_detector::proto::
HandDetectorGraphOptions;
constexpr char kImageTag[] = "IMAGE"; constexpr char kImageTag[] = "IMAGE";
constexpr char kDetectionsTag[] = "DETECTIONS"; constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
constexpr char kNormRectsTag[] = "NORM_RECTS"; constexpr char kHandRectsTag[] = "HAND_RECTS";
constexpr char kPalmRectsTag[] = "PALM_RECTS";
struct HandDetectionOuts { struct HandDetectionOuts {
Source<std::vector<Detection>> palm_detections; Source<std::vector<Detection>> palm_detections;
Source<std::vector<NormalizedRect>> hand_rects; Source<std::vector<NormalizedRect>> hand_rects;
Source<std::vector<NormalizedRect>> palm_rects;
Source<Image> image;
}; };
void ConfigureTensorsToDetectionsCalculator( void ConfigureTensorsToDetectionsCalculator(
const HandDetectorGraphOptions& tasks_options,
mediapipe::TensorsToDetectionsCalculatorOptions* options) { mediapipe::TensorsToDetectionsCalculatorOptions* options) {
// TODO use metadata to configure these fields. // TODO use metadata to configure these fields.
options->set_num_classes(1); options->set_num_classes(1);
@ -77,7 +82,7 @@ void ConfigureTensorsToDetectionsCalculator(
options->set_sigmoid_score(true); options->set_sigmoid_score(true);
options->set_score_clipping_thresh(100.0); options->set_score_clipping_thresh(100.0);
options->set_reverse_output_order(true); options->set_reverse_output_order(true);
options->set_min_score_thresh(0.5); options->set_min_score_thresh(tasks_options.min_detection_confidence());
options->set_x_scale(192.0); options->set_x_scale(192.0);
options->set_y_scale(192.0); options->set_y_scale(192.0);
options->set_w_scale(192.0); options->set_w_scale(192.0);
@ -144,19 +149,26 @@ void ConfigureRectTransformationCalculator(
// Image to perform detection on. // Image to perform detection on.
// //
// Outputs: // Outputs:
// DETECTIONS - std::vector<Detection> // PALM_DETECTIONS - std::vector<Detection>
// Detected palms with maximum `num_hands` specified in options. // Detected palms with maximum `num_hands` specified in options.
// NORM_RECTS - std::vector<NormalizedRect> // HAND_RECTS - std::vector<NormalizedRect>
// Detected hand bounding boxes in normalized coordinates. // Detected hand bounding boxes in normalized coordinates.
// PALM_RECTS - std::vector<NormalizedRect>
// Detected palm bounding boxes in normalized coordinates.
// IMAGE - Image
// The input image that the hand detector runs on and has the pixel data
// stored on the target storage (CPU vs GPU).
// //
// Example: // Example:
// node { // node {
// calculator: "mediapipe.tasks.vision.HandDetectorGraph" // calculator: "mediapipe.tasks.vision.HandDetectorGraph"
// input_stream: "IMAGE:image" // input_stream: "IMAGE:image"
// output_stream: "DETECTIONS:palm_detections" // output_stream: "PALM_DETECTIONS:palm_detections"
// output_stream: "NORM_RECTS:hand_rects_from_palm_detections" // output_stream: "HAND_RECTS:hand_rects_from_palm_detections"
// output_stream: "PALM_RECTS:palm_rects"
// output_stream: "IMAGE:image_out"
// options { // options {
// [mediapipe.tasks.hand_detector.proto.HandDetectorOptions.ext] { // [mediapipe.tasks.hand_detector.proto.HandDetectorGraphOptions.ext] {
// base_options { // base_options {
// model_asset { // model_asset {
// file_name: "palm_detection.tflite" // file_name: "palm_detection.tflite"
@ -173,16 +185,20 @@ class HandDetectorGraph : public core::ModelTaskGraph {
absl::StatusOr<CalculatorGraphConfig> GetConfig( absl::StatusOr<CalculatorGraphConfig> GetConfig(
SubgraphContext* sc) override { SubgraphContext* sc) override {
ASSIGN_OR_RETURN(const auto* model_resources, ASSIGN_OR_RETURN(const auto* model_resources,
CreateModelResources<HandDetectorOptions>(sc)); CreateModelResources<HandDetectorGraphOptions>(sc));
Graph graph; Graph graph;
ASSIGN_OR_RETURN(auto hand_detection_outs, ASSIGN_OR_RETURN(
BuildHandDetectionSubgraph( auto hand_detection_outs,
sc->Options<HandDetectorOptions>(), *model_resources, BuildHandDetectionSubgraph(sc->Options<HandDetectorGraphOptions>(),
*model_resources,
graph[Input<Image>(kImageTag)], graph)); graph[Input<Image>(kImageTag)], graph));
hand_detection_outs.palm_detections >> hand_detection_outs.palm_detections >>
graph[Output<std::vector<Detection>>(kDetectionsTag)]; graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
hand_detection_outs.hand_rects >> hand_detection_outs.hand_rects >>
graph[Output<std::vector<NormalizedRect>>(kNormRectsTag)]; graph[Output<std::vector<NormalizedRect>>(kHandRectsTag)];
hand_detection_outs.palm_rects >>
graph[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
hand_detection_outs.image >> graph[Output<Image>(kImageTag)];
return graph.GetConfig(); return graph.GetConfig();
} }
@ -196,7 +212,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
// image_in: image stream to run hand detection on. // image_in: image stream to run hand detection on.
// graph: the mediapipe builder::Graph instance to be updated. // graph: the mediapipe builder::Graph instance to be updated.
absl::StatusOr<HandDetectionOuts> BuildHandDetectionSubgraph( absl::StatusOr<HandDetectionOuts> BuildHandDetectionSubgraph(
const HandDetectorOptions& subgraph_options, const HandDetectorGraphOptions& subgraph_options,
const core::ModelResources& model_resources, Source<Image> image_in, const core::ModelResources& model_resources, Source<Image> image_in,
Graph& graph) { Graph& graph) {
// Add image preprocessing subgraph. The model expects aspect ratio // Add image preprocessing subgraph. The model expects aspect ratio
@ -235,6 +251,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
auto& tensors_to_detections = auto& tensors_to_detections =
graph.AddNode("TensorsToDetectionsCalculator"); graph.AddNode("TensorsToDetectionsCalculator");
ConfigureTensorsToDetectionsCalculator( ConfigureTensorsToDetectionsCalculator(
subgraph_options,
&tensors_to_detections &tensors_to_detections
.GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>()); .GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
model_output_tensors >> tensors_to_detections.In("TENSORS"); model_output_tensors >> tensors_to_detections.In("TENSORS");
@ -281,7 +298,8 @@ class HandDetectorGraph : public core::ModelTaskGraph {
.GetOptions<mediapipe::DetectionsToRectsCalculatorOptions>()); .GetOptions<mediapipe::DetectionsToRectsCalculatorOptions>());
palm_detections >> detections_to_rects.In("DETECTIONS"); palm_detections >> detections_to_rects.In("DETECTIONS");
image_size >> detections_to_rects.In("IMAGE_SIZE"); image_size >> detections_to_rects.In("IMAGE_SIZE");
auto palm_rects = detections_to_rects.Out("NORM_RECTS"); auto palm_rects =
detections_to_rects[Output<std::vector<NormalizedRect>>("NORM_RECTS")];
// Expands and shifts the rectangle that contains the palm so that it's // Expands and shifts the rectangle that contains the palm so that it's
// likely to cover the entire hand. // likely to cover the entire hand.
@ -308,8 +326,11 @@ class HandDetectorGraph : public core::ModelTaskGraph {
clip_normalized_rect_vector_size[Output<std::vector<NormalizedRect>>( clip_normalized_rect_vector_size[Output<std::vector<NormalizedRect>>(
"")]; "")];
return HandDetectionOuts{.palm_detections = palm_detections, return HandDetectionOuts{
.hand_rects = clipped_hand_rects}; /* palm_detections= */ palm_detections,
/* hand_rects= */ clipped_hand_rects,
/* palm_rects= */ palm_rects,
/* image= */ preprocessing[Output<Image>(kImageTag)]};
} }
}; };

View File

@ -40,7 +40,7 @@ limitations under the License.
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" #include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h" #include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
#include "mediapipe/tasks/cc/core/task_runner.h" #include "mediapipe/tasks/cc/core/task_runner.h"
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h" #include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result.pb.h" #include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result.pb.h"
#include "mediapipe/tasks/cc/vision/utils/image_utils.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h"
@ -60,7 +60,8 @@ using ::mediapipe::tasks::core::ModelResources;
using ::mediapipe::tasks::core::TaskRunner; using ::mediapipe::tasks::core::TaskRunner;
using ::mediapipe::tasks::core::proto::ExternalFile; using ::mediapipe::tasks::core::proto::ExternalFile;
using ::mediapipe::tasks::vision::DecodeImageFromFile; using ::mediapipe::tasks::vision::DecodeImageFromFile;
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions; using ::mediapipe::tasks::vision::hand_detector::proto::
HandDetectorGraphOptions;
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorResult; using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorResult;
using ::testing::EqualsProto; using ::testing::EqualsProto;
using ::testing::TestParamInfo; using ::testing::TestParamInfo;
@ -80,9 +81,9 @@ constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt";
constexpr char kImageTag[] = "IMAGE"; constexpr char kImageTag[] = "IMAGE";
constexpr char kImageName[] = "image"; constexpr char kImageName[] = "image";
constexpr char kPalmDetectionsTag[] = "DETECTIONS"; constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
constexpr char kPalmDetectionsName[] = "palm_detections"; constexpr char kPalmDetectionsName[] = "palm_detections";
constexpr char kHandNormRectsTag[] = "NORM_RECTS"; constexpr char kHandRectsTag[] = "HAND_RECTS";
constexpr char kHandNormRectsName[] = "hand_norm_rects"; constexpr char kHandNormRectsName[] = "hand_norm_rects";
constexpr float kPalmDetectionBboxMaxDiff = 0.01; constexpr float kPalmDetectionBboxMaxDiff = 0.01;
@ -106,20 +107,20 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner(
auto& hand_detection = auto& hand_detection =
graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph"); graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph");
auto options = std::make_unique<HandDetectorOptions>(); auto options = std::make_unique<HandDetectorGraphOptions>();
options->mutable_base_options()->mutable_model_asset()->set_file_name( options->mutable_base_options()->mutable_model_asset()->set_file_name(
JoinPath("./", kTestDataDirectory, model_name)); JoinPath("./", kTestDataDirectory, model_name));
options->set_min_detection_confidence(0.5); options->set_min_detection_confidence(0.5);
options->set_num_hands(num_hands); options->set_num_hands(num_hands);
hand_detection.GetOptions<HandDetectorOptions>().Swap(options.get()); hand_detection.GetOptions<HandDetectorGraphOptions>().Swap(options.get());
graph[Input<Image>(kImageTag)].SetName(kImageName) >> graph[Input<Image>(kImageTag)].SetName(kImageName) >>
hand_detection.In(kImageTag); hand_detection.In(kImageTag);
hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >> hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >>
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)]; graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
hand_detection.Out(kHandNormRectsTag).SetName(kHandNormRectsName) >> hand_detection.Out(kHandRectsTag).SetName(kHandNormRectsName) >>
graph[Output<std::vector<NormalizedRect>>(kHandNormRectsTag)]; graph[Output<std::vector<NormalizedRect>>(kHandRectsTag)];
return TaskRunner::Create( return TaskRunner::Create(
graph.GetConfig(), std::make_unique<core::MediaPipeBuiltinOpResolver>()); graph.GetConfig(), std::make_unique<core::MediaPipeBuiltinOpResolver>());

View File

@ -21,8 +21,8 @@ package(default_visibility = [
licenses(["notice"]) licenses(["notice"])
mediapipe_proto_library( mediapipe_proto_library(
name = "hand_detector_options_proto", name = "hand_detector_graph_options_proto",
srcs = ["hand_detector_options.proto"], srcs = ["hand_detector_graph_options.proto"],
deps = [ deps = [
"//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto", "//mediapipe/framework:calculator_proto",

View File

@ -21,24 +21,20 @@ import "mediapipe/framework/calculator.proto";
import "mediapipe/tasks/cc/core/proto/base_options.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto";
option java_package = "com.google.mediapipe.tasks.vision.handdetector"; option java_package = "com.google.mediapipe.tasks.vision.handdetector";
option java_outer_classname = "HandDetectorOptionsProto"; option java_outer_classname = "HandDetectorGraphOptionsProto";
message HandDetectorOptions { message HandDetectorGraphOptions {
extend mediapipe.CalculatorOptions { extend mediapipe.CalculatorOptions {
optional HandDetectorOptions ext = 464864288; optional HandDetectorGraphOptions ext = 464864288;
} }
// Base options for configuring Task library, such as specifying the TfLite // Base options for configuring Task library, such as specifying the TfLite
// model file with metadata, accelerator options, etc. // model file with metadata, accelerator options, etc.
optional core.proto.BaseOptions base_options = 1; optional core.proto.BaseOptions base_options = 1;
// The locale to use for display names specified through the TFLite Model
// Metadata, if any. Defaults to English.
optional string display_names_locale = 2 [default = "en"];
// Minimum confidence value ([0.0, 1.0]) for confidence score to be considered // Minimum confidence value ([0.0, 1.0]) for confidence score to be considered
// successfully detecting a hand in the image. // successfully detecting a hand in the image.
optional float min_detection_confidence = 3 [default = 0.5]; optional float min_detection_confidence = 2 [default = 0.5];
// The maximum number of hands output by the detector. // The maximum number of hands output by the detector.
optional int32 num_hands = 4; optional int32 num_hands = 3;
} }

View File

@ -51,6 +51,7 @@ cc_library(
# TODO: move calculators in modules/hand_landmark/calculators to tasks dir. # TODO: move calculators in modules/hand_landmark/calculators to tasks dir.
"//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator", "//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator",
"//mediapipe/tasks/cc:common", "//mediapipe/tasks/cc:common",
"//mediapipe/tasks/cc/components/utils:gate",
"//mediapipe/tasks/cc/components:image_preprocessing", "//mediapipe/tasks/cc/components:image_preprocessing",
"//mediapipe/tasks/cc/core:model_resources", "//mediapipe/tasks/cc/core:model_resources",
"//mediapipe/tasks/cc/core:model_task_graph", "//mediapipe/tasks/cc/core:model_task_graph",
@ -66,3 +67,41 @@ cc_library(
) )
# TODO: Enable this test # TODO: Enable this test
cc_library(
name = "hand_landmarker_graph",
srcs = ["hand_landmarker_graph.cc"],
deps = [
":hand_landmarker_subgraph",
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:clip_vector_size_calculator_cc_proto",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:gate_calculator_cc_proto",
"//mediapipe/calculators/core:pass_through_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto",
"//mediapipe/framework/api2:builder",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/tasks/cc:common",
"//mediapipe/tasks/cc/components/utils:gate",
"//mediapipe/tasks/cc/core:model_task_graph",
"//mediapipe/tasks/cc/core:utils",
"//mediapipe/tasks/cc/vision/hand_detector:hand_detector_graph",
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator",
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_subgraph_options_cc_proto",
],
alwayslink = 1,
)
# TODO: Enable this test

View File

@ -0,0 +1,284 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
#include "mediapipe/calculators/core/clip_vector_size_calculator.pb.h"
#include "mediapipe/calculators/core/gate_calculator.pb.h"
#include "mediapipe/calculators/util/collection_has_min_size_calculator.pb.h"
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/components/utils/gate.h"
#include "mediapipe/tasks/cc/core/model_task_graph.h"
#include "mediapipe/tasks/cc/core/utils.h"
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h"
namespace mediapipe {
namespace tasks {
namespace vision {
namespace hand_landmarker {
namespace {
using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source;
using ::mediapipe::tasks::components::utils::DisallowIf;
using ::mediapipe::tasks::vision::hand_detector::proto::
HandDetectorGraphOptions;
using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarkerGraphOptions;
using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarkerSubgraphOptions;
// Stream tags of the HandLandmarkerGraph. kImageTag is used for both the IMAGE
// input and the IMAGE output of the graph; the remaining tags are outputs.
constexpr char kImageTag[] = "IMAGE";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
constexpr char kHandednessTag[] = "HANDEDNESS";
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
constexpr char kPalmRectsTag[] = "PALM_RECTS";
// Calculator name used both to add the loopback node to the graph and to find
// that node again in the generated config when marking its back edge.
constexpr char kPreviousLoopbackCalculatorName[] = "PreviousLoopbackCalculator";
// Output streams produced by HandLandmarkerGraph::BuildHandLandmarkerGraph.
// GetConfig connects each member to the graph output with the tag noted below.
//
// NOTE: the struct is initialized positionally, so the member order must stay
// in sync with the initializer list in BuildHandLandmarkerGraph.
struct HandLandmarkerOutputs {
// Connected to the LANDMARKS output.
Source<std::vector<NormalizedLandmarkList>> landmark_lists;
// Connected to the WORLD_LANDMARKS output.
Source<std::vector<LandmarkList>> world_landmark_lists;
// Connected to the HAND_RECT_NEXT_FRAME output; also fed back into the
// loopback calculator as the back edge.
Source<std::vector<NormalizedRect>> hand_rects_next_frame;
// Connected to the HANDEDNESS output.
Source<std::vector<ClassificationList>> handednesses;
// Connected to the PALM_RECTS output; produced by the hand detector node.
Source<std::vector<NormalizedRect>> palm_rects;
// Connected to the PALM_DETECTIONS output; produced by the hand detector node.
Source<std::vector<Detection>> palm_detections;
// Connected to the IMAGE output; the input image forwarded through a
// PassThroughCalculator.
Source<Image> image;
};
} // namespace
// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand
// landmarks detection. The HandLandmarkerGraph consists of two subgraphs:
// HandDetectorGraph and HandLandmarkerSubgraph. HandLandmarkerSubgraph detects
// landmarks from bounding boxes produced by HandDetectorGraph.
// HandLandmarkerGraph tracks the landmarks over time and, while enough hands
// are being tracked, skips the HandDetectorGraph. If the tracking is lost or
// the detected hands are fewer than the configured maximum number of hands,
// HandDetectorGraph is triggered to detect hands.
//
// Accepts CPU input images and outputs Landmarks on CPU.
//
// Inputs:
// IMAGE - Image
// Image to perform hand landmarks detection on.
//
// Outputs:
// LANDMARKS - std::vector<NormalizedLandmarkList>
// Vector of detected hand landmarks.
// WORLD_LANDMARKS - std::vector<LandmarkList>
// Vector of detected hand landmarks in world coordinates.
// HAND_RECT_NEXT_FRAME - std::vector<NormalizedRect>
// Vector of the predicted rects enclosing the same hand RoI for landmark
// detection on the next frame.
// HANDEDNESS - std::vector<ClassificationList>
// Vector of classification of handedness.
// PALM_RECTS - std::vector<NormalizedRect>
// Detected palm bounding boxes in normalized coordinates.
// PALM_DETECTIONS - std::vector<Detection>
// Detected palms with maximum `num_hands` specified in options.
// IMAGE - Image
// The input image that the hand landmarker runs on and has the pixel data
// stored on the target storage (CPU vs GPU).
//
// Example:
// node {
// calculator: "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"
// input_stream: "IMAGE:image_in"
// output_stream: "LANDMARKS:hand_landmarks"
// output_stream: "WORLD_LANDMARKS:world_hand_landmarks"
// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
// output_stream: "HANDEDNESS:handedness"
// output_stream: "PALM_RECTS:palm_rects"
// output_stream: "PALM_DETECTIONS:palm_detections"
// output_stream: "IMAGE:image_out"
// options {
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerGraphOptions.ext] {
// base_options {
// model_asset {
// file_name: "hand_landmarker.task"
// }
// }
// hand_detector_graph_options {
// base_options {
// model_asset {
// file_name: "palm_detection.tflite"
// }
// }
// min_detection_confidence: 0.5
// num_hands: 2
// }
// hand_landmarker_subgraph_options {
// base_options {
// model_asset {
// file_name: "hand_landmark_lite.tflite"
// }
// }
// min_detection_confidence: 0.5
// }
// }
// }
// }
class HandLandmarkerGraph : public core::ModelTaskGraph {
 public:
  // Produces the CalculatorGraphConfig of the hand landmarker graph.
  //
  // The streams returned by BuildHandLandmarkerGraph are connected to the
  // tagged graph outputs, after which the "LOOP" input of the
  // PreviousLoopbackCalculator node is flagged as a back edge directly in the
  // generated proto.
  absl::StatusOr<CalculatorGraphConfig> GetConfig(
      SubgraphContext* sc) override {
    Graph graph;
    ASSIGN_OR_RETURN(
        auto outs,
        BuildHandLandmarkerGraph(sc->Options<HandLandmarkerGraphOptions>(),
                                 graph[Input<Image>(kImageTag)], graph));

    // Expose every stream produced by the builder as a tagged graph output.
    outs.landmark_lists >>
        graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
    outs.world_landmark_lists >>
        graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
    outs.hand_rects_next_frame >>
        graph[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];
    outs.handednesses >>
        graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
    outs.palm_rects >>
        graph[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
    outs.palm_detections >>
        graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
    outs.image >> graph[Output<Image>(kImageTag)];

    // TODO remove when support is fixed.
    // The mediapipe GraphBuilder currently cannot configure InputStreamInfo,
    // so the back edge is declared by editing the CalculatorGraphConfig proto.
    CalculatorGraphConfig config = graph.GetConfig();
    for (auto& node : *config.mutable_node()) {
      if (node.calculator() != kPreviousLoopbackCalculatorName) {
        continue;
      }
      auto* back_edge_info = node.add_input_stream_info();
      back_edge_info->set_tag_index("LOOP");
      back_edge_info->set_back_edge(true);
      break;  // Only the first loopback node is patched.
    }
    return config;
  }

 private:
  // Populates `graph` with the nodes of the hand landmark detection pipeline
  // and returns the streams that GetConfig exposes as graph outputs.
  //
  // tasks_options: the mediapipe tasks module HandLandmarkerGraphOptions.
  // image_in: (mediapipe::Image) stream to run hand landmark detection on.
  // graph: the mediapipe graph instance to be updated.
  absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
      const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
      Graph& graph) {
    const int num_hands_limit =
        tasks_options.hand_detector_graph_options().num_hands();

    // Loopback node: its PREV_LOOP output carries the rects that were fed into
    // LOOP (connected further below as the back edge).
    auto& loopback = graph.AddNode(kPreviousLoopbackCalculatorName);
    image_in >> loopback.In("MAIN");
    auto tracked_rects =
        loopback[Output<std::vector<NormalizedRect>>("PREV_LOOP")];

    // Checks whether the looped-back rect vector already holds at least
    // `num_hands_limit` entries.
    auto& has_min_size =
        graph.AddNode("NormalizedRectVectorHasMinSizeCalculator");
    has_min_size.GetOptions<CollectionHasMinSizeCalculatorOptions>()
        .set_min_size(num_hands_limit);
    tracked_rects >> has_min_size.In("ITERABLE");
    auto enough_hands_tracked = has_min_size.Out("").Cast<bool>();

    // The detector only receives the image when that check does not hold.
    auto detector_image = DisallowIf(image_in, enough_hands_tracked, graph);

    auto& detector = graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph");
    detector.GetOptions<HandDetectorGraphOptions>().CopyFrom(
        tasks_options.hand_detector_graph_options());
    detector_image >> detector.In("IMAGE");
    auto detected_rects = detector.Out("HAND_RECTS");

    // Combines the looped-back rects (input 0) with the detector rects
    // (input 1).
    auto& association = graph.AddNode("HandAssociationCalculator");
    association.GetOptions<HandAssociationCalculatorOptions>()
        .set_min_similarity_threshold(tasks_options.min_tracking_confidence());
    tracked_rects >>
        association[Input<std::vector<NormalizedRect>>::Multiple("")][0];
    detected_rects >>
        association[Input<std::vector<NormalizedRect>>::Multiple("")][1];
    auto associated_rects = association.Out("");

    // Caps the rect vector at `num_hands_limit` entries.
    auto& rect_clipper =
        graph.AddNode("ClipNormalizedRectVectorSizeCalculator");
    rect_clipper.GetOptions<ClipVectorSizeCalculatorOptions>()
        .set_max_vec_size(num_hands_limit);
    associated_rects >> rect_clipper.In("");
    auto rects_for_landmarker = rect_clipper.Out("");

    auto& landmarker = graph.AddNode(
        "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph");
    landmarker.GetOptions<HandLandmarkerSubgraphOptions>().CopyFrom(
        tasks_options.hand_landmarker_subgraph_options());
    image_in >> landmarker.In("IMAGE");
    rects_for_landmarker >> landmarker.In("HAND_RECT");
    auto next_frame_rects =
        landmarker[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];

    // Back edge.
    next_frame_rects >> loopback.In("LOOP");

    // TODO: Replace PassThroughCalculator with a calculator that
    // converts the pixel data to be stored on the target storage (CPU vs GPU).
    auto& image_pass_through = graph.AddNode("PassThroughCalculator");
    image_in >> image_pass_through.In("");

    auto landmark_lists =
        landmarker[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
    auto world_landmark_lists =
        landmarker[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
    auto handednesses =
        landmarker[Output<std::vector<ClassificationList>>(kHandednessTag)];
    auto palm_rects =
        detector[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
    auto palm_detections =
        detector[Output<std::vector<Detection>>(kPalmDetectionsTag)];
    auto image_out = image_pass_through[Output<Image>("")];

    return HandLandmarkerOutputs{
        /* landmark_lists= */ landmark_lists,
        /* world_landmark_lists= */ world_landmark_lists,
        /* hand_rects_next_frame= */ next_frame_rects,
        /* handednesses= */ handednesses,
        /* palm_rects= */ palm_rects,
        /* palm_detections= */ palm_detections,
        /* image= */ image_out,
    };
  }
};
// Registers the graph under its fully qualified C++ name.
// NOTE(review): presumably this is what lets graph configs refer to it by name
// (as done for the other graphs via graph.AddNode("...")) — confirm.
REGISTER_MEDIAPIPE_GRAPH(
::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerGraph);
} // namespace hand_landmarker
} // namespace vision
} // namespace tasks
} // namespace mediapipe

View File

@ -0,0 +1,167 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include "absl/flags/flag.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/file_helpers.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h"
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
#include "mediapipe/tasks/cc/core/task_runner.h"
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h"
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/shims/cc/shims_test_util.h"
namespace mediapipe {
namespace tasks {
namespace vision {
namespace hand_landmarker {
namespace {
using ::file::Defaults;
using ::file::GetTextProto;
using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source;
using ::mediapipe::file::JoinPath;
using ::mediapipe::tasks::core::TaskRunner;
using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarkerGraphOptions;
using ::testing::EqualsProto;
using ::testing::proto::Approximately;
using ::testing::proto::Partially;
// Directory holding the models, the input image and the expected-output
// protos; every file name below is joined onto "./" + this directory.
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
// Model file set on the hand detector graph options.
constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite";
// Model file set on the hand landmarker subgraph options.
constexpr char kHandLandmarkerFullModel[] = "hand_landmark_full.tflite";
// Input image fed to the graph by the test.
constexpr char kLeftHandsImage[] = "left_hands.jpg";
// Tag / stream-name pairs used to wire the HandLandmarkerGraph node: each
// k*Tag is the port tag on the node and on the enclosing graph, each k*Name
// is the name assigned to the stream connected to that port.
constexpr char kImageTag[] = "IMAGE";
constexpr char kImageName[] = "image_in";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kLandmarksName[] = "landmarks";
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
constexpr char kWorldLandmarksName[] = "world_landmarks";
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
constexpr char kHandRectNextFrameName[] = "hand_rect_next_frame";
constexpr char kHandednessTag[] = "HANDEDNESS";
constexpr char kHandednessName[] = "handedness";
// Expected hand landmarks positions, in text proto format.
constexpr char kExpectedLeftUpHandLandmarksFilename[] =
"expected_left_up_hand_landmarks.prototxt";
constexpr char kExpectedLeftDownHandLandmarksFilename[] =
"expected_left_down_hand_landmarks.prototxt";
// Relative tolerance handed to Approximately() as its `fraction` argument.
constexpr float kFullModelFractionDiff = 0.03; // a fraction (0.03 == 3%), not a percentage value
// Absolute tolerance handed to Approximately() as its `margin` argument.
constexpr float kAbsMargin = 0.03;
// Value set as `num_hands` on the hand detector options; the test also expects
// exactly this many landmark lists in the output.
constexpr int kMaxNumHands = 2;
// Value set as `min_tracking_confidence` on the HandLandmarkerGraph options.
constexpr float kMinTrackingConfidence = 0.5;
// Reads the text-format proto `filename` from the test data directory into a
// NormalizedLandmarkList and returns it. The read status is checked with
// MP_EXPECT_OK; the list is returned regardless of the outcome.
NormalizedLandmarkList GetExpectedLandmarkList(absl::string_view filename) {
  const auto proto_path = JoinPath("./", kTestDataDirectory, filename);
  NormalizedLandmarkList landmarks_from_file;
  MP_EXPECT_OK(GetTextProto(proto_path, &landmarks_from_file, Defaults()));
  return landmarks_from_file;
}
// Helper function to create a Hand Landmarker TaskRunner.
//
// Builds a graph that contains a single HandLandmarkerGraph node, configures
// it with the palm detection and hand landmark models from the test data
// directory, and exposes the node's IMAGE input and its LANDMARKS,
// WORLD_LANDMARKS, HANDEDNESS and HAND_RECT_NEXT_FRAME outputs as graph
// streams named by the corresponding k*Name constants.
//
// Returns whatever TaskRunner::Create yields for that graph config: the
// runner on success, otherwise its error status.
absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner() {
  Graph graph;
  auto& hand_landmarker_graph = graph.AddNode(
      "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph");
  auto& options =
      hand_landmarker_graph.GetOptions<HandLandmarkerGraphOptions>();

  // Hand detector: model file and the maximum number of hands to detect.
  auto* hand_detector_options = options.mutable_hand_detector_graph_options();
  hand_detector_options->mutable_base_options()
      ->mutable_model_asset()
      ->set_file_name(JoinPath("./", kTestDataDirectory, kPalmDetectionModel));
  hand_detector_options->set_num_hands(kMaxNumHands);

  // Hand landmarker subgraph: model file.
  options.mutable_hand_landmarker_subgraph_options()
      ->mutable_base_options()
      ->mutable_model_asset()
      ->set_file_name(
          JoinPath("./", kTestDataDirectory, kHandLandmarkerFullModel));
  options.set_min_tracking_confidence(kMinTrackingConfidence);

  // Graph input -> node input.
  graph[Input<Image>(kImageTag)].SetName(kImageName) >>
      hand_landmarker_graph.In(kImageTag);

  // Node outputs -> graph outputs.
  hand_landmarker_graph.Out(kLandmarksTag).SetName(kLandmarksName) >>
      graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
  hand_landmarker_graph.Out(kWorldLandmarksTag).SetName(kWorldLandmarksName) >>
      graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
  hand_landmarker_graph.Out(kHandednessTag).SetName(kHandednessName) >>
      graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
  hand_landmarker_graph.Out(kHandRectNextFrameTag)
          .SetName(kHandRectNextFrameName) >>
      graph[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];

  // std::make_unique comes from <memory>, which this file already includes.
  return TaskRunner::Create(
      graph.GetConfig(), std::make_unique<core::MediaPipeBuiltinOpResolver>());
}
// Fixture for the HandLandmarkerGraph test below. It declares no members of
// its own and only derives from tflite_shims::testing::Test.
class HandLandmarkerTest : public tflite_shims::testing::Test {};
// Runs the HandLandmarkerGraph once on the left-hands test image and checks
// that it returns kMaxNumHands landmark lists which approximately match the
// expected landmarks stored in the test data directory.
TEST_F(HandLandmarkerTest, Succeeds) {
  MP_ASSERT_OK_AND_ASSIGN(
      Image image,
      DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kLeftHandsImage)));
  MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner());

  // Process() returns a StatusOr; unwrap it through the assertion macro so a
  // failed graph run is reported as a test failure instead of dereferencing a
  // StatusOr that holds an error.
  MP_ASSERT_OK_AND_ASSIGN(
      auto output_packets,
      task_runner->Process(
          {{kImageName, MakePacket<Image>(std::move(image))}}));

  const auto& landmarks = output_packets[kLandmarksName]
                              .Get<std::vector<NormalizedLandmarkList>>();
  // Guards the landmarks[0] / landmarks[1] accesses below.
  ASSERT_EQ(landmarks.size(), kMaxNumHands);

  std::vector<NormalizedLandmarkList> expected_landmarks = {
      GetExpectedLandmarkList(kExpectedLeftUpHandLandmarksFilename),
      GetExpectedLandmarkList(kExpectedLeftDownHandLandmarksFilename)};

  // Partially(): only fields present in the expected proto are compared.
  // Approximately(): floats may differ by the given margin / fraction.
  EXPECT_THAT(landmarks[0],
              Approximately(Partially(EqualsProto(expected_landmarks[0])),
                            /*margin=*/kAbsMargin,
                            /*fraction=*/kFullModelFractionDiff));
  EXPECT_THAT(landmarks[1],
              Approximately(Partially(EqualsProto(expected_landmarks[1])),
                            /*margin=*/kAbsMargin,
                            /*fraction=*/kFullModelFractionDiff));
}
} // namespace
} // namespace hand_landmarker
} // namespace vision
} // namespace tasks
} // namespace mediapipe

View File

@ -34,6 +34,7 @@ limitations under the License.
#include "mediapipe/framework/formats/tensor.h" #include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/tasks/cc/common.h" #include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/components/image_preprocessing.h" #include "mediapipe/tasks/cc/components/image_preprocessing.h"
#include "mediapipe/tasks/cc/components/utils/gate.h"
#include "mediapipe/tasks/cc/core/model_resources.h" #include "mediapipe/tasks/cc/core/model_resources.h"
#include "mediapipe/tasks/cc/core/model_task_graph.h" #include "mediapipe/tasks/cc/core/model_task_graph.h"
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
@ -48,6 +49,7 @@ limitations under the License.
namespace mediapipe { namespace mediapipe {
namespace tasks { namespace tasks {
namespace vision { namespace vision {
namespace hand_landmarker {
namespace { namespace {
@ -55,6 +57,7 @@ using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output; using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph; using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source; using ::mediapipe::api2::builder::Source;
using ::mediapipe::tasks::components::utils::AllowIf;
using ::mediapipe::tasks::core::ModelResources; using ::mediapipe::tasks::core::ModelResources;
using ::mediapipe::tasks::vision::hand_landmarker::proto:: using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarkerSubgraphOptions; HandLandmarkerSubgraphOptions;
@ -82,7 +85,6 @@ struct SingleHandLandmarkerOutputs {
Source<bool> hand_presence; Source<bool> hand_presence;
Source<float> hand_presence_score; Source<float> hand_presence_score;
Source<ClassificationList> handedness; Source<ClassificationList> handedness;
Source<std::pair<int, int>> image_size;
}; };
struct HandLandmarkerOutputs { struct HandLandmarkerOutputs {
@ -92,7 +94,6 @@ struct HandLandmarkerOutputs {
Source<std::vector<bool>> presences; Source<std::vector<bool>> presences;
Source<std::vector<float>> presence_scores; Source<std::vector<float>> presence_scores;
Source<std::vector<ClassificationList>> handednesses; Source<std::vector<ClassificationList>> handednesses;
Source<std::pair<int, int>> image_size;
}; };
absl::Status SanityCheckOptions(const HandLandmarkerSubgraphOptions& options) { absl::Status SanityCheckOptions(const HandLandmarkerSubgraphOptions& options) {
@ -208,8 +209,6 @@ void ConfigureHandRectTransformationCalculator(
// Float value indicates the probability that the hand is present. // Float value indicates the probability that the hand is present.
// HANDEDNESS - ClassificationList // HANDEDNESS - ClassificationList
// Classification of handedness. // Classification of handedness.
// IMAGE_SIZE - std::vector<int, int>
// The size of input image.
// //
// Example: // Example:
// node { // node {
@ -221,8 +220,6 @@ void ConfigureHandRectTransformationCalculator(
// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame" // output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
// output_stream: "PRESENCE:hand_presence" // output_stream: "PRESENCE:hand_presence"
// output_stream: "PRESENCE_SCORE:hand_presence_score" // output_stream: "PRESENCE_SCORE:hand_presence_score"
// output_stream: "HANDEDNESS:handedness"
// output_stream: "IMAGE_SIZE:image_size"
// options { // options {
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext] // [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext]
// { // {
@ -259,8 +256,6 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
graph[Output<float>(kPresenceScoreTag)]; graph[Output<float>(kPresenceScoreTag)];
hand_landmark_detection_outs.handedness >> hand_landmark_detection_outs.handedness >>
graph[Output<ClassificationList>(kHandednessTag)]; graph[Output<ClassificationList>(kHandednessTag)];
hand_landmark_detection_outs.image_size >>
graph[Output<std::pair<int, int>>(kImageSizeTag)];
return graph.GetConfig(); return graph.GetConfig();
} }
@ -332,18 +327,7 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
// score of hand presence. // score of hand presence.
auto& tensors_to_hand_presence = graph.AddNode("TensorsToFloatsCalculator"); auto& tensors_to_hand_presence = graph.AddNode("TensorsToFloatsCalculator");
hand_flag_tensors >> tensors_to_hand_presence.In("TENSORS"); hand_flag_tensors >> tensors_to_hand_presence.In("TENSORS");
// Converts the handedness tensor into a float that represents the
// classification score of handedness.
auto& tensors_to_handedness =
graph.AddNode("TensorsToClassificationCalculator");
ConfigureTensorsToHandednessCalculator(
&tensors_to_handedness.GetOptions<
mediapipe::TensorsToClassificationCalculatorOptions>());
handedness_tensors >> tensors_to_handedness.In("TENSORS");
auto hand_presence_score = tensors_to_hand_presence[Output<float>("FLOAT")]; auto hand_presence_score = tensors_to_hand_presence[Output<float>("FLOAT")];
auto handedness =
tensors_to_handedness[Output<ClassificationList>("CLASSIFICATIONS")];
// Applies a threshold to the confidence score to determine whether a // Applies a threshold to the confidence score to determine whether a
// hand is present. // hand is present.
@ -354,6 +338,18 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
hand_presence_score >> hand_presence_thresholding.In("FLOAT"); hand_presence_score >> hand_presence_thresholding.In("FLOAT");
auto hand_presence = hand_presence_thresholding[Output<bool>("FLAG")]; auto hand_presence = hand_presence_thresholding[Output<bool>("FLAG")];
// Converts the handedness tensor into a float that represents the
// classification score of handedness.
auto& tensors_to_handedness =
graph.AddNode("TensorsToClassificationCalculator");
ConfigureTensorsToHandednessCalculator(
&tensors_to_handedness.GetOptions<
mediapipe::TensorsToClassificationCalculatorOptions>());
handedness_tensors >> tensors_to_handedness.In("TENSORS");
auto handedness = AllowIf(
tensors_to_handedness[Output<ClassificationList>("CLASSIFICATIONS")],
hand_presence, graph);
// Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed // Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed
// hand image (after image transformation with the FIT scale mode) to the // hand image (after image transformation with the FIT scale mode) to the
// corresponding locations on the same image with the letterbox removed // corresponding locations on the same image with the letterbox removed
@ -371,8 +367,9 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
landmark_letterbox_removal.Out("LANDMARKS") >> landmark_letterbox_removal.Out("LANDMARKS") >>
landmark_projection.In("NORM_LANDMARKS"); landmark_projection.In("NORM_LANDMARKS");
hand_rect >> landmark_projection.In("NORM_RECT"); hand_rect >> landmark_projection.In("NORM_RECT");
auto projected_landmarks = auto projected_landmarks = AllowIf(
landmark_projection[Output<NormalizedLandmarkList>("NORM_LANDMARKS")]; landmark_projection[Output<NormalizedLandmarkList>("NORM_LANDMARKS")],
hand_presence, graph);
// Projects the world landmarks from the cropped hand image to the // Projects the world landmarks from the cropped hand image to the
// corresponding locations on the full image before cropping (input to the // corresponding locations on the full image before cropping (input to the
@ -383,7 +380,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
world_landmark_projection.In("LANDMARKS"); world_landmark_projection.In("LANDMARKS");
hand_rect >> world_landmark_projection.In("NORM_RECT"); hand_rect >> world_landmark_projection.In("NORM_RECT");
auto projected_world_landmarks = auto projected_world_landmarks =
world_landmark_projection[Output<LandmarkList>("LANDMARKS")]; AllowIf(world_landmark_projection[Output<LandmarkList>("LANDMARKS")],
hand_presence, graph);
// Converts the hand landmarks into a rectangle (normalized by image size) // Converts the hand landmarks into a rectangle (normalized by image size)
// that encloses the hand. // that encloses the hand.
@ -403,7 +401,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
hand_landmarks_to_rect.Out("NORM_RECT") >> hand_landmarks_to_rect.Out("NORM_RECT") >>
hand_rect_transformation.In("NORM_RECT"); hand_rect_transformation.In("NORM_RECT");
auto hand_rect_next_frame = auto hand_rect_next_frame =
hand_rect_transformation[Output<NormalizedRect>("")]; AllowIf(hand_rect_transformation[Output<NormalizedRect>("")],
hand_presence, graph);
return {{ return {{
/* hand_landmarks= */ projected_landmarks, /* hand_landmarks= */ projected_landmarks,
@ -412,16 +411,15 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
/* hand_presence= */ hand_presence, /* hand_presence= */ hand_presence,
/* hand_presence_score= */ hand_presence_score, /* hand_presence_score= */ hand_presence_score,
/* handedness= */ handedness, /* handedness= */ handedness,
/* image_size= */ image_size,
}}; }};
} }
}; };
REGISTER_MEDIAPIPE_GRAPH( REGISTER_MEDIAPIPE_GRAPH(
::mediapipe::tasks::vision::SingleHandLandmarkerSubgraph); ::mediapipe::tasks::vision::hand_landmarker::SingleHandLandmarkerSubgraph);
// A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi // A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi hand
// hand landmark detection. // landmark detection.
// - Accepts CPU input image and a vector of hand rect RoIs to detect the // - Accepts CPU input image and a vector of hand rect RoIs to detect the
// multiple hands landmarks enclosed by the RoIs. Output vectors of // multiple hands landmarks enclosed by the RoIs. Output vectors of
// hand landmarks related results, where each element in the vectors // hand landmarks related results, where each element in the vectors
@ -449,8 +447,6 @@ REGISTER_MEDIAPIPE_GRAPH(
// Vector of float value indicates the probability that the hand is present. // Vector of float value indicates the probability that the hand is present.
// HANDEDNESS - std::vector<ClassificationList> // HANDEDNESS - std::vector<ClassificationList>
// Vector of classification of handedness. // Vector of classification of handedness.
// IMAGE_SIZE - std::vector<int, int>
// The size of input image.
// //
// Example: // Example:
// node { // node {
@ -463,7 +459,6 @@ REGISTER_MEDIAPIPE_GRAPH(
// output_stream: "PRESENCE:hand_presence" // output_stream: "PRESENCE:hand_presence"
// output_stream: "PRESENCE_SCORE:hand_presence_score" // output_stream: "PRESENCE_SCORE:hand_presence_score"
// output_stream: "HANDEDNESS:handedness" // output_stream: "HANDEDNESS:handedness"
// output_stream: "IMAGE_SIZE:image_size"
// options { // options {
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext] // [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext]
// { // {
@ -499,8 +494,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
graph[Output<std::vector<float>>(kPresenceScoreTag)]; graph[Output<std::vector<float>>(kPresenceScoreTag)];
hand_landmark_detection_outputs.handednesses >> hand_landmark_detection_outputs.handednesses >>
graph[Output<std::vector<ClassificationList>>(kHandednessTag)]; graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
hand_landmark_detection_outputs.image_size >>
graph[Output<std::pair<int, int>>(kImageSizeTag)];
return graph.GetConfig(); return graph.GetConfig();
} }
@ -510,8 +503,8 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
const HandLandmarkerSubgraphOptions& subgraph_options, const HandLandmarkerSubgraphOptions& subgraph_options,
Source<Image> image_in, Source<Image> image_in,
Source<std::vector<NormalizedRect>> multi_hand_rects, Graph& graph) { Source<std::vector<NormalizedRect>> multi_hand_rects, Graph& graph) {
auto& hand_landmark_subgraph = auto& hand_landmark_subgraph = graph.AddNode(
graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph"); "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarkerSubgraph");
hand_landmark_subgraph.GetOptions<HandLandmarkerSubgraphOptions>().CopyFrom( hand_landmark_subgraph.GetOptions<HandLandmarkerSubgraphOptions>().CopyFrom(
subgraph_options); subgraph_options);
@ -533,8 +526,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
hand_landmark_subgraph.Out("HAND_RECT_NEXT_FRAME"); hand_landmark_subgraph.Out("HAND_RECT_NEXT_FRAME");
auto landmarks = hand_landmark_subgraph.Out("LANDMARKS"); auto landmarks = hand_landmark_subgraph.Out("LANDMARKS");
auto world_landmarks = hand_landmark_subgraph.Out("WORLD_LANDMARKS"); auto world_landmarks = hand_landmark_subgraph.Out("WORLD_LANDMARKS");
auto image_size =
hand_landmark_subgraph[Output<std::pair<int, int>>("IMAGE_SIZE")];
auto& end_loop_handedness = auto& end_loop_handedness =
graph.AddNode("EndLoopClassificationListCalculator"); graph.AddNode("EndLoopClassificationListCalculator");
@ -585,13 +576,14 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
/* presences= */ presences, /* presences= */ presences,
/* presence_scores= */ presence_scores, /* presence_scores= */ presence_scores,
/* handednesses= */ handednesses, /* handednesses= */ handednesses,
/* image_size= */ image_size,
}}; }};
} }
}; };
REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandLandmarkerSubgraph); REGISTER_MEDIAPIPE_GRAPH(
::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerSubgraph);
} // namespace hand_landmarker
} // namespace vision } // namespace vision
} // namespace tasks } // namespace tasks
} // namespace mediapipe } // namespace mediapipe

View File

@ -45,6 +45,7 @@ limitations under the License.
namespace mediapipe { namespace mediapipe {
namespace tasks { namespace tasks {
namespace vision { namespace vision {
namespace hand_landmarker {
namespace { namespace {
using ::file::Defaults; using ::file::Defaults;
@ -112,8 +113,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateSingleHandTaskRunner(
absl::string_view model_name) { absl::string_view model_name) {
Graph graph; Graph graph;
auto& hand_landmark_detection = auto& hand_landmark_detection = graph.AddNode(
graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph"); "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarkerSubgraph");
auto options = std::make_unique<HandLandmarkerSubgraphOptions>(); auto options = std::make_unique<HandLandmarkerSubgraphOptions>();
options->mutable_base_options()->mutable_model_asset()->set_file_name( options->mutable_base_options()->mutable_model_asset()->set_file_name(
@ -151,8 +152,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateMultiHandTaskRunner(
absl::string_view model_name) { absl::string_view model_name) {
Graph graph; Graph graph;
auto& multi_hand_landmark_detection = auto& multi_hand_landmark_detection = graph.AddNode(
graph.AddNode("mediapipe.tasks.vision.HandLandmarkerSubgraph"); "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph");
auto options = std::make_unique<HandLandmarkerSubgraphOptions>(); auto options = std::make_unique<HandLandmarkerSubgraphOptions>();
options->mutable_base_options()->mutable_model_asset()->set_file_name( options->mutable_base_options()->mutable_model_asset()->set_file_name(
@ -462,6 +463,7 @@ INSTANTIATE_TEST_SUITE_P(
}); });
} // namespace } // namespace
} // namespace hand_landmarker
} // namespace vision } // namespace vision
} // namespace tasks } // namespace tasks
} // namespace mediapipe } // namespace mediapipe

View File

@ -31,13 +31,13 @@ mediapipe_proto_library(
) )
mediapipe_proto_library( mediapipe_proto_library(
name = "hand_landmarker_options_proto", name = "hand_landmarker_graph_options_proto",
srcs = ["hand_landmarker_options.proto"], srcs = ["hand_landmarker_graph_options.proto"],
deps = [ deps = [
":hand_landmarker_subgraph_options_proto", ":hand_landmarker_subgraph_options_proto",
"//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto", "//mediapipe/framework:calculator_proto",
"//mediapipe/tasks/cc/core/proto:base_options_proto", "//mediapipe/tasks/cc/core/proto:base_options_proto",
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_proto", "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_proto",
], ],
) )

View File

@ -19,22 +19,25 @@ package mediapipe.tasks.vision.hand_landmarker.proto;
import "mediapipe/framework/calculator.proto"; import "mediapipe/framework/calculator.proto";
import "mediapipe/tasks/cc/core/proto/base_options.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto";
import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto"; import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto";
import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto"; import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto";
message HandLandmarkerOptions { message HandLandmarkerGraphOptions {
extend mediapipe.CalculatorOptions { extend mediapipe.CalculatorOptions {
optional HandLandmarkerOptions ext = 462713202; optional HandLandmarkerGraphOptions ext = 462713202;
} }
// Base options for configuring MediaPipe Tasks, such as specifying the TfLite // Base options for configuring MediaPipe Tasks, such as specifying the TfLite
// model file with metadata, accelerator options, etc. // model file with metadata, accelerator options, etc.
optional core.proto.BaseOptions base_options = 1; optional core.proto.BaseOptions base_options = 1;
// The locale to use for display names specified through the TFLite Model // Options for hand detector graph.
// Metadata, if any. Defaults to English. optional hand_detector.proto.HandDetectorGraphOptions
optional string display_names_locale = 2 [default = "en"]; hand_detector_graph_options = 2;
optional hand_detector.proto.HandDetectorOptions hand_detector_options = 3; // Options for hand landmarker subgraph.
optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 3;
optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 4; // Minimum confidence for hand landmarks tracking to be considered
// successfully.
optional float min_tracking_confidence = 4 [default = 0.5];
} }

View File

@ -28,11 +28,7 @@ message HandLandmarkerSubgraphOptions {
// model file with metadata, accelerator options, etc. // model file with metadata, accelerator options, etc.
optional core.proto.BaseOptions base_options = 1; optional core.proto.BaseOptions base_options = 1;
// The locale to use for display names specified through the TFLite Model
// Metadata, if any. Defaults to English.
optional string display_names_locale = 2 [default = "en"];
// Minimum confidence value ([0.0, 1.0]) for hand presence score to be // Minimum confidence value ([0.0, 1.0]) for hand presence score to be
// considered successfully detecting a hand in the image. // considered successfully detecting a hand in the image.
optional float min_detection_confidence = 3 [default = 0.5]; optional float min_detection_confidence = 2 [default = 0.5];
} }