Add HandLandmarkerGraph, which connects HandDetectorGraph and HandLandmarkerSubgraph with landmark tracking.
PiperOrigin-RevId: 478596004
This commit is contained in:
parent
65c7fb9004
commit
cfd0f3e79f
|
@ -51,7 +51,7 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/core:model_task_graph",
|
||||
"//mediapipe/tasks/cc/core:utils",
|
||||
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/utils:image_tensor_specs",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
|
|
|
@ -40,7 +40,7 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/model_task_graph.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
@ -53,18 +53,23 @@ using ::mediapipe::api2::Input;
|
|||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::
|
||||
HandDetectorGraphOptions;
|
||||
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kDetectionsTag[] = "DETECTIONS";
|
||||
constexpr char kNormRectsTag[] = "NORM_RECTS";
|
||||
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
|
||||
constexpr char kHandRectsTag[] = "HAND_RECTS";
|
||||
constexpr char kPalmRectsTag[] = "PALM_RECTS";
|
||||
|
||||
struct HandDetectionOuts {
|
||||
Source<std::vector<Detection>> palm_detections;
|
||||
Source<std::vector<NormalizedRect>> hand_rects;
|
||||
Source<std::vector<NormalizedRect>> palm_rects;
|
||||
Source<Image> image;
|
||||
};
|
||||
|
||||
void ConfigureTensorsToDetectionsCalculator(
|
||||
const HandDetectorGraphOptions& tasks_options,
|
||||
mediapipe::TensorsToDetectionsCalculatorOptions* options) {
|
||||
// TODO use metadata to configure these fields.
|
||||
options->set_num_classes(1);
|
||||
|
@ -77,7 +82,7 @@ void ConfigureTensorsToDetectionsCalculator(
|
|||
options->set_sigmoid_score(true);
|
||||
options->set_score_clipping_thresh(100.0);
|
||||
options->set_reverse_output_order(true);
|
||||
options->set_min_score_thresh(0.5);
|
||||
options->set_min_score_thresh(tasks_options.min_detection_confidence());
|
||||
options->set_x_scale(192.0);
|
||||
options->set_y_scale(192.0);
|
||||
options->set_w_scale(192.0);
|
||||
|
@ -144,19 +149,26 @@ void ConfigureRectTransformationCalculator(
|
|||
// Image to perform detection on.
|
||||
//
|
||||
// Outputs:
|
||||
// DETECTIONS - std::vector<Detection>
|
||||
// PALM_DETECTIONS - std::vector<Detection>
|
||||
// Detected palms with maximum `num_hands` specified in options.
|
||||
// NORM_RECTS - std::vector<NormalizedRect>
|
||||
// HAND_RECTS - std::vector<NormalizedRect>
|
||||
// Detected hand bounding boxes in normalized coordinates.
|
||||
// PALM_RECTS - std::vector<NormalizedRect>
|
||||
// Detected palm bounding boxes in normalized coordinates.
|
||||
// IMAGE - Image
|
||||
// The input image that the hand detector runs on and has the pixel data
|
||||
// stored on the target storage (CPU vs GPU).
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "mediapipe.tasks.vision.HandDetectorGraph"
|
||||
// input_stream: "IMAGE:image"
|
||||
// output_stream: "DETECTIONS:palm_detections"
|
||||
// output_stream: "NORM_RECTS:hand_rects_from_palm_detections"
|
||||
// output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
// output_stream: "HAND_RECTS:hand_rects_from_palm_detections"
|
||||
// output_stream: "PALM_RECTS:palm_rects"
|
||||
// output_stream: "IMAGE:image_out"
|
||||
// options {
|
||||
// [mediapipe.tasks.hand_detector.proto.HandDetectorOptions.ext] {
|
||||
// [mediapipe.tasks.vision.hand_detector.proto.HandDetectorGraphOptions.ext] {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
// file_name: "palm_detection.tflite"
|
||||
|
@ -173,16 +185,20 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
||||
SubgraphContext* sc) override {
|
||||
ASSIGN_OR_RETURN(const auto* model_resources,
|
||||
CreateModelResources<HandDetectorOptions>(sc));
|
||||
CreateModelResources<HandDetectorGraphOptions>(sc));
|
||||
Graph graph;
|
||||
ASSIGN_OR_RETURN(auto hand_detection_outs,
|
||||
BuildHandDetectionSubgraph(
|
||||
sc->Options<HandDetectorOptions>(), *model_resources,
|
||||
graph[Input<Image>(kImageTag)], graph));
|
||||
ASSIGN_OR_RETURN(
|
||||
auto hand_detection_outs,
|
||||
BuildHandDetectionSubgraph(sc->Options<HandDetectorGraphOptions>(),
|
||||
*model_resources,
|
||||
graph[Input<Image>(kImageTag)], graph));
|
||||
hand_detection_outs.palm_detections >>
|
||||
graph[Output<std::vector<Detection>>(kDetectionsTag)];
|
||||
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
|
||||
hand_detection_outs.hand_rects >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kNormRectsTag)];
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandRectsTag)];
|
||||
hand_detection_outs.palm_rects >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
|
||||
hand_detection_outs.image >> graph[Output<Image>(kImageTag)];
|
||||
return graph.GetConfig();
|
||||
}
|
||||
|
||||
|
@ -196,7 +212,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
// image_in: image stream to run hand detection on.
|
||||
// graph: the mediapipe builder::Graph instance to be updated.
|
||||
absl::StatusOr<HandDetectionOuts> BuildHandDetectionSubgraph(
|
||||
const HandDetectorOptions& subgraph_options,
|
||||
const HandDetectorGraphOptions& subgraph_options,
|
||||
const core::ModelResources& model_resources, Source<Image> image_in,
|
||||
Graph& graph) {
|
||||
// Add image preprocessing subgraph. The model expects aspect ratio
|
||||
|
@ -235,6 +251,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
auto& tensors_to_detections =
|
||||
graph.AddNode("TensorsToDetectionsCalculator");
|
||||
ConfigureTensorsToDetectionsCalculator(
|
||||
subgraph_options,
|
||||
&tensors_to_detections
|
||||
.GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
|
||||
model_output_tensors >> tensors_to_detections.In("TENSORS");
|
||||
|
@ -281,7 +298,8 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
.GetOptions<mediapipe::DetectionsToRectsCalculatorOptions>());
|
||||
palm_detections >> detections_to_rects.In("DETECTIONS");
|
||||
image_size >> detections_to_rects.In("IMAGE_SIZE");
|
||||
auto palm_rects = detections_to_rects.Out("NORM_RECTS");
|
||||
auto palm_rects =
|
||||
detections_to_rects[Output<std::vector<NormalizedRect>>("NORM_RECTS")];
|
||||
|
||||
// Expands and shifts the rectangle that contains the palm so that it's
|
||||
// likely to cover the entire hand.
|
||||
|
@ -308,8 +326,11 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
clip_normalized_rect_vector_size[Output<std::vector<NormalizedRect>>(
|
||||
"")];
|
||||
|
||||
return HandDetectionOuts{.palm_detections = palm_detections,
|
||||
.hand_rects = clipped_hand_rects};
|
||||
return HandDetectionOuts{
|
||||
/* palm_detections= */ palm_detections,
|
||||
/* hand_rects= */ clipped_hand_rects,
|
||||
/* palm_rects= */ palm_rects,
|
||||
/* image= */ preprocessing[Output<Image>(kImageTag)]};
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||
|
||||
|
@ -60,7 +60,8 @@ using ::mediapipe::tasks::core::ModelResources;
|
|||
using ::mediapipe::tasks::core::TaskRunner;
|
||||
using ::mediapipe::tasks::core::proto::ExternalFile;
|
||||
using ::mediapipe::tasks::vision::DecodeImageFromFile;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::
|
||||
HandDetectorGraphOptions;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorResult;
|
||||
using ::testing::EqualsProto;
|
||||
using ::testing::TestParamInfo;
|
||||
|
@ -80,9 +81,9 @@ constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt";
|
|||
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kImageName[] = "image";
|
||||
constexpr char kPalmDetectionsTag[] = "DETECTIONS";
|
||||
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
|
||||
constexpr char kPalmDetectionsName[] = "palm_detections";
|
||||
constexpr char kHandNormRectsTag[] = "NORM_RECTS";
|
||||
constexpr char kHandRectsTag[] = "HAND_RECTS";
|
||||
constexpr char kHandNormRectsName[] = "hand_norm_rects";
|
||||
|
||||
constexpr float kPalmDetectionBboxMaxDiff = 0.01;
|
||||
|
@ -106,20 +107,20 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner(
|
|||
auto& hand_detection =
|
||||
graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph");
|
||||
|
||||
auto options = std::make_unique<HandDetectorOptions>();
|
||||
auto options = std::make_unique<HandDetectorGraphOptions>();
|
||||
options->mutable_base_options()->mutable_model_asset()->set_file_name(
|
||||
JoinPath("./", kTestDataDirectory, model_name));
|
||||
options->set_min_detection_confidence(0.5);
|
||||
options->set_num_hands(num_hands);
|
||||
hand_detection.GetOptions<HandDetectorOptions>().Swap(options.get());
|
||||
hand_detection.GetOptions<HandDetectorGraphOptions>().Swap(options.get());
|
||||
|
||||
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
|
||||
hand_detection.In(kImageTag);
|
||||
|
||||
hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >>
|
||||
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
|
||||
hand_detection.Out(kHandNormRectsTag).SetName(kHandNormRectsName) >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandNormRectsTag)];
|
||||
hand_detection.Out(kHandRectsTag).SetName(kHandNormRectsName) >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandRectsTag)];
|
||||
|
||||
return TaskRunner::Create(
|
||||
graph.GetConfig(), std::make_unique<core::MediaPipeBuiltinOpResolver>());
|
||||
|
|
|
@ -21,8 +21,8 @@ package(default_visibility = [
|
|||
licenses(["notice"])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "hand_detector_options_proto",
|
||||
srcs = ["hand_detector_options.proto"],
|
||||
name = "hand_detector_graph_options_proto",
|
||||
srcs = ["hand_detector_graph_options.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
|
|
|
@ -21,24 +21,20 @@ import "mediapipe/framework/calculator.proto";
|
|||
import "mediapipe/tasks/cc/core/proto/base_options.proto";
|
||||
|
||||
option java_package = "com.google.mediapipe.tasks.vision.handdetector";
|
||||
option java_outer_classname = "HandDetectorOptionsProto";
|
||||
option java_outer_classname = "HandDetectorGraphOptionsProto";
|
||||
|
||||
message HandDetectorOptions {
|
||||
message HandDetectorGraphOptions {
|
||||
extend mediapipe.CalculatorOptions {
|
||||
optional HandDetectorOptions ext = 464864288;
|
||||
optional HandDetectorGraphOptions ext = 464864288;
|
||||
}
|
||||
// Base options for configuring Task library, such as specifying the TfLite
|
||||
// model file with metadata, accelerator options, etc.
|
||||
optional core.proto.BaseOptions base_options = 1;
|
||||
|
||||
// The locale to use for display names specified through the TFLite Model
|
||||
// Metadata, if any. Defaults to English.
|
||||
optional string display_names_locale = 2 [default = "en"];
|
||||
|
||||
// Minimum confidence value ([0.0, 1.0]) for confidence score to be considered
|
||||
// successfully detecting a hand in the image.
|
||||
optional float min_detection_confidence = 3 [default = 0.5];
|
||||
optional float min_detection_confidence = 2 [default = 0.5];
|
||||
|
||||
// The maximum number of hands output by the detector.
|
||||
optional int32 num_hands = 4;
|
||||
optional int32 num_hands = 3;
|
||||
}
|
|
@ -51,6 +51,7 @@ cc_library(
|
|||
# TODO: move calculators in modules/hand_landmark/calculators to tasks dir.
|
||||
"//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator",
|
||||
"//mediapipe/tasks/cc:common",
|
||||
"//mediapipe/tasks/cc/components/utils:gate",
|
||||
"//mediapipe/tasks/cc/components:image_preprocessing",
|
||||
"//mediapipe/tasks/cc/core:model_resources",
|
||||
"//mediapipe/tasks/cc/core:model_task_graph",
|
||||
|
@ -66,3 +67,41 @@ cc_library(
|
|||
)
|
||||
|
||||
# TODO: Enable this test
|
||||
|
||||
cc_library(
|
||||
name = "hand_landmarker_graph",
|
||||
srcs = ["hand_landmarker_graph.cc"],
|
||||
deps = [
|
||||
":hand_landmarker_subgraph",
|
||||
"//mediapipe/calculators/core:begin_loop_calculator",
|
||||
"//mediapipe/calculators/core:clip_vector_size_calculator_cc_proto",
|
||||
"//mediapipe/calculators/core:end_loop_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator_cc_proto",
|
||||
"//mediapipe/calculators/core:pass_through_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto",
|
||||
"//mediapipe/framework/api2:builder",
|
||||
"//mediapipe/framework/api2:port",
|
||||
"//mediapipe/framework/formats:classification_cc_proto",
|
||||
"//mediapipe/framework/formats:detection_cc_proto",
|
||||
"//mediapipe/framework/formats:image",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/framework/formats:tensor",
|
||||
"//mediapipe/tasks/cc:common",
|
||||
"//mediapipe/tasks/cc/components/utils:gate",
|
||||
"//mediapipe/tasks/cc/core:model_task_graph",
|
||||
"//mediapipe/tasks/cc/core:utils",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector:hand_detector_graph",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_subgraph_options_cc_proto",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
# TODO: Enable this test
|
||||
|
|
|
@ -0,0 +1,284 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/calculators/core/clip_vector_size_calculator.pb.h"
|
||||
#include "mediapipe/calculators/core/gate_calculator.pb.h"
|
||||
#include "mediapipe/calculators/util/collection_has_min_size_calculator.pb.h"
|
||||
#include "mediapipe/framework/api2/builder.h"
|
||||
#include "mediapipe/framework/api2/port.h"
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/detection.pb.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/tasks/cc/common.h"
|
||||
#include "mediapipe/tasks/cc/components/utils/gate.h"
|
||||
#include "mediapipe/tasks/cc/core/model_task_graph.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_landmarker {
|
||||
|
||||
namespace {
|
||||
|
||||
using ::mediapipe::api2::Input;
|
||||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::tasks::components::utils::DisallowIf;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::
|
||||
HandDetectorGraphOptions;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarkerGraphOptions;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarkerSubgraphOptions;
|
||||
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kLandmarksTag[] = "LANDMARKS";
|
||||
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
|
||||
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
|
||||
constexpr char kHandednessTag[] = "HANDEDNESS";
|
||||
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
|
||||
constexpr char kPalmRectsTag[] = "PALM_RECTS";
|
||||
constexpr char kPreviousLoopbackCalculatorName[] = "PreviousLoopbackCalculator";
|
||||
|
||||
struct HandLandmarkerOutputs {
|
||||
Source<std::vector<NormalizedLandmarkList>> landmark_lists;
|
||||
Source<std::vector<LandmarkList>> world_landmark_lists;
|
||||
Source<std::vector<NormalizedRect>> hand_rects_next_frame;
|
||||
Source<std::vector<ClassificationList>> handednesses;
|
||||
Source<std::vector<NormalizedRect>> palm_rects;
|
||||
Source<std::vector<Detection>> palm_detections;
|
||||
Source<Image> image;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// A "mediapipe.tasks.vision.HandLandmarkerGraph" performs hand
|
||||
// landmarks detection. The HandLandmarkerGraph consists of two subgraphs:
|
||||
// HandDetectorGraph and HandLandmarkerSubgraph. HandLandmarkerSubgraph detects
|
||||
// landmarks from bounding boxes produced by HandDetectorGraph.
|
||||
// HandLandmarkerGraph tracks the landmarks over time, and skips the
|
||||
// HandDetectorGraph. If the tracking is lost or the detected hands are
|
||||
// fewer than the configured max number of hands, HandDetectorGraph is triggered
|
||||
// to detect hands.
|
||||
//
|
||||
// Accepts CPU input images and outputs Landmarks on CPU.
|
||||
//
|
||||
// Inputs:
|
||||
// IMAGE - Image
|
||||
// Image to perform hand landmarks detection on.
|
||||
//
|
||||
// Outputs:
|
||||
// LANDMARKS - std::vector<NormalizedLandmarkList>
|
||||
// Vector of detected hand landmarks.
|
||||
// WORLD_LANDMARKS - std::vector<LandmarkList>
|
||||
// Vector of detected hand landmarks in world coordinates.
|
||||
// HAND_RECT_NEXT_FRAME - std::vector<NormalizedRect>
|
||||
// Vector of the predicted rects enclosing the same hand RoI for landmark
|
||||
// detection on the next frame.
|
||||
// HANDEDNESS - std::vector<ClassificationList>
|
||||
// Vector of classification of handedness.
|
||||
// PALM_RECTS - std::vector<NormalizedRect>
|
||||
// Detected palm bounding boxes in normalized coordinates.
|
||||
// PALM_DETECTIONS - std::vector<Detection>
|
||||
// Detected palms with maximum `num_hands` specified in options.
|
||||
// IMAGE - Image
|
||||
// The input image that the hand landmarker runs on and has the pixel data
|
||||
// stored on the target storage (CPU vs GPU).
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "mediapipe.tasks.vision.HandLandmarkerGraph"
|
||||
// input_stream: "IMAGE:image_in"
|
||||
// output_stream: "LANDMARKS:hand_landmarks"
|
||||
// output_stream: "WORLD_LANDMARKS:world_hand_landmarks"
|
||||
// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
|
||||
// output_stream: "HANDEDNESS:handedness"
|
||||
// output_stream: "PALM_RECTS:palm_rects"
|
||||
// output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
// output_stream: "IMAGE:image_out"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerGraphOptions.ext] {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
// file_name: "hand_landmarker.task"
|
||||
// }
|
||||
// }
|
||||
// hand_detector_graph_options {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
// file_name: "palm_detection.tflite"
|
||||
// }
|
||||
// }
|
||||
// min_detection_confidence: 0.5
|
||||
// num_hands: 2
|
||||
// }
|
||||
// hand_landmarker_subgraph_options {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
// file_name: "hand_landmark_lite.tflite"
|
||||
// }
|
||||
// }
|
||||
// min_detection_confidence: 0.5
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
class HandLandmarkerGraph : public core::ModelTaskGraph {
|
||||
public:
|
||||
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
||||
SubgraphContext* sc) override {
|
||||
Graph graph;
|
||||
ASSIGN_OR_RETURN(
|
||||
auto hand_landmarker_outputs,
|
||||
BuildHandLandmarkerGraph(sc->Options<HandLandmarkerGraphOptions>(),
|
||||
graph[Input<Image>(kImageTag)], graph));
|
||||
hand_landmarker_outputs.landmark_lists >>
|
||||
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
|
||||
hand_landmarker_outputs.world_landmark_lists >>
|
||||
graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
|
||||
hand_landmarker_outputs.hand_rects_next_frame >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];
|
||||
hand_landmarker_outputs.handednesses >>
|
||||
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
|
||||
hand_landmarker_outputs.palm_rects >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
|
||||
hand_landmarker_outputs.palm_detections >>
|
||||
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
|
||||
hand_landmarker_outputs.image >> graph[Output<Image>(kImageTag)];
|
||||
|
||||
// TODO remove when support is fixed.
|
||||
// As mediapipe GraphBuilder currently doesn't support configuring
|
||||
// InputStreamInfo, modifying the CalculatorGraphConfig proto directly.
|
||||
CalculatorGraphConfig config = graph.GetConfig();
|
||||
for (int i = 0; i < config.node_size(); ++i) {
|
||||
if (config.node(i).calculator() == kPreviousLoopbackCalculatorName) {
|
||||
auto* info = config.mutable_node(i)->add_input_stream_info();
|
||||
info->set_tag_index("LOOP");
|
||||
info->set_back_edge(true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return config;
|
||||
}
|
||||
|
||||
private:
|
||||
// Adds a mediapipe hand landmark detection graph into the provided
|
||||
// builder::Graph instance.
|
||||
//
|
||||
// tasks_options: the mediapipe tasks module HandLandmarkerGraphOptions.
|
||||
// image_in: (mediapipe::Image) stream to run hand landmark detection on.
|
||||
// graph: the mediapipe graph instance to be updated.
|
||||
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
|
||||
const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
|
||||
Graph& graph) {
|
||||
const int max_num_hands =
|
||||
tasks_options.hand_detector_graph_options().num_hands();
|
||||
|
||||
auto& previous_loopback = graph.AddNode(kPreviousLoopbackCalculatorName);
|
||||
image_in >> previous_loopback.In("MAIN");
|
||||
auto prev_hand_rects_from_landmarks =
|
||||
previous_loopback[Output<std::vector<NormalizedRect>>("PREV_LOOP")];
|
||||
|
||||
auto& min_size_node =
|
||||
graph.AddNode("NormalizedRectVectorHasMinSizeCalculator");
|
||||
prev_hand_rects_from_landmarks >> min_size_node.In("ITERABLE");
|
||||
min_size_node.GetOptions<CollectionHasMinSizeCalculatorOptions>()
|
||||
.set_min_size(max_num_hands);
|
||||
auto has_enough_hands = min_size_node.Out("").Cast<bool>();
|
||||
|
||||
auto image_for_hand_detector =
|
||||
DisallowIf(image_in, has_enough_hands, graph);
|
||||
|
||||
auto& hand_detector =
|
||||
graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph");
|
||||
hand_detector.GetOptions<HandDetectorGraphOptions>().CopyFrom(
|
||||
tasks_options.hand_detector_graph_options());
|
||||
image_for_hand_detector >> hand_detector.In("IMAGE");
|
||||
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
|
||||
|
||||
auto& hand_association = graph.AddNode("HandAssociationCalculator");
|
||||
hand_association.GetOptions<HandAssociationCalculatorOptions>()
|
||||
.set_min_similarity_threshold(tasks_options.min_tracking_confidence());
|
||||
prev_hand_rects_from_landmarks >>
|
||||
hand_association[Input<std::vector<NormalizedRect>>::Multiple("")][0];
|
||||
hand_rects_from_hand_detector >>
|
||||
hand_association[Input<std::vector<NormalizedRect>>::Multiple("")][1];
|
||||
auto hand_rects = hand_association.Out("");
|
||||
|
||||
auto& clip_hand_rects =
|
||||
graph.AddNode("ClipNormalizedRectVectorSizeCalculator");
|
||||
clip_hand_rects.GetOptions<ClipVectorSizeCalculatorOptions>()
|
||||
.set_max_vec_size(max_num_hands);
|
||||
hand_rects >> clip_hand_rects.In("");
|
||||
auto clipped_hand_rects = clip_hand_rects.Out("");
|
||||
|
||||
auto& hand_landmarker_subgraph = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph");
|
||||
hand_landmarker_subgraph.GetOptions<HandLandmarkerSubgraphOptions>()
|
||||
.CopyFrom(tasks_options.hand_landmarker_subgraph_options());
|
||||
image_in >> hand_landmarker_subgraph.In("IMAGE");
|
||||
clipped_hand_rects >> hand_landmarker_subgraph.In("HAND_RECT");
|
||||
|
||||
auto hand_rects_for_next_frame =
|
||||
hand_landmarker_subgraph[Output<std::vector<NormalizedRect>>(
|
||||
kHandRectNextFrameTag)];
|
||||
// Back edge.
|
||||
hand_rects_for_next_frame >> previous_loopback.In("LOOP");
|
||||
|
||||
// TODO: Replace PassThroughCalculator with a calculator that
|
||||
// converts the pixel data to be stored on the target storage (CPU vs GPU).
|
||||
auto& pass_through = graph.AddNode("PassThroughCalculator");
|
||||
image_in >> pass_through.In("");
|
||||
|
||||
return {{
|
||||
/* landmark_lists= */ hand_landmarker_subgraph
|
||||
[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
|
||||
/* world_landmark_lists= */
|
||||
hand_landmarker_subgraph[Output<std::vector<LandmarkList>>(
|
||||
kWorldLandmarksTag)],
|
||||
/* hand_rects_next_frame= */ hand_rects_for_next_frame,
|
||||
hand_landmarker_subgraph[Output<std::vector<ClassificationList>>(
|
||||
kHandednessTag)],
|
||||
/* palm_rects= */
|
||||
hand_detector[Output<std::vector<NormalizedRect>>(kPalmRectsTag)],
|
||||
/* palm_detections */
|
||||
hand_detector[Output<std::vector<Detection>>(kPalmDetectionsTag)],
|
||||
/* image */
|
||||
pass_through[Output<Image>("")],
|
||||
}};
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerGraph);
|
||||
|
||||
} // namespace hand_landmarker
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,167 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/flags/flag.h"
|
||||
#include "absl/status/statusor.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "mediapipe/framework/api2/builder.h"
|
||||
#include "mediapipe/framework/api2/port.h"
|
||||
#include "mediapipe/framework/deps/file_path.h"
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/port/file_helpers.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/shims/cc/shims_test_util.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_landmarker {
|
||||
|
||||
namespace {
|
||||
|
||||
using ::file::Defaults;
|
||||
using ::file::GetTextProto;
|
||||
using ::mediapipe::api2::Input;
|
||||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::file::JoinPath;
|
||||
using ::mediapipe::tasks::core::TaskRunner;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarkerGraphOptions;
|
||||
using ::testing::EqualsProto;
|
||||
using ::testing::proto::Approximately;
|
||||
using ::testing::proto::Partially;
|
||||
|
||||
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
|
||||
constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite";
|
||||
constexpr char kHandLandmarkerFullModel[] = "hand_landmark_full.tflite";
|
||||
constexpr char kLeftHandsImage[] = "left_hands.jpg";
|
||||
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kImageName[] = "image_in";
|
||||
constexpr char kLandmarksTag[] = "LANDMARKS";
|
||||
constexpr char kLandmarksName[] = "landmarks";
|
||||
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
|
||||
constexpr char kWorldLandmarksName[] = "world_landmarks";
|
||||
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
|
||||
constexpr char kHandRectNextFrameName[] = "hand_rect_next_frame";
|
||||
constexpr char kHandednessTag[] = "HANDEDNESS";
|
||||
constexpr char kHandednessName[] = "handedness";
|
||||
|
||||
// Expected hand landmarks positions, in text proto format.
|
||||
constexpr char kExpectedLeftUpHandLandmarksFilename[] =
|
||||
"expected_left_up_hand_landmarks.prototxt";
|
||||
constexpr char kExpectedLeftDownHandLandmarksFilename[] =
|
||||
"expected_left_down_hand_landmarks.prototxt";
|
||||
|
||||
constexpr float kFullModelFractionDiff = 0.03;  // relative tolerance, expressed as a fraction (0.03 == 3%)
|
||||
constexpr float kAbsMargin = 0.03;
|
||||
constexpr int kMaxNumHands = 2;
|
||||
constexpr float kMinTrackingConfidence = 0.5;
|
||||
|
||||
// Reads the text-format NormalizedLandmarkList stored in `filename` under the
// test data directory. A failure to load is reported through MP_EXPECT_OK; the
// (possibly unpopulated) list is returned either way.
NormalizedLandmarkList GetExpectedLandmarkList(absl::string_view filename) {
  const auto proto_path = file::JoinPath("./", kTestDataDirectory, filename);
  NormalizedLandmarkList landmark_list;
  MP_EXPECT_OK(GetTextProto(proto_path, &landmark_list, Defaults()));
  return landmark_list;
}
|
||||
|
||||
// Helper function to create a Hand Landmarker TaskRunner.
|
||||
absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner() {
|
||||
Graph graph;
|
||||
auto& hand_landmarker_graph = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph");
|
||||
auto& options =
|
||||
hand_landmarker_graph.GetOptions<HandLandmarkerGraphOptions>();
|
||||
options.mutable_hand_detector_graph_options()
|
||||
->mutable_base_options()
|
||||
->mutable_model_asset()
|
||||
->set_file_name(JoinPath("./", kTestDataDirectory, kPalmDetectionModel));
|
||||
options.mutable_hand_detector_graph_options()->mutable_base_options();
|
||||
options.mutable_hand_detector_graph_options()->set_num_hands(kMaxNumHands);
|
||||
options.mutable_hand_landmarker_subgraph_options()
|
||||
->mutable_base_options()
|
||||
->mutable_model_asset()
|
||||
->set_file_name(
|
||||
JoinPath("./", kTestDataDirectory, kHandLandmarkerFullModel));
|
||||
options.set_min_tracking_confidence(kMinTrackingConfidence);
|
||||
|
||||
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
|
||||
hand_landmarker_graph.In(kImageTag);
|
||||
hand_landmarker_graph.Out(kLandmarksTag).SetName(kLandmarksName) >>
|
||||
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
|
||||
hand_landmarker_graph.Out(kWorldLandmarksTag).SetName(kWorldLandmarksName) >>
|
||||
graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
|
||||
hand_landmarker_graph.Out(kHandednessTag).SetName(kHandednessName) >>
|
||||
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
|
||||
hand_landmarker_graph.Out(kHandRectNextFrameTag)
|
||||
.SetName(kHandRectNextFrameName) >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];
|
||||
return TaskRunner::Create(
|
||||
graph.GetConfig(), absl::make_unique<core::MediaPipeBuiltinOpResolver>());
|
||||
}
|
||||
|
||||
class HandLandmarkerTest : public tflite_shims::testing::Test {};
|
||||
|
||||
TEST_F(HandLandmarkerTest, Succeeds) {
|
||||
MP_ASSERT_OK_AND_ASSIGN(
|
||||
Image image,
|
||||
DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kLeftHandsImage)));
|
||||
MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner());
|
||||
auto output_packets =
|
||||
task_runner->Process({{kImageName, MakePacket<Image>(std::move(image))}});
|
||||
const auto& landmarks = (*output_packets)[kLandmarksName]
|
||||
.Get<std::vector<NormalizedLandmarkList>>();
|
||||
ASSERT_EQ(landmarks.size(), kMaxNumHands);
|
||||
std::vector<NormalizedLandmarkList> expected_landmarks = {
|
||||
GetExpectedLandmarkList(kExpectedLeftUpHandLandmarksFilename),
|
||||
GetExpectedLandmarkList(kExpectedLeftDownHandLandmarksFilename)};
|
||||
|
||||
EXPECT_THAT(landmarks[0],
|
||||
Approximately(Partially(EqualsProto(expected_landmarks[0])),
|
||||
/*margin=*/kAbsMargin,
|
||||
/*fraction=*/kFullModelFractionDiff));
|
||||
EXPECT_THAT(landmarks[1],
|
||||
Approximately(Partially(EqualsProto(expected_landmarks[1])),
|
||||
/*margin=*/kAbsMargin,
|
||||
/*fraction=*/kFullModelFractionDiff));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace hand_landmarker
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -34,6 +34,7 @@ limitations under the License.
|
|||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/tasks/cc/common.h"
|
||||
#include "mediapipe/tasks/cc/components/image_preprocessing.h"
|
||||
#include "mediapipe/tasks/cc/components/utils/gate.h"
|
||||
#include "mediapipe/tasks/cc/core/model_resources.h"
|
||||
#include "mediapipe/tasks/cc/core/model_task_graph.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||
|
@ -48,6 +49,7 @@ limitations under the License.
|
|||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_landmarker {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -55,6 +57,7 @@ using ::mediapipe::api2::Input;
|
|||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::tasks::components::utils::AllowIf;
|
||||
using ::mediapipe::tasks::core::ModelResources;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarkerSubgraphOptions;
|
||||
|
@ -82,7 +85,6 @@ struct SingleHandLandmarkerOutputs {
|
|||
Source<bool> hand_presence;
|
||||
Source<float> hand_presence_score;
|
||||
Source<ClassificationList> handedness;
|
||||
Source<std::pair<int, int>> image_size;
|
||||
};
|
||||
|
||||
struct HandLandmarkerOutputs {
|
||||
|
@ -92,7 +94,6 @@ struct HandLandmarkerOutputs {
|
|||
Source<std::vector<bool>> presences;
|
||||
Source<std::vector<float>> presence_scores;
|
||||
Source<std::vector<ClassificationList>> handednesses;
|
||||
Source<std::pair<int, int>> image_size;
|
||||
};
|
||||
|
||||
absl::Status SanityCheckOptions(const HandLandmarkerSubgraphOptions& options) {
|
||||
|
@ -208,8 +209,6 @@ void ConfigureHandRectTransformationCalculator(
|
|||
// Float value indicates the probability that the hand is present.
|
||||
// HANDEDNESS - ClassificationList
|
||||
// Classification of handedness.
|
||||
// IMAGE_SIZE - std::vector<int, int>
|
||||
// The size of input image.
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
|
@ -221,8 +220,6 @@ void ConfigureHandRectTransformationCalculator(
|
|||
// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
|
||||
// output_stream: "PRESENCE:hand_presence"
|
||||
// output_stream: "PRESENCE_SCORE:hand_presence_score"
|
||||
// output_stream: "HANDEDNESS:handedness"
|
||||
// output_stream: "IMAGE_SIZE:image_size"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext]
|
||||
// {
|
||||
|
@ -259,8 +256,6 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
graph[Output<float>(kPresenceScoreTag)];
|
||||
hand_landmark_detection_outs.handedness >>
|
||||
graph[Output<ClassificationList>(kHandednessTag)];
|
||||
hand_landmark_detection_outs.image_size >>
|
||||
graph[Output<std::pair<int, int>>(kImageSizeTag)];
|
||||
|
||||
return graph.GetConfig();
|
||||
}
|
||||
|
@ -332,18 +327,7 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
// score of hand presence.
|
||||
auto& tensors_to_hand_presence = graph.AddNode("TensorsToFloatsCalculator");
|
||||
hand_flag_tensors >> tensors_to_hand_presence.In("TENSORS");
|
||||
|
||||
// Converts the handedness tensor into a float that represents the
|
||||
// classification score of handedness.
|
||||
auto& tensors_to_handedness =
|
||||
graph.AddNode("TensorsToClassificationCalculator");
|
||||
ConfigureTensorsToHandednessCalculator(
|
||||
&tensors_to_handedness.GetOptions<
|
||||
mediapipe::TensorsToClassificationCalculatorOptions>());
|
||||
handedness_tensors >> tensors_to_handedness.In("TENSORS");
|
||||
auto hand_presence_score = tensors_to_hand_presence[Output<float>("FLOAT")];
|
||||
auto handedness =
|
||||
tensors_to_handedness[Output<ClassificationList>("CLASSIFICATIONS")];
|
||||
|
||||
// Applies a threshold to the confidence score to determine whether a
|
||||
// hand is present.
|
||||
|
@ -354,6 +338,18 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
hand_presence_score >> hand_presence_thresholding.In("FLOAT");
|
||||
auto hand_presence = hand_presence_thresholding[Output<bool>("FLAG")];
|
||||
|
||||
// Converts the handedness tensor into a float that represents the
|
||||
// classification score of handedness.
|
||||
auto& tensors_to_handedness =
|
||||
graph.AddNode("TensorsToClassificationCalculator");
|
||||
ConfigureTensorsToHandednessCalculator(
|
||||
&tensors_to_handedness.GetOptions<
|
||||
mediapipe::TensorsToClassificationCalculatorOptions>());
|
||||
handedness_tensors >> tensors_to_handedness.In("TENSORS");
|
||||
auto handedness = AllowIf(
|
||||
tensors_to_handedness[Output<ClassificationList>("CLASSIFICATIONS")],
|
||||
hand_presence, graph);
|
||||
|
||||
// Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed
|
||||
// hand image (after image transformation with the FIT scale mode) to the
|
||||
// corresponding locations on the same image with the letterbox removed
|
||||
|
@ -371,8 +367,9 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
landmark_letterbox_removal.Out("LANDMARKS") >>
|
||||
landmark_projection.In("NORM_LANDMARKS");
|
||||
hand_rect >> landmark_projection.In("NORM_RECT");
|
||||
auto projected_landmarks =
|
||||
landmark_projection[Output<NormalizedLandmarkList>("NORM_LANDMARKS")];
|
||||
auto projected_landmarks = AllowIf(
|
||||
landmark_projection[Output<NormalizedLandmarkList>("NORM_LANDMARKS")],
|
||||
hand_presence, graph);
|
||||
|
||||
// Projects the world landmarks from the cropped hand image to the
|
||||
// corresponding locations on the full image before cropping (input to the
|
||||
|
@ -383,7 +380,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
world_landmark_projection.In("LANDMARKS");
|
||||
hand_rect >> world_landmark_projection.In("NORM_RECT");
|
||||
auto projected_world_landmarks =
|
||||
world_landmark_projection[Output<LandmarkList>("LANDMARKS")];
|
||||
AllowIf(world_landmark_projection[Output<LandmarkList>("LANDMARKS")],
|
||||
hand_presence, graph);
|
||||
|
||||
// Converts the hand landmarks into a rectangle (normalized by image size)
|
||||
// that encloses the hand.
|
||||
|
@ -403,7 +401,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
hand_landmarks_to_rect.Out("NORM_RECT") >>
|
||||
hand_rect_transformation.In("NORM_RECT");
|
||||
auto hand_rect_next_frame =
|
||||
hand_rect_transformation[Output<NormalizedRect>("")];
|
||||
AllowIf(hand_rect_transformation[Output<NormalizedRect>("")],
|
||||
hand_presence, graph);
|
||||
|
||||
return {{
|
||||
/* hand_landmarks= */ projected_landmarks,
|
||||
|
@ -412,16 +411,15 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
/* hand_presence= */ hand_presence,
|
||||
/* hand_presence_score= */ hand_presence_score,
|
||||
/* handedness= */ handedness,
|
||||
/* image_size= */ image_size,
|
||||
}};
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::SingleHandLandmarkerSubgraph);
|
||||
::mediapipe::tasks::vision::hand_landmarker::SingleHandLandmarkerSubgraph);
|
||||
|
||||
// A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi
|
||||
// hand landmark detection.
|
||||
// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph" performs
|
||||
// multi hand landmark detection.
|
||||
// - Accepts CPU input image and a vector of hand rect RoIs to detect the
|
||||
// multiple hands landmarks enclosed by the RoIs. Output vectors of
|
||||
// hand landmarks related results, where each element in the vectors
|
||||
|
@ -449,8 +447,6 @@ REGISTER_MEDIAPIPE_GRAPH(
|
|||
// Vector of float value indicates the probability that the hand is present.
|
||||
// HANDEDNESS - std::vector<ClassificationList>
|
||||
// Vector of classification of handedness.
|
||||
// IMAGE_SIZE - std::vector<int, int>
|
||||
// The size of input image.
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
|
@ -463,7 +459,6 @@ REGISTER_MEDIAPIPE_GRAPH(
|
|||
// output_stream: "PRESENCE:hand_presence"
|
||||
// output_stream: "PRESENCE_SCORE:hand_presence_score"
|
||||
// output_stream: "HANDEDNESS:handedness"
|
||||
// output_stream: "IMAGE_SIZE:image_size"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext]
|
||||
// {
|
||||
|
@ -499,8 +494,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
graph[Output<std::vector<float>>(kPresenceScoreTag)];
|
||||
hand_landmark_detection_outputs.handednesses >>
|
||||
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
|
||||
hand_landmark_detection_outputs.image_size >>
|
||||
graph[Output<std::pair<int, int>>(kImageSizeTag)];
|
||||
|
||||
return graph.GetConfig();
|
||||
}
|
||||
|
@ -510,8 +503,8 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
const HandLandmarkerSubgraphOptions& subgraph_options,
|
||||
Source<Image> image_in,
|
||||
Source<std::vector<NormalizedRect>> multi_hand_rects, Graph& graph) {
|
||||
auto& hand_landmark_subgraph =
|
||||
graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph");
|
||||
auto& hand_landmark_subgraph = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarkerSubgraph");
|
||||
hand_landmark_subgraph.GetOptions<HandLandmarkerSubgraphOptions>().CopyFrom(
|
||||
subgraph_options);
|
||||
|
||||
|
@ -533,8 +526,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
hand_landmark_subgraph.Out("HAND_RECT_NEXT_FRAME");
|
||||
auto landmarks = hand_landmark_subgraph.Out("LANDMARKS");
|
||||
auto world_landmarks = hand_landmark_subgraph.Out("WORLD_LANDMARKS");
|
||||
auto image_size =
|
||||
hand_landmark_subgraph[Output<std::pair<int, int>>("IMAGE_SIZE")];
|
||||
|
||||
auto& end_loop_handedness =
|
||||
graph.AddNode("EndLoopClassificationListCalculator");
|
||||
|
@ -585,13 +576,14 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
/* presences= */ presences,
|
||||
/* presence_scores= */ presence_scores,
|
||||
/* handednesses= */ handednesses,
|
||||
/* image_size= */ image_size,
|
||||
}};
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandLandmarkerSubgraph);
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerSubgraph);
|
||||
|
||||
} // namespace hand_landmarker
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -45,6 +45,7 @@ limitations under the License.
|
|||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_landmarker {
|
||||
namespace {
|
||||
|
||||
using ::file::Defaults;
|
||||
|
@ -112,8 +113,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateSingleHandTaskRunner(
|
|||
absl::string_view model_name) {
|
||||
Graph graph;
|
||||
|
||||
auto& hand_landmark_detection =
|
||||
graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph");
|
||||
auto& hand_landmark_detection = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarkerSubgraph");
|
||||
|
||||
auto options = std::make_unique<HandLandmarkerSubgraphOptions>();
|
||||
options->mutable_base_options()->mutable_model_asset()->set_file_name(
|
||||
|
@ -151,8 +152,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateMultiHandTaskRunner(
|
|||
absl::string_view model_name) {
|
||||
Graph graph;
|
||||
|
||||
auto& multi_hand_landmark_detection =
|
||||
graph.AddNode("mediapipe.tasks.vision.HandLandmarkerSubgraph");
|
||||
auto& multi_hand_landmark_detection = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph");
|
||||
|
||||
auto options = std::make_unique<HandLandmarkerSubgraphOptions>();
|
||||
options->mutable_base_options()->mutable_model_asset()->set_file_name(
|
||||
|
@ -462,6 +463,7 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
});
|
||||
|
||||
} // namespace
|
||||
} // namespace hand_landmarker
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -31,13 +31,13 @@ mediapipe_proto_library(
|
|||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "hand_landmarker_options_proto",
|
||||
srcs = ["hand_landmarker_options.proto"],
|
||||
name = "hand_landmarker_graph_options_proto",
|
||||
srcs = ["hand_landmarker_graph_options.proto"],
|
||||
deps = [
|
||||
":hand_landmarker_subgraph_options_proto",
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/tasks/cc/core/proto:base_options_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_proto",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -19,22 +19,25 @@ package mediapipe.tasks.vision.hand_landmarker.proto;
|
|||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/tasks/cc/core/proto/base_options.proto";
|
||||
import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto";
|
||||
import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto";
|
||||
import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto";
|
||||
|
||||
message HandLandmarkerOptions {
|
||||
message HandLandmarkerGraphOptions {
|
||||
extend mediapipe.CalculatorOptions {
|
||||
optional HandLandmarkerOptions ext = 462713202;
|
||||
optional HandLandmarkerGraphOptions ext = 462713202;
|
||||
}
|
||||
// Base options for configuring MediaPipe Tasks, such as specifying the TfLite
|
||||
// model file with metadata, accelerator options, etc.
|
||||
optional core.proto.BaseOptions base_options = 1;
|
||||
|
||||
// The locale to use for display names specified through the TFLite Model
|
||||
// Metadata, if any. Defaults to English.
|
||||
optional string display_names_locale = 2 [default = "en"];
|
||||
// Options for hand detector graph.
|
||||
optional hand_detector.proto.HandDetectorGraphOptions
|
||||
hand_detector_graph_options = 2;
|
||||
|
||||
optional hand_detector.proto.HandDetectorOptions hand_detector_options = 3;
|
||||
// Options for hand landmarker subgraph.
|
||||
optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 3;
|
||||
|
||||
optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 4;
|
||||
// Minimum confidence for hand landmarks tracking to be considered
|
||||
// successful.
|
||||
optional float min_tracking_confidence = 4 [default = 0.5];
|
||||
}
|
|
@ -28,11 +28,7 @@ message HandLandmarkerSubgraphOptions {
|
|||
// model file with metadata, accelerator options, etc.
|
||||
optional core.proto.BaseOptions base_options = 1;
|
||||
|
||||
// The locale to use for display names specified through the TFLite Model
|
||||
// Metadata, if any. Defaults to English.
|
||||
optional string display_names_locale = 2 [default = "en"];
|
||||
|
||||
// Minimum confidence value ([0.0, 1.0]) for hand presence score to be
|
||||
// considered successfully detecting a hand in the image.
|
||||
optional float min_detection_confidence = 3 [default = 0.5];
|
||||
optional float min_detection_confidence = 2 [default = 0.5];
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user