diff --git a/docs/framework_concepts/graphs.md b/docs/framework_concepts/graphs.md index f951b506d..b20a87467 100644 --- a/docs/framework_concepts/graphs.md +++ b/docs/framework_concepts/graphs.md @@ -143,6 +143,98 @@ Below is an example of how to create a subgraph named `TwoPassThroughSubgraph`. } ``` +## Graph Options + +It is possible to specify a "graph options" protobuf for a MediaPipe graph +similar to the [`Calculator Options`](calculators.md#calculator-options) +protobuf specified for a MediaPipe calculator. These "graph options" can be +specified where a graph is invoked, and used to populate calculator options and +subgraph options within the graph. + +In a CalculatorGraphConfig, graph options can be specified for a subgraph +exactly like calculator options, as shown below: + +``` +node { + calculator: "FlowLimiterCalculator" + input_stream: "image" + output_stream: "throttled_image" + node_options: { + [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] { + max_in_flight: 1 + } + } +} + +node { + calculator: "FaceDetectionSubgraph" + input_stream: "IMAGE:throttled_image" + node_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] { + tensor_width: 192 + tensor_height: 192 + } + } +} +``` + +In a CalculatorGraphConfig, graph options can be accepted and used to populate +calculator options, as shown below: + +``` +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + node_options: { + [type.googleapis.com/mediapipe.ImageToTensorCalculatorOptions] { + keep_aspect_ratio: true + border_mode: BORDER_ZERO + } + } + option_value: "output_tensor_width:options/tensor_width" + option_value: "output_tensor_height:options/tensor_height" +} + +node { + calculator: "InferenceCalculator" + node_options: { + [type.googleapis.com/mediapipe.InferenceCalculatorOptions] {} + } + option_value: "delegate:options/delegate" + 
option_value: "model_path:options/model_path" +} +``` + +In this example, the `FaceDetectionSubgraph` accepts graph option protobuf +`FaceDetectionOptions`. The `FaceDetectionOptions` is used to define some field +values in the calculator options `ImageToTensorCalculatorOptions` and some field +values in the subgraph options `InferenceCalculatorOptions`. The field values +are defined using the `option_value:` syntax. + +In the `CalculatorGraphConfig::Node` protobuf, the fields `node_options:` and +`option_value:` together define the option values for a calculator such as +`ImageToTensorCalculator`. The `node_options:` field defines a set of literal +constant values using the text protobuf syntax. Each `option_value:` field +defines the value for one protobuf field using information from the enclosing +graph, specifically from field values of the graph options of the enclosing +graph. In the example above, the `option_value:` +`"output_tensor_width:options/tensor_width"` defines the field +`ImageToTensorCalculatorOptions.output_tensor_width` using the value of +`FaceDetectionOptions.tensor_width`. + +The syntax of `option_value:` is similar to the syntax of `input_stream:`. The +syntax is `option_value: "LHS:RHS"`. The LHS identifies a calculator option +field and the RHS identifies a graph option field. More specifically, the LHS +and RHS each consists of a series of protobuf field names identifying nested +protobuf messages and fields separated by '/'. This is known as the "ProtoPath" +syntax. Nested messages that are referenced in the LHS or RHS must already be +defined in the enclosing protobuf in order to be traversed using +`option_value:`. 
+ ## Cycles diff --git a/mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.cc b/mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.cc index 649ff2c11..cd5933ee6 100644 --- a/mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.cc +++ b/mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.cc @@ -507,8 +507,11 @@ class ClassificationPostprocessingGraph : public mediapipe::Subgraph { } }; +// REGISTER_MEDIAPIPE_GRAPH argument has to fit on one line to work properly. +// clang-format off REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::components::processors::ClassificationPostprocessingGraph); // NOLINT + ::mediapipe::tasks::components::processors::ClassificationPostprocessingGraph); // NOLINT +// clang-format on } // namespace processors } // namespace components diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/BUILD b/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD similarity index 83% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/BUILD rename to mediapipe/tasks/cc/vision/gesture_recognizer/BUILD index 9e2d9bd17..c9319e946 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/BUILD +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD @@ -41,8 +41,8 @@ cc_test( ) cc_library( - name = "hand_gesture_recognizer_subgraph", - srcs = ["hand_gesture_recognizer_subgraph.cc"], + name = "hand_gesture_recognizer_graph", + srcs = ["hand_gesture_recognizer_graph.cc"], deps = [ "//mediapipe/calculators/core:concatenate_vector_calculator", "//mediapipe/calculators/tensor:tensor_converter_calculator", @@ -62,11 +62,11 @@ cc_library( "//mediapipe/tasks/cc/core:model_task_graph", "//mediapipe/tasks/cc/core:utils", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators:handedness_to_matrix_calculator", - 
"//mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators:landmarks_to_matrix_calculator", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:hand_gesture_recognizer_subgraph_options_cc_proto", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:landmarks_to_matrix_calculator_cc_proto", - "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_subgraph", + "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:handedness_to_matrix_calculator", + "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator", + "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator_cc_proto", + "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarks_detector_graph", "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "//mediapipe/tasks/metadata:metadata_schema_cc", "@com_google_absl//absl/status", diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/BUILD b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/BUILD similarity index 84% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/BUILD rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/BUILD index 4863c8682..a6de4f950 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/BUILD +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/BUILD @@ -12,11 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") + package(default_visibility = [ "//mediapipe/app/xeno:__subpackages__", "//mediapipe/tasks:internal", ]) +mediapipe_proto_library( + name = "landmarks_to_matrix_calculator_proto", + srcs = ["landmarks_to_matrix_calculator.proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + "//mediapipe/tasks/cc/core/proto:base_options_proto", + ], +) + cc_library( name = "handedness_to_matrix_calculator", srcs = ["handedness_to_matrix_calculator.cc"], @@ -25,7 +37,7 @@ cc_library( "//mediapipe/framework/formats:classification_cc_proto", "//mediapipe/framework/formats:matrix", "//mediapipe/framework/port:ret_check", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer:handedness_util", + "//mediapipe/tasks/cc/vision/gesture_recognizer:handedness_util", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", @@ -53,11 +65,11 @@ cc_library( name = "landmarks_to_matrix_calculator", srcs = ["landmarks_to_matrix_calculator.cc"], deps = [ + ":landmarks_to_matrix_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:matrix", "//mediapipe/framework/port:ret_check", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:landmarks_to_matrix_calculator_cc_proto", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator.cc similarity index 90% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator.cc rename to 
mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator.cc index 746293d21..b6c973a1b 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator.cc @@ -26,14 +26,16 @@ limitations under the License. #include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/formats/matrix.h" #include "mediapipe/framework/port/ret_check.h" -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h" +// TODO Update to use API2 namespace mediapipe { -namespace tasks { -namespace vision { +namespace api2 { namespace { +using ::mediapipe::tasks::vision::gesture_recognizer::GetLeftHandScore; + constexpr char kHandednessTag[] = "HANDEDNESS"; constexpr char kHandednessMatrixTag[] = "HANDEDNESS_MATRIX"; @@ -71,6 +73,8 @@ class HandednessToMatrixCalculator : public CalculatorBase { return absl::OkStatus(); } + // TODO remove this after change to API2, because Setting offset + // to 0 is the default in API2 absl::Status Open(CalculatorContext* cc) override { cc->SetOffset(TimestampDiff(0)); return absl::OkStatus(); @@ -95,6 +99,5 @@ absl::Status HandednessToMatrixCalculator::Process(CalculatorContext* cc) { return absl::OkStatus(); } -} // namespace vision -} // namespace tasks +} // namespace api2 } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc similarity index 97% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc index 
c93c48ac5..17b16bf80 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc @@ -28,8 +28,6 @@ limitations under the License. #include "mediapipe/framework/port/status_matchers.h" namespace mediapipe { -namespace tasks { -namespace vision { namespace { @@ -95,6 +93,4 @@ INSTANTIATE_TEST_CASE_P( } // namespace -} // namespace vision -} // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc similarity index 96% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc index 990e99920..b70689eaf 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc @@ -27,13 +27,11 @@ limitations under the License. 
#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/matrix.h" #include "mediapipe/framework/port/ret_check.h" -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h" +// TODO Update to use API2 namespace mediapipe { -namespace tasks { -namespace vision { - -using proto::LandmarksToMatrixCalculatorOptions; +namespace api2 { namespace { @@ -175,7 +173,7 @@ absl::Status ProcessLandmarks(LandmarkListT landmarks, CalculatorContext* cc) { // input_stream: "IMAGE_SIZE:image_size" // output_stream: "LANDMARKS_MATRIX:landmarks_matrix" // options { -// [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions.ext] { +// [mediapipe.LandmarksToMatrixCalculatorOptions.ext] { // object_normalization: true // object_normalization_origin_offset: 0 // } @@ -221,6 +219,5 @@ absl::Status LandmarksToMatrixCalculator::Process(CalculatorContext* cc) { return absl::OkStatus(); } -} // namespace vision -} // namespace tasks +} // namespace api2 } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.proto b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.proto similarity index 97% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.proto rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.proto index 6b004e203..10b034447 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.proto +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.proto @@ -15,7 +15,7 @@ limitations under the License. 
syntax = "proto2"; -package mediapipe.tasks.vision.proto; +package mediapipe; import "mediapipe/framework/calculator.proto"; diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc similarity index 96% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc index 05d238f66..8a68d8dae 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc @@ -28,8 +28,6 @@ limitations under the License. #include "mediapipe/framework/port/status_matchers.h" namespace mediapipe { -namespace tasks { -namespace vision { namespace { @@ -72,8 +70,7 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) { input_stream: "IMAGE_SIZE:image_size" output_stream: "LANDMARKS_MATRIX:landmarks_matrix" options { - [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions - .ext] { + [mediapipe.LandmarksToMatrixCalculatorOptions.ext] { object_normalization: $0 object_normalization_origin_offset: $1 } @@ -145,8 +142,7 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) { input_stream: "IMAGE_SIZE:image_size" output_stream: "LANDMARKS_MATRIX:landmarks_matrix" options { - [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions - .ext] { + [mediapipe.LandmarksToMatrixCalculatorOptions.ext] { object_normalization: $0 object_normalization_origin_offset: $1 } @@ -202,6 +198,4 @@ INSTANTIATE_TEST_CASE_P( } // namespace -} // namespace vision -} // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/hand_gesture_recognizer_subgraph.cc 
b/mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc similarity index 80% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/hand_gesture_recognizer_subgraph.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc index 247d8453d..05bc607ae 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/hand_gesture_recognizer_subgraph.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc @@ -34,14 +34,15 @@ limitations under the License. #include "mediapipe/tasks/cc/core/model_task_graph.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/utils.h" -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.pb.h" -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" #include "mediapipe/tasks/metadata/metadata_schema_generated.h" namespace mediapipe { namespace tasks { namespace vision { +namespace gesture_recognizer { namespace { @@ -50,9 +51,8 @@ using ::mediapipe::api2::Output; using ::mediapipe::api2::builder::Graph; using ::mediapipe::api2::builder::Source; using ::mediapipe::tasks::components::containers::proto::ClassificationResult; -using ::mediapipe::tasks::vision::hand_gesture_recognizer::proto:: - HandGestureRecognizerSubgraphOptions; -using ::mediapipe::tasks::vision::proto::LandmarksToMatrixCalculatorOptions; +using ::mediapipe::tasks::vision::gesture_recognizer::proto:: + HandGestureRecognizerGraphOptions; constexpr char kHandednessTag[] = "HANDEDNESS"; constexpr char kLandmarksTag[] = "LANDMARKS"; @@ -70,18 +70,6 @@ constexpr char 
kIndexTag[] = "INDEX"; constexpr char kIterableTag[] = "ITERABLE"; constexpr char kBatchEndTag[] = "BATCH_END"; -absl::Status SanityCheckOptions( - const HandGestureRecognizerSubgraphOptions& options) { - if (options.min_tracking_confidence() < 0 || - options.min_tracking_confidence() > 1) { - return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument, - "Invalid `min_tracking_confidence` option: " - "value must be in the range [0.0, 1.0]", - MediaPipeTasksStatus::kInvalidArgumentError); - } - return absl::OkStatus(); -} - Source> ConvertMatrixToTensor(Source matrix, Graph& graph) { auto& node = graph.AddNode("TensorConverterCalculator"); @@ -91,9 +79,10 @@ Source> ConvertMatrixToTensor(Source matrix, } // namespace -// A "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph" performs -// single hand gesture recognition. This graph is used as a building block for -// mediapipe.tasks.vision.HandGestureRecognizerGraph. +// A +// "mediapipe.tasks.vision.gesture_recognizer.SingleHandGestureRecognizerGraph" +// performs single hand gesture recognition. This graph is used as a building +// block for mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph. 
// // Inputs: // HANDEDNESS - ClassificationList @@ -113,14 +102,15 @@ Source> ConvertMatrixToTensor(Source matrix, // // Example: // node { -// calculator: "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph" +// calculator: +// "mediapipe.tasks.vision.gesture_recognizer.SingleHandGestureRecognizerGraph" // input_stream: "HANDEDNESS:handedness" // input_stream: "LANDMARKS:landmarks" // input_stream: "WORLD_LANDMARKS:world_landmarks" // input_stream: "IMAGE_SIZE:image_size" // output_stream: "HAND_GESTURES:hand_gestures" // options { -// [mediapipe.tasks.vision.hand_gesture_recognizer.proto.HandGestureRecognizerSubgraphOptions.ext] +// [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext] // { // base_options { // model_asset { @@ -130,19 +120,19 @@ Source> ConvertMatrixToTensor(Source matrix, // } // } // } -class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph { +class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph { public: absl::StatusOr GetConfig( SubgraphContext* sc) override { ASSIGN_OR_RETURN( const auto* model_resources, - CreateModelResources(sc)); + CreateModelResources(sc)); Graph graph; ASSIGN_OR_RETURN( auto hand_gestures, - BuildHandGestureRecognizerGraph( - sc->Options(), - *model_resources, graph[Input(kHandednessTag)], + BuildGestureRecognizerGraph( + sc->Options(), *model_resources, + graph[Input(kHandednessTag)], graph[Input(kLandmarksTag)], graph[Input(kWorldLandmarksTag)], graph[Input>(kImageSizeTag)], graph)); @@ -151,15 +141,13 @@ class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph { } private: - absl::StatusOr> BuildHandGestureRecognizerGraph( - const HandGestureRecognizerSubgraphOptions& graph_options, + absl::StatusOr> BuildGestureRecognizerGraph( + const HandGestureRecognizerGraphOptions& graph_options, const core::ModelResources& model_resources, Source handedness, Source hand_landmarks, Source hand_world_landmarks, Source> image_size, Graph& 
graph) { - MP_RETURN_IF_ERROR(SanityCheckOptions(graph_options)); - // Converts the ClassificationList to a matrix. auto& handedness_to_matrix = graph.AddNode("HandednessToMatrixCalculator"); handedness >> handedness_to_matrix.In(kHandednessTag); @@ -235,12 +223,15 @@ class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph { } }; +// clang-format off REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::SingleHandGestureRecognizerSubgraph); + ::mediapipe::tasks::vision::gesture_recognizer::SingleHandGestureRecognizerGraph); // NOLINT +// clang-format on -// A "mediapipe.tasks.vision.HandGestureRecognizerSubgraph" performs multi -// hand gesture recognition. This graph is used as a building block for -// mediapipe.tasks.vision.HandGestureRecognizerGraph. +// A +// "mediapipe.tasks.vision.gesture_recognizer.MultipleHandGestureRecognizerGraph" +// performs multi hand gesture recognition. This graph is used as a building +// block for mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph. 
// // Inputs: // HANDEDNESS - std::vector @@ -263,7 +254,8 @@ REGISTER_MEDIAPIPE_GRAPH( // // Example: // node { -// calculator: "mediapipe.tasks.vision.HandGestureRecognizerSubgraph" +// calculator: +// "mediapipe.tasks.vision.gesture_recognizer.MultipleHandGestureRecognizerGraph" // input_stream: "HANDEDNESS:handedness" // input_stream: "LANDMARKS:landmarks" // input_stream: "WORLD_LANDMARKS:world_landmarks" @@ -271,7 +263,7 @@ REGISTER_MEDIAPIPE_GRAPH( // input_stream: "HAND_TRACKING_IDS:hand_tracking_ids" // output_stream: "HAND_GESTURES:hand_gestures" // options { -// [mediapipe.tasks.vision.hand_gesture_recognizer.proto.HandGestureRecognizerSubgraph.ext] +// [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext] // { // base_options { // model_asset { @@ -281,15 +273,15 @@ REGISTER_MEDIAPIPE_GRAPH( // } // } // } -class HandGestureRecognizerSubgraph : public core::ModelTaskGraph { +class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph { public: absl::StatusOr GetConfig( SubgraphContext* sc) override { Graph graph; ASSIGN_OR_RETURN( auto multi_hand_gestures, - BuildMultiHandGestureRecognizerSubraph( - sc->Options(), + BuildMultiGestureRecognizerSubraph( + sc->Options(), graph[Input>(kHandednessTag)], graph[Input>(kLandmarksTag)], graph[Input>(kWorldLandmarksTag)], @@ -302,8 +294,8 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph { private: absl::StatusOr>> - BuildMultiHandGestureRecognizerSubraph( - const HandGestureRecognizerSubgraphOptions& graph_options, + BuildMultiGestureRecognizerSubraph( + const HandGestureRecognizerGraphOptions& graph_options, Source> multi_handedness, Source> multi_hand_landmarks, Source> multi_hand_world_landmarks, @@ -341,17 +333,18 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph { hand_tracking_id >> get_world_landmarks_at_index.In(kIndexTag); auto hand_world_landmarks = get_world_landmarks_at_index.Out(kItemTag); - auto& 
hand_gesture_recognizer_subgraph = graph.AddNode( - "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph"); - hand_gesture_recognizer_subgraph - .GetOptions() + auto& hand_gesture_recognizer_graph = graph.AddNode( + "mediapipe.tasks.vision.gesture_recognizer." + "SingleHandGestureRecognizerGraph"); + hand_gesture_recognizer_graph + .GetOptions() .CopyFrom(graph_options); - handedness >> hand_gesture_recognizer_subgraph.In(kHandednessTag); - hand_landmarks >> hand_gesture_recognizer_subgraph.In(kLandmarksTag); + handedness >> hand_gesture_recognizer_graph.In(kHandednessTag); + hand_landmarks >> hand_gesture_recognizer_graph.In(kLandmarksTag); hand_world_landmarks >> - hand_gesture_recognizer_subgraph.In(kWorldLandmarksTag); - image_size_clone >> hand_gesture_recognizer_subgraph.In(kImageSizeTag); - auto hand_gestures = hand_gesture_recognizer_subgraph.Out(kHandGesturesTag); + hand_gesture_recognizer_graph.In(kWorldLandmarksTag); + image_size_clone >> hand_gesture_recognizer_graph.In(kImageSizeTag); + auto hand_gestures = hand_gesture_recognizer_graph.Out(kHandGesturesTag); auto& end_loop_classification_results = graph.AddNode("mediapipe.tasks.EndLoopClassificationResultCalculator"); @@ -364,9 +357,12 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph { } }; +// clang-format off REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::HandGestureRecognizerSubgraph); + ::mediapipe::tasks::vision::gesture_recognizer::MultipleHandGestureRecognizerGraph); // NOLINT +// clang-format on +} // namespace gesture_recognizer } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.cc similarity index 93% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.cc index 00e19cdb5..60ccae92c 100644 --- 
a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h" #include @@ -25,6 +25,7 @@ limitations under the License. namespace mediapipe { namespace tasks { namespace vision { +namespace gesture_recognizer { namespace {} // namespace @@ -58,6 +59,7 @@ absl::StatusOr GetLeftHandScore( } } +} // namespace gesture_recognizer } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h similarity index 79% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h rename to mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h index 74e04b8cc..ae4137d0f 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ -#define MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ +#ifndef MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ +#define MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ #include "absl/status/statusor.h" #include "mediapipe/framework/formats/classification.pb.h" @@ -22,6 +22,7 @@ limitations under the License. 
namespace mediapipe { namespace tasks { namespace vision { +namespace gesture_recognizer { bool IsLeftHand(const mediapipe::Classification& c); @@ -30,8 +31,9 @@ bool IsRightHand(const mediapipe::Classification& c); absl::StatusOr GetLeftHandScore( const mediapipe::ClassificationList& classification_list); +} // namespace gesture_recognizer } // namespace vision } // namespace tasks } // namespace mediapipe -#endif // MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ +#endif // MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util_test.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util_test.cc similarity index 94% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util_test.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util_test.cc index 51dfb5dea..40a201ae8 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util_test.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h" #include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/port/gmock.h" @@ -23,6 +23,7 @@ limitations under the License. 
namespace mediapipe { namespace tasks { namespace vision { +namespace gesture_recognizer { namespace { TEST(GetLeftHandScore, SingleLeftHandClassification) { @@ -72,6 +73,7 @@ TEST(GetLeftHandScore, LeftAndRightLowerCaseHandClassification) { } } // namespace +} // namespace gesture_recognizer } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/BUILD b/mediapipe/tasks/cc/vision/gesture_recognizer/proto/BUILD similarity index 73% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/BUILD rename to mediapipe/tasks/cc/vision/gesture_recognizer/proto/BUILD index 44ec611b2..cb6ec8289 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/BUILD +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/proto/BUILD @@ -21,8 +21,8 @@ package(default_visibility = [ licenses(["notice"]) mediapipe_proto_library( - name = "hand_gesture_recognizer_subgraph_options_proto", - srcs = ["hand_gesture_recognizer_subgraph_options.proto"], + name = "hand_gesture_recognizer_graph_options_proto", + srcs = ["hand_gesture_recognizer_graph_options.proto"], deps = [ "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", @@ -30,12 +30,3 @@ mediapipe_proto_library( "//mediapipe/tasks/cc/core/proto:base_options_proto", ], ) - -mediapipe_proto_library( - name = "landmarks_to_matrix_calculator_proto", - srcs = ["landmarks_to_matrix_calculator.proto"], - deps = [ - "//mediapipe/framework:calculator_options_proto", - "//mediapipe/framework:calculator_proto", - ], -) diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.proto b/mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.proto similarity index 89% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.proto rename to 
mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.proto index d8ee95037..ac8cda15c 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.proto +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.proto @@ -15,15 +15,15 @@ limitations under the License. // TODO Refactor naming and class structure of hand related Tasks. syntax = "proto2"; -package mediapipe.tasks.vision.hand_gesture_recognizer.proto; +package mediapipe.tasks.vision.gesture_recognizer.proto; import "mediapipe/framework/calculator.proto"; import "mediapipe/tasks/cc/components/processors/proto/classifier_options.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto"; -message HandGestureRecognizerSubgraphOptions { +message HandGestureRecognizerGraphOptions { extend mediapipe.CalculatorOptions { - optional HandGestureRecognizerSubgraphOptions ext = 463370452; + optional HandGestureRecognizerGraphOptions ext = 463370452; } // Base options for configuring hand gesture recognition subgraph, such as // specifying the TfLite model file with metadata, accelerator options, etc. 
diff --git a/mediapipe/tasks/cc/vision/hand_detector/BUILD b/mediapipe/tasks/cc/vision/hand_detector/BUILD index c87cc50a6..433a30471 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/BUILD +++ b/mediapipe/tasks/cc/vision/hand_detector/BUILD @@ -51,7 +51,7 @@ cc_library( "//mediapipe/tasks/cc/core:model_task_graph", "//mediapipe/tasks/cc/core:utils", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", - "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto", "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", diff --git a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc index 7ead21bad..8573d718f 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc @@ -40,12 +40,13 @@ limitations under the License. 
#include "mediapipe/tasks/cc/core/model_task_graph.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/utils.h" -#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" namespace mediapipe { namespace tasks { namespace vision { +namespace hand_detector { namespace { @@ -53,18 +54,23 @@ using ::mediapipe::api2::Input; using ::mediapipe::api2::Output; using ::mediapipe::api2::builder::Graph; using ::mediapipe::api2::builder::Source; -using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions; +using ::mediapipe::tasks::vision::hand_detector::proto:: + HandDetectorGraphOptions; constexpr char kImageTag[] = "IMAGE"; -constexpr char kDetectionsTag[] = "DETECTIONS"; -constexpr char kNormRectsTag[] = "NORM_RECTS"; +constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS"; +constexpr char kHandRectsTag[] = "HAND_RECTS"; +constexpr char kPalmRectsTag[] = "PALM_RECTS"; struct HandDetectionOuts { Source> palm_detections; Source> hand_rects; + Source> palm_rects; + Source image; }; void ConfigureTensorsToDetectionsCalculator( + const HandDetectorGraphOptions& tasks_options, mediapipe::TensorsToDetectionsCalculatorOptions* options) { // TODO use metadata to configure these fields. options->set_num_classes(1); @@ -77,7 +83,7 @@ void ConfigureTensorsToDetectionsCalculator( options->set_sigmoid_score(true); options->set_score_clipping_thresh(100.0); options->set_reverse_output_order(true); - options->set_min_score_thresh(0.5); + options->set_min_score_thresh(tasks_options.min_detection_confidence()); options->set_x_scale(192.0); options->set_y_scale(192.0); options->set_w_scale(192.0); @@ -134,9 +140,9 @@ void ConfigureRectTransformationCalculator( } // namespace -// A "mediapipe.tasks.vision.HandDetectorGraph" performs hand detection. 
The -// Hand Detection Graph is based on palm detection model, and scale the detected -// palm bounding box to enclose the detected whole hand. +// A "mediapipe.tasks.vision.hand_detector.HandDetectorGraph" performs hand +// detection. The Hand Detection Graph is based on palm detection model, and +// scales the detected palm bounding box to enclose the detected whole hand. // Accepts CPU input images and outputs Landmark on CPU. // // Inputs: @@ -144,19 +150,27 @@ void ConfigureRectTransformationCalculator( // Image to perform detection on. // // Outputs: -// DETECTIONS - std::vector +// PALM_DETECTIONS - std::vector // Detected palms with maximum `num_hands` specified in options. -// NORM_RECTS - std::vector +// HAND_RECTS - std::vector // Detected hand bounding boxes in normalized coordinates. +// PALM_RECTS - std::vector +// Detected palm bounding boxes in normalized coordinates. +// IMAGE - Image +// The input image that the hand detector runs on and has the pixel data +// stored on the target storage (CPU vs GPU). 
// // Example: // node { -// calculator: "mediapipe.tasks.vision.HandDetectorGraph" +// calculator: "mediapipe.tasks.vision.hand_detector.HandDetectorGraph" // input_stream: "IMAGE:image" -// output_stream: "DETECTIONS:palm_detections" -// output_stream: "NORM_RECTS:hand_rects_from_palm_detections" +// output_stream: "PALM_DETECTIONS:palm_detections" +// output_stream: "HAND_RECTS:hand_rects_from_palm_detections" +// output_stream: "PALM_RECTS:palm_rects" +// output_stream: "IMAGE:image_out" // options { -// [mediapipe.tasks.hand_detector.proto.HandDetectorOptions.ext] { +// [mediapipe.tasks.vision.hand_detector.proto.HandDetectorGraphOptions.ext] +// { // base_options { // model_asset { // file_name: "palm_detection.tflite" @@ -173,16 +187,20 @@ class HandDetectorGraph : public core::ModelTaskGraph { absl::StatusOr GetConfig( SubgraphContext* sc) override { ASSIGN_OR_RETURN(const auto* model_resources, - CreateModelResources(sc)); + CreateModelResources(sc)); Graph graph; - ASSIGN_OR_RETURN(auto hand_detection_outs, - BuildHandDetectionSubgraph( - sc->Options(), *model_resources, - graph[Input(kImageTag)], graph)); + ASSIGN_OR_RETURN( + auto hand_detection_outs, + BuildHandDetectionSubgraph(sc->Options(), + *model_resources, + graph[Input(kImageTag)], graph)); hand_detection_outs.palm_detections >> - graph[Output>(kDetectionsTag)]; + graph[Output>(kPalmDetectionsTag)]; hand_detection_outs.hand_rects >> - graph[Output>(kNormRectsTag)]; + graph[Output>(kHandRectsTag)]; + hand_detection_outs.palm_rects >> + graph[Output>(kPalmRectsTag)]; + hand_detection_outs.image >> graph[Output(kImageTag)]; return graph.GetConfig(); } @@ -196,7 +214,7 @@ class HandDetectorGraph : public core::ModelTaskGraph { // image_in: image stream to run hand detection on. // graph: the mediapipe builder::Graph instance to be updated. 
absl::StatusOr BuildHandDetectionSubgraph( - const HandDetectorOptions& subgraph_options, + const HandDetectorGraphOptions& subgraph_options, const core::ModelResources& model_resources, Source image_in, Graph& graph) { // Add image preprocessing subgraph. The model expects aspect ratio @@ -235,6 +253,7 @@ class HandDetectorGraph : public core::ModelTaskGraph { auto& tensors_to_detections = graph.AddNode("TensorsToDetectionsCalculator"); ConfigureTensorsToDetectionsCalculator( + subgraph_options, &tensors_to_detections .GetOptions()); model_output_tensors >> tensors_to_detections.In("TENSORS"); @@ -281,7 +300,8 @@ class HandDetectorGraph : public core::ModelTaskGraph { .GetOptions()); palm_detections >> detections_to_rects.In("DETECTIONS"); image_size >> detections_to_rects.In("IMAGE_SIZE"); - auto palm_rects = detections_to_rects.Out("NORM_RECTS"); + auto palm_rects = + detections_to_rects[Output>("NORM_RECTS")]; // Expands and shifts the rectangle that contains the palm so that it's // likely to cover the entire hand. 
@@ -308,13 +328,18 @@ class HandDetectorGraph : public core::ModelTaskGraph { clip_normalized_rect_vector_size[Output>( "")]; - return HandDetectionOuts{.palm_detections = palm_detections, - .hand_rects = clipped_hand_rects}; + return HandDetectionOuts{ + /* palm_detections= */ palm_detections, + /* hand_rects= */ clipped_hand_rects, + /* palm_rects= */ palm_rects, + /* image= */ preprocessing[Output(kImageTag)]}; } }; -REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandDetectorGraph); +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::vision::hand_detector::HandDetectorGraph); +} // namespace hand_detector } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc index 3fa97664e..11cfc3026 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc +++ b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc @@ -40,13 +40,14 @@ limitations under the License. 
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" #include "mediapipe/tasks/cc/core/proto/external_file.pb.h" #include "mediapipe/tasks/cc/core/task_runner.h" -#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" namespace mediapipe { namespace tasks { namespace vision { +namespace hand_detector { namespace { using ::file::Defaults; @@ -60,7 +61,8 @@ using ::mediapipe::tasks::core::ModelResources; using ::mediapipe::tasks::core::TaskRunner; using ::mediapipe::tasks::core::proto::ExternalFile; using ::mediapipe::tasks::vision::DecodeImageFromFile; -using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions; +using ::mediapipe::tasks::vision::hand_detector::proto:: + HandDetectorGraphOptions; using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorResult; using ::testing::EqualsProto; using ::testing::TestParamInfo; @@ -80,9 +82,9 @@ constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt"; constexpr char kImageTag[] = "IMAGE"; constexpr char kImageName[] = "image"; -constexpr char kPalmDetectionsTag[] = "DETECTIONS"; +constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS"; constexpr char kPalmDetectionsName[] = "palm_detections"; -constexpr char kHandNormRectsTag[] = "NORM_RECTS"; +constexpr char kHandRectsTag[] = "HAND_RECTS"; constexpr char kHandNormRectsName[] = "hand_norm_rects"; constexpr float kPalmDetectionBboxMaxDiff = 0.01; @@ -104,22 +106,22 @@ absl::StatusOr> CreateTaskRunner( Graph graph; auto& hand_detection = - graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph"); + graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph"); - auto options = std::make_unique(); + auto options = std::make_unique(); 
options->mutable_base_options()->mutable_model_asset()->set_file_name( JoinPath("./", kTestDataDirectory, model_name)); options->set_min_detection_confidence(0.5); options->set_num_hands(num_hands); - hand_detection.GetOptions().Swap(options.get()); + hand_detection.GetOptions().Swap(options.get()); graph[Input(kImageTag)].SetName(kImageName) >> hand_detection.In(kImageTag); hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >> graph[Output>(kPalmDetectionsTag)]; - hand_detection.Out(kHandNormRectsTag).SetName(kHandNormRectsName) >> - graph[Output>(kHandNormRectsTag)]; + hand_detection.Out(kHandRectsTag).SetName(kHandNormRectsName) >> + graph[Output>(kHandRectsTag)]; return TaskRunner::Create( graph.GetConfig(), std::make_unique()); @@ -200,6 +202,7 @@ INSTANTIATE_TEST_SUITE_P( }); } // namespace +} // namespace hand_detector } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_detector/proto/BUILD b/mediapipe/tasks/cc/vision/hand_detector/proto/BUILD index 2d22aab10..77f3b2649 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/proto/BUILD +++ b/mediapipe/tasks/cc/vision/hand_detector/proto/BUILD @@ -21,8 +21,8 @@ package(default_visibility = [ licenses(["notice"]) mediapipe_proto_library( - name = "hand_detector_options_proto", - srcs = ["hand_detector_options.proto"], + name = "hand_detector_graph_options_proto", + srcs = ["hand_detector_graph_options.proto"], deps = [ "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", diff --git a/mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto b/mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto similarity index 76% rename from mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto rename to mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto index ae22c7991..be20583d0 100644 --- 
a/mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto +++ b/mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto @@ -21,24 +21,20 @@ import "mediapipe/framework/calculator.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto"; option java_package = "com.google.mediapipe.tasks.vision.handdetector"; -option java_outer_classname = "HandDetectorOptionsProto"; +option java_outer_classname = "HandDetectorGraphOptionsProto"; -message HandDetectorOptions { +message HandDetectorGraphOptions { extend mediapipe.CalculatorOptions { - optional HandDetectorOptions ext = 464864288; + optional HandDetectorGraphOptions ext = 464864288; } // Base options for configuring Task library, such as specifying the TfLite // model file with metadata, accelerator options, etc. optional core.proto.BaseOptions base_options = 1; - // The locale to use for display names specified through the TFLite Model - // Metadata, if any. Defaults to English. - optional string display_names_locale = 2 [default = "en"]; - // Minimum confidence value ([0.0, 1.0]) for confidence score to be considered // successfully detecting a hand in the image. - optional float min_detection_confidence = 3 [default = 0.5]; + optional float min_detection_confidence = 2 [default = 0.5]; // The maximum number of hands output by the detector. 
- optional int32 num_hands = 4; + optional int32 num_hands = 3; } diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD index 653976b96..a2bb458db 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD @@ -19,10 +19,10 @@ package(default_visibility = [ licenses(["notice"]) cc_library( - name = "hand_landmarker_subgraph", - srcs = ["hand_landmarker_subgraph.cc"], + name = "hand_landmarks_detector_graph", + srcs = ["hand_landmarks_detector_graph.cc"], deps = [ - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_subgraph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "//mediapipe/calculators/core:split_vector_calculator", @@ -51,6 +51,7 @@ cc_library( # TODO: move calculators in modules/hand_landmark/calculators to tasks dir. 
"//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator", "//mediapipe/tasks/cc:common", + "//mediapipe/tasks/cc/components/utils:gate", "//mediapipe/tasks/cc/components:image_preprocessing", "//mediapipe/tasks/cc/core:model_resources", "//mediapipe/tasks/cc/core:model_task_graph", @@ -66,3 +67,41 @@ cc_library( ) # TODO: Enable this test + +cc_library( + name = "hand_landmarker_graph", + srcs = ["hand_landmarker_graph.cc"], + deps = [ + ":hand_landmarks_detector_graph", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator_cc_proto", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:gate_calculator_cc_proto", + "//mediapipe/calculators/core:pass_through_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto", + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2:port", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/formats:tensor", + "//mediapipe/tasks/cc:common", + "//mediapipe/tasks/cc/components/utils:gate", + "//mediapipe/tasks/cc/core:model_task_graph", + "//mediapipe/tasks/cc/core:utils", + "//mediapipe/tasks/cc/vision/hand_detector:hand_detector_graph", + "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator", + "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto", + 
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto", + ], + alwayslink = 1, +) + +# TODO: Enable this test diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc new file mode 100644 index 000000000..949c06520 --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc @@ -0,0 +1,286 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include "mediapipe/calculators/core/clip_vector_size_calculator.pb.h" +#include "mediapipe/calculators/core/gate_calculator.pb.h" +#include "mediapipe/calculators/util/collection_has_min_size_calculator.pb.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/tasks/cc/common.h" +#include "mediapipe/tasks/cc/components/utils/gate.h" +#include "mediapipe/tasks/cc/core/model_task_graph.h" +#include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace hand_landmarker { + +namespace { + +using ::mediapipe::api2::Input; +using ::mediapipe::api2::Output; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Source; +using ::mediapipe::tasks::components::utils::DisallowIf; +using ::mediapipe::tasks::vision::hand_detector::proto:: + HandDetectorGraphOptions; +using ::mediapipe::tasks::vision::hand_landmarker::proto:: + HandLandmarkerGraphOptions; +using ::mediapipe::tasks::vision::hand_landmarker::proto:: + HandLandmarksDetectorGraphOptions; + +constexpr char kImageTag[] = "IMAGE"; +constexpr char kLandmarksTag[] = 
"LANDMARKS"; +constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS"; +constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME"; +constexpr char kHandednessTag[] = "HANDEDNESS"; +constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS"; +constexpr char kPalmRectsTag[] = "PALM_RECTS"; +constexpr char kPreviousLoopbackCalculatorName[] = "PreviousLoopbackCalculator"; + +struct HandLandmarkerOutputs { + Source> landmark_lists; + Source> world_landmark_lists; + Source> hand_rects_next_frame; + Source> handednesses; + Source> palm_rects; + Source> palm_detections; + Source image; +}; + +} // namespace + +// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand +// landmarks detection. The HandLandmarkerGraph consists of two subgraphs: +// HandDetectorGraph and MultipleHandLandmarksDetectorGraph. +// MultipleHandLandmarksDetectorGraph detects landmarks from bounding boxes +// produced by HandDetectorGraph. HandLandmarkerGraph tracks the landmarks over +// time, and skips the HandDetectorGraph. If the tracking is lost or the detected +// hands are less than configured max number hands, HandDetectorGraph would be +// triggered to detect hands. +// +// Accepts CPU input images and outputs Landmarks on CPU. +// +// Inputs: +// IMAGE - Image +// Image to perform hand landmarks detection on. +// +// Outputs: +// LANDMARKS - std::vector +// Vector of detected hand landmarks. +// WORLD_LANDMARKS - std::vector +// Vector of detected hand landmarks in world coordinates. +// HAND_RECT_NEXT_FRAME - std::vector +// Vector of the predicted rects enclosing the same hand RoI for landmark +// detection on the next frame. +// HANDEDNESS - std::vector +// Vector of classification of handedness. +// PALM_RECTS - std::vector +// Detected palm bounding boxes in normalized coordinates. +// PALM_DETECTIONS - std::vector +// Detected palms with maximum `num_hands` specified in options. 
+// IMAGE - Image +// The input image that the hand landmarker runs on and has the pixel data +// stored on the target storage (CPU vs GPU). +// +// Example: +// node { +// calculator: "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" +// input_stream: "IMAGE:image_in" +// output_stream: "LANDMARKS:hand_landmarks" +// output_stream: "WORLD_LANDMARKS:world_hand_landmarks" +// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame" +// output_stream: "HANDEDNESS:handedness" +// output_stream: "PALM_RECTS:palm_rects" +// output_stream: "PALM_DETECTIONS:palm_detections" +// output_stream: "IMAGE:image_out" +// options { +// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerGraphOptions.ext] { +// base_options { +// model_asset { +// file_name: "hand_landmarker.task" +// } +// } +// hand_detector_graph_options { +// base_options { +// model_asset { +// file_name: "palm_detection.tflite" +// } +// } +// min_detection_confidence: 0.5 +// num_hands: 2 +// } +// hand_landmarks_detector_graph_options { +// base_options { +// model_asset { +// file_name: "hand_landmark_lite.tflite" +// } +// } +// min_detection_confidence: 0.5 +// } +// } +// } +// } +class HandLandmarkerGraph : public core::ModelTaskGraph { + public: + absl::StatusOr GetConfig( + SubgraphContext* sc) override { + Graph graph; + ASSIGN_OR_RETURN( + auto hand_landmarker_outputs, + BuildHandLandmarkerGraph(sc->Options(), + graph[Input(kImageTag)], graph)); + hand_landmarker_outputs.landmark_lists >> + graph[Output>(kLandmarksTag)]; + hand_landmarker_outputs.world_landmark_lists >> + graph[Output>(kWorldLandmarksTag)]; + hand_landmarker_outputs.hand_rects_next_frame >> + graph[Output>(kHandRectNextFrameTag)]; + hand_landmarker_outputs.handednesses >> + graph[Output>(kHandednessTag)]; + hand_landmarker_outputs.palm_rects >> + graph[Output>(kPalmRectsTag)]; + hand_landmarker_outputs.palm_detections >> + graph[Output>(kPalmDetectionsTag)]; + hand_landmarker_outputs.image >> 
graph[Output(kImageTag)]; + + // TODO remove when support is fixed. + // As mediapipe GraphBuilder currently doesn't support configuring + // InputStreamInfo, modifying the CalculatorGraphConfig proto directly. + CalculatorGraphConfig config = graph.GetConfig(); + for (int i = 0; i < config.node_size(); ++i) { + if (config.node(i).calculator() == kPreviousLoopbackCalculatorName) { + auto* info = config.mutable_node(i)->add_input_stream_info(); + info->set_tag_index("LOOP"); + info->set_back_edge(true); + break; + } + } + return config; + } + + private: + // Adds a mediapipe hand landmark detection graph into the provided + // builder::Graph instance. + // + // tasks_options: the mediapipe tasks module HandLandmarkerGraphOptions. + // image_in: (mediapipe::Image) stream to run hand landmark detection on. + // graph: the mediapipe graph instance to be updated. + absl::StatusOr BuildHandLandmarkerGraph( + const HandLandmarkerGraphOptions& tasks_options, Source image_in, + Graph& graph) { + const int max_num_hands = + tasks_options.hand_detector_graph_options().num_hands(); + + auto& previous_loopback = graph.AddNode(kPreviousLoopbackCalculatorName); + image_in >> previous_loopback.In("MAIN"); + auto prev_hand_rects_from_landmarks = + previous_loopback[Output>("PREV_LOOP")]; + + auto& min_size_node = + graph.AddNode("NormalizedRectVectorHasMinSizeCalculator"); + prev_hand_rects_from_landmarks >> min_size_node.In("ITERABLE"); + min_size_node.GetOptions() + .set_min_size(max_num_hands); + auto has_enough_hands = min_size_node.Out("").Cast(); + + auto image_for_hand_detector = + DisallowIf(image_in, has_enough_hands, graph); + + auto& hand_detector = + graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph"); + hand_detector.GetOptions().CopyFrom( + tasks_options.hand_detector_graph_options()); + image_for_hand_detector >> hand_detector.In("IMAGE"); + auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS"); + + auto& hand_association = 
graph.AddNode("HandAssociationCalculator"); + hand_association.GetOptions() + .set_min_similarity_threshold(tasks_options.min_tracking_confidence()); + prev_hand_rects_from_landmarks >> + hand_association[Input>::Multiple("")][0]; + hand_rects_from_hand_detector >> + hand_association[Input>::Multiple("")][1]; + auto hand_rects = hand_association.Out(""); + + auto& clip_hand_rects = + graph.AddNode("ClipNormalizedRectVectorSizeCalculator"); + clip_hand_rects.GetOptions() + .set_max_vec_size(max_num_hands); + hand_rects >> clip_hand_rects.In(""); + auto clipped_hand_rects = clip_hand_rects.Out(""); + + auto& hand_landmarks_detector_graph = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker." + "MultipleHandLandmarksDetectorGraph"); + hand_landmarks_detector_graph + .GetOptions() + .CopyFrom(tasks_options.hand_landmarks_detector_graph_options()); + image_in >> hand_landmarks_detector_graph.In("IMAGE"); + clipped_hand_rects >> hand_landmarks_detector_graph.In("HAND_RECT"); + + auto hand_rects_for_next_frame = + hand_landmarks_detector_graph[Output>( + kHandRectNextFrameTag)]; + // Back edge. + hand_rects_for_next_frame >> previous_loopback.In("LOOP"); + + // TODO: Replace PassThroughCalculator with a calculator that + // converts the pixel data to be stored on the target storage (CPU vs GPU). 
+ auto& pass_through = graph.AddNode("PassThroughCalculator"); + image_in >> pass_through.In(""); + + return {{ + /* landmark_lists= */ hand_landmarks_detector_graph + [Output>(kLandmarksTag)], + /* world_landmark_lists= */ + hand_landmarks_detector_graph[Output>( + kWorldLandmarksTag)], + /* hand_rects_next_frame= */ hand_rects_for_next_frame, + hand_landmarks_detector_graph[Output>( + kHandednessTag)], + /* palm_rects= */ + hand_detector[Output>(kPalmRectsTag)], + /* palm_detections */ + hand_detector[Output>(kPalmDetectionsTag)], + /* image */ + pass_through[Output("")], + }}; + } +}; + +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerGraph); + +} // namespace hand_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc new file mode 100644 index 000000000..bce5613ff --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc @@ -0,0 +1,167 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h" +#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" +#include "mediapipe/tasks/cc/core/proto/external_file.pb.h" +#include "mediapipe/tasks/cc/core/task_runner.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/core/shims/cc/shims_test_util.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace hand_landmarker { + +namespace { + +using ::file::Defaults; +using ::file::GetTextProto; +using ::mediapipe::api2::Input; +using ::mediapipe::api2::Output; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Source; +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::core::TaskRunner; +using ::mediapipe::tasks::vision::hand_landmarker::proto:: + HandLandmarkerGraphOptions; +using ::testing::EqualsProto; +using 
::testing::proto::Approximately; +using ::testing::proto::Partially; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite"; +constexpr char kHandLandmarkerFullModel[] = "hand_landmark_full.tflite"; +constexpr char kLeftHandsImage[] = "left_hands.jpg"; + +constexpr char kImageTag[] = "IMAGE"; +constexpr char kImageName[] = "image_in"; +constexpr char kLandmarksTag[] = "LANDMARKS"; +constexpr char kLandmarksName[] = "landmarks"; +constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS"; +constexpr char kWorldLandmarksName[] = "world_landmarks"; +constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME"; +constexpr char kHandRectNextFrameName[] = "hand_rect_next_frame"; +constexpr char kHandednessTag[] = "HANDEDNESS"; +constexpr char kHandednessName[] = "handedness"; + +// Expected hand landmarks positions, in text proto format. +constexpr char kExpectedLeftUpHandLandmarksFilename[] = + "expected_left_up_hand_landmarks.prototxt"; +constexpr char kExpectedLeftDownHandLandmarksFilename[] = + "expected_left_down_hand_landmarks.prototxt"; + +constexpr float kFullModelFractionDiff = 0.03; // percentage +constexpr float kAbsMargin = 0.03; +constexpr int kMaxNumHands = 2; +constexpr float kMinTrackingConfidence = 0.5; + +NormalizedLandmarkList GetExpectedLandmarkList(absl::string_view filename) { + NormalizedLandmarkList expected_landmark_list; + MP_EXPECT_OK(GetTextProto(file::JoinPath("./", kTestDataDirectory, filename), + &expected_landmark_list, Defaults())); + return expected_landmark_list; +} + +// Helper function to create a Hand Landmarker TaskRunner. 
+absl::StatusOr> CreateTaskRunner() { + Graph graph; + auto& hand_landmarker_graph = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"); + auto& options = + hand_landmarker_graph.GetOptions(); + options.mutable_hand_detector_graph_options() + ->mutable_base_options() + ->mutable_model_asset() + ->set_file_name(JoinPath("./", kTestDataDirectory, kPalmDetectionModel)); + options.mutable_hand_detector_graph_options()->mutable_base_options(); + options.mutable_hand_detector_graph_options()->set_num_hands(kMaxNumHands); + options.mutable_hand_landmarks_detector_graph_options() + ->mutable_base_options() + ->mutable_model_asset() + ->set_file_name( + JoinPath("./", kTestDataDirectory, kHandLandmarkerFullModel)); + options.set_min_tracking_confidence(kMinTrackingConfidence); + + graph[Input(kImageTag)].SetName(kImageName) >> + hand_landmarker_graph.In(kImageTag); + hand_landmarker_graph.Out(kLandmarksTag).SetName(kLandmarksName) >> + graph[Output>(kLandmarksTag)]; + hand_landmarker_graph.Out(kWorldLandmarksTag).SetName(kWorldLandmarksName) >> + graph[Output>(kWorldLandmarksTag)]; + hand_landmarker_graph.Out(kHandednessTag).SetName(kHandednessName) >> + graph[Output>(kHandednessTag)]; + hand_landmarker_graph.Out(kHandRectNextFrameTag) + .SetName(kHandRectNextFrameName) >> + graph[Output>(kHandRectNextFrameTag)]; + return TaskRunner::Create( + graph.GetConfig(), absl::make_unique()); +} + +class HandLandmarkerTest : public tflite_shims::testing::Test {}; + +TEST_F(HandLandmarkerTest, Succeeds) { + MP_ASSERT_OK_AND_ASSIGN( + Image image, + DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kLeftHandsImage))); + MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner()); + auto output_packets = + task_runner->Process({{kImageName, MakePacket(std::move(image))}}); + const auto& landmarks = (*output_packets)[kLandmarksName] + .Get>(); + ASSERT_EQ(landmarks.size(), kMaxNumHands); + std::vector expected_landmarks = { + 
GetExpectedLandmarkList(kExpectedLeftUpHandLandmarksFilename), + GetExpectedLandmarkList(kExpectedLeftDownHandLandmarksFilename)}; + + EXPECT_THAT(landmarks[0], + Approximately(Partially(EqualsProto(expected_landmarks[0])), + /*margin=*/kAbsMargin, + /*fraction=*/kFullModelFractionDiff)); + EXPECT_THAT(landmarks[1], + Approximately(Partially(EqualsProto(expected_landmarks[1])), + /*margin=*/kAbsMargin, + /*fraction=*/kFullModelFractionDiff)); +} + +} // namespace + +} // namespace hand_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc similarity index 89% rename from mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc rename to mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc index fff4ae0d4..23521790d 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc @@ -34,12 +34,13 @@ limitations under the License. 
#include "mediapipe/framework/formats/tensor.h" #include "mediapipe/tasks/cc/common.h" #include "mediapipe/tasks/cc/components/image_preprocessing.h" +#include "mediapipe/tasks/cc/components/utils/gate.h" #include "mediapipe/tasks/cc/core/model_resources.h" #include "mediapipe/tasks/cc/core/model_task_graph.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/utils.h" #include "mediapipe/tasks/cc/metadata/metadata_extractor.h" -#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" #include "mediapipe/tasks/metadata/metadata_schema_generated.h" #include "mediapipe/util/label_map.pb.h" @@ -48,6 +49,7 @@ limitations under the License. namespace mediapipe { namespace tasks { namespace vision { +namespace hand_landmarker { namespace { @@ -55,9 +57,10 @@ using ::mediapipe::api2::Input; using ::mediapipe::api2::Output; using ::mediapipe::api2::builder::Graph; using ::mediapipe::api2::builder::Source; +using ::mediapipe::tasks::components::utils::AllowIf; using ::mediapipe::tasks::core::ModelResources; using ::mediapipe::tasks::vision::hand_landmarker::proto:: - HandLandmarkerSubgraphOptions; + HandLandmarksDetectorGraphOptions; using LabelItems = mediapipe::proto_ns::Map; constexpr char kImageTag[] = "IMAGE"; @@ -82,7 +85,6 @@ struct SingleHandLandmarkerOutputs { Source hand_presence; Source hand_presence_score; Source handedness; - Source> image_size; }; struct HandLandmarkerOutputs { @@ -92,10 +94,10 @@ struct HandLandmarkerOutputs { Source> presences; Source> presence_scores; Source> handednesses; - Source> image_size; }; -absl::Status SanityCheckOptions(const HandLandmarkerSubgraphOptions& options) { +absl::Status SanityCheckOptions( + const HandLandmarksDetectorGraphOptions& options) { if 
(options.min_detection_confidence() < 0 || options.min_detection_confidence() > 1) { return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument, @@ -182,8 +184,8 @@ void ConfigureHandRectTransformationCalculator( } // namespace -// A "mediapipe.tasks.vision.SingleHandLandmarkerSubgraph" performs hand -// landmark detection. +// A "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarksDetectorGraph" +// performs hand landmarks detection. // - Accepts CPU input images and outputs Landmark on CPU. // // Inputs: @@ -208,12 +210,11 @@ void ConfigureHandRectTransformationCalculator( // Float value indicates the probability that the hand is present. // HANDEDNESS - ClassificationList // Classification of handedness. -// IMAGE_SIZE - std::vector -// The size of input image. // // Example: // node { -// calculator: "mediapipe.tasks.vision.SingleHandLandmarkerSubgraph" +// calculator: +// "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarksDetectorGraph" // input_stream: "IMAGE:input_image" // input_stream: "HAND_RECT:hand_rect" // output_stream: "LANDMARKS:hand_landmarks" @@ -221,10 +222,8 @@ void ConfigureHandRectTransformationCalculator( // output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame" // output_stream: "PRESENCE:hand_presence" // output_stream: "PRESENCE_SCORE:hand_presence_score" -// output_stream: "HANDEDNESS:handedness" -// output_stream: "IMAGE_SIZE:image_size" // options { -// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext] +// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarksDetectorGraphOptions.ext] // { // base_options { // model_asset { @@ -235,16 +234,17 @@ void ConfigureHandRectTransformationCalculator( // } // } // } -class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { +class SingleHandLandmarksDetectorGraph : public core::ModelTaskGraph { public: absl::StatusOr GetConfig( SubgraphContext* sc) override { - ASSIGN_OR_RETURN(const auto* model_resources, - 
CreateModelResources(sc)); + ASSIGN_OR_RETURN( + const auto* model_resources, + CreateModelResources(sc)); Graph graph; ASSIGN_OR_RETURN(auto hand_landmark_detection_outs, - BuildSingleHandLandmarkerSubgraph( - sc->Options(), + BuildSingleHandLandmarksDetectorGraph( + sc->Options(), *model_resources, graph[Input(kImageTag)], graph[Input(kHandRectTag)], graph)); hand_landmark_detection_outs.hand_landmarks >> @@ -259,8 +259,6 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { graph[Output(kPresenceScoreTag)]; hand_landmark_detection_outs.handedness >> graph[Output(kHandednessTag)]; - hand_landmark_detection_outs.image_size >> - graph[Output>(kImageSizeTag)]; return graph.GetConfig(); } @@ -269,14 +267,16 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { // Adds a mediapipe hand landmark detection graph into the provided // builder::Graph instance. // - // subgraph_options: the mediapipe tasks module HandLandmarkerSubgraphOptions. - // model_resources: the ModelSources object initialized from a hand landmark + // subgraph_options: the mediapipe tasks module + // HandLandmarksDetectorGraphOptions. model_resources: the ModelSources object + // initialized from a hand landmark // detection model file with model metadata. // image_in: (mediapipe::Image) stream to run hand landmark detection on. // rect: (NormalizedRect) stream to run on the RoI of image. // graph: the mediapipe graph instance to be updated. - absl::StatusOr BuildSingleHandLandmarkerSubgraph( - const HandLandmarkerSubgraphOptions& subgraph_options, + absl::StatusOr + BuildSingleHandLandmarksDetectorGraph( + const HandLandmarksDetectorGraphOptions& subgraph_options, const core::ModelResources& model_resources, Source image_in, Source hand_rect, Graph& graph) { MP_RETURN_IF_ERROR(SanityCheckOptions(subgraph_options)); @@ -332,18 +332,7 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { // score of hand presence. 
auto& tensors_to_hand_presence = graph.AddNode("TensorsToFloatsCalculator"); hand_flag_tensors >> tensors_to_hand_presence.In("TENSORS"); - - // Converts the handedness tensor into a float that represents the - // classification score of handedness. - auto& tensors_to_handedness = - graph.AddNode("TensorsToClassificationCalculator"); - ConfigureTensorsToHandednessCalculator( - &tensors_to_handedness.GetOptions< - mediapipe::TensorsToClassificationCalculatorOptions>()); - handedness_tensors >> tensors_to_handedness.In("TENSORS"); auto hand_presence_score = tensors_to_hand_presence[Output("FLOAT")]; - auto handedness = - tensors_to_handedness[Output("CLASSIFICATIONS")]; // Applies a threshold to the confidence score to determine whether a // hand is present. @@ -354,6 +343,18 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { hand_presence_score >> hand_presence_thresholding.In("FLOAT"); auto hand_presence = hand_presence_thresholding[Output("FLAG")]; + // Converts the handedness tensor into a float that represents the + // classification score of handedness. 
+ auto& tensors_to_handedness = + graph.AddNode("TensorsToClassificationCalculator"); + ConfigureTensorsToHandednessCalculator( + &tensors_to_handedness.GetOptions< + mediapipe::TensorsToClassificationCalculatorOptions>()); + handedness_tensors >> tensors_to_handedness.In("TENSORS"); + auto handedness = AllowIf( + tensors_to_handedness[Output("CLASSIFICATIONS")], + hand_presence, graph); + // Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed // hand image (after image transformation with the FIT scale mode) to the // corresponding locations on the same image with the letterbox removed @@ -371,8 +372,9 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { landmark_letterbox_removal.Out("LANDMARKS") >> landmark_projection.In("NORM_LANDMARKS"); hand_rect >> landmark_projection.In("NORM_RECT"); - auto projected_landmarks = - landmark_projection[Output("NORM_LANDMARKS")]; + auto projected_landmarks = AllowIf( + landmark_projection[Output("NORM_LANDMARKS")], + hand_presence, graph); // Projects the world landmarks from the cropped hand image to the // corresponding locations on the full image before cropping (input to the @@ -383,7 +385,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { world_landmark_projection.In("LANDMARKS"); hand_rect >> world_landmark_projection.In("NORM_RECT"); auto projected_world_landmarks = - world_landmark_projection[Output("LANDMARKS")]; + AllowIf(world_landmark_projection[Output("LANDMARKS")], + hand_presence, graph); // Converts the hand landmarks into a rectangle (normalized by image size) // that encloses the hand. 
@@ -403,7 +406,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { hand_landmarks_to_rect.Out("NORM_RECT") >> hand_rect_transformation.In("NORM_RECT"); auto hand_rect_next_frame = - hand_rect_transformation[Output("")]; + AllowIf(hand_rect_transformation[Output("")], + hand_presence, graph); return {{ /* hand_landmarks= */ projected_landmarks, @@ -412,16 +416,17 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { /* hand_presence= */ hand_presence, /* hand_presence_score= */ hand_presence_score, /* handedness= */ handedness, - /* image_size= */ image_size, }}; } }; +// clang-format off REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::SingleHandLandmarkerSubgraph); + ::mediapipe::tasks::vision::hand_landmarker::SingleHandLandmarksDetectorGraph); // NOLINT +// clang-format on -// A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi -// hand landmark detection. +// A "mediapipe.tasks.vision.hand_landmarker.MultipleHandLandmarksDetectorGraph" +// performs multi hand landmark detection. // - Accepts CPU input image and a vector of hand rect RoIs to detect the // multiple hands landmarks enclosed by the RoIs. Output vectors of // hand landmarks related results, where each element in the vectors @@ -449,12 +454,11 @@ REGISTER_MEDIAPIPE_GRAPH( // Vector of float value indicates the probability that the hand is present. // HANDEDNESS - std::vector // Vector of classification of handedness. -// IMAGE_SIZE - std::vector -// The size of input image. 
// // Example: // node { -// calculator: "mediapipe.tasks.vision.HandLandmarkerSubgraph" +// calculator: +// "mediapipe.tasks.vision.hand_landmarker.MultipleHandLandmarksDetectorGraph" // input_stream: "IMAGE:input_image" // input_stream: "HAND_RECT:hand_rect" // output_stream: "LANDMARKS:hand_landmarks" @@ -463,9 +467,8 @@ REGISTER_MEDIAPIPE_GRAPH( // output_stream: "PRESENCE:hand_presence" // output_stream: "PRESENCE_SCORE:hand_presence_score" // output_stream: "HANDEDNESS:handedness" -// output_stream: "IMAGE_SIZE:image_size" // options { -// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext] +// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarksDetectorGraphOptions.ext] // { // base_options { // model_asset { @@ -476,15 +479,15 @@ REGISTER_MEDIAPIPE_GRAPH( // } // } // } -class HandLandmarkerSubgraph : public core::ModelTaskGraph { +class MultipleHandLandmarksDetectorGraph : public core::ModelTaskGraph { public: absl::StatusOr GetConfig( SubgraphContext* sc) override { Graph graph; ASSIGN_OR_RETURN( auto hand_landmark_detection_outputs, - BuildHandLandmarkerSubgraph( - sc->Options(), + BuildHandLandmarksDetectorGraph( + sc->Options(), graph[Input(kImageTag)], graph[Input>(kHandRectTag)], graph)); hand_landmark_detection_outputs.landmark_lists >> @@ -499,21 +502,20 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { graph[Output>(kPresenceScoreTag)]; hand_landmark_detection_outputs.handednesses >> graph[Output>(kHandednessTag)]; - hand_landmark_detection_outputs.image_size >> - graph[Output>(kImageSizeTag)]; return graph.GetConfig(); } private: - absl::StatusOr BuildHandLandmarkerSubgraph( - const HandLandmarkerSubgraphOptions& subgraph_options, + absl::StatusOr BuildHandLandmarksDetectorGraph( + const HandLandmarksDetectorGraphOptions& subgraph_options, Source image_in, Source> multi_hand_rects, Graph& graph) { - auto& hand_landmark_subgraph = - 
graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph"); - hand_landmark_subgraph.GetOptions().CopyFrom( - subgraph_options); + auto& hand_landmark_subgraph = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker." + "SingleHandLandmarksDetectorGraph"); + hand_landmark_subgraph.GetOptions() + .CopyFrom(subgraph_options); auto& begin_loop_multi_hand_rects = graph.AddNode("BeginLoopNormalizedRectCalculator"); @@ -533,8 +535,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { hand_landmark_subgraph.Out("HAND_RECT_NEXT_FRAME"); auto landmarks = hand_landmark_subgraph.Out("LANDMARKS"); auto world_landmarks = hand_landmark_subgraph.Out("WORLD_LANDMARKS"); - auto image_size = - hand_landmark_subgraph[Output>("IMAGE_SIZE")]; auto& end_loop_handedness = graph.AddNode("EndLoopClassificationListCalculator"); @@ -585,13 +585,16 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { /* presences= */ presences, /* presence_scores= */ presence_scores, /* handednesses= */ handednesses, - /* image_size= */ image_size, }}; } }; -REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandLandmarkerSubgraph); +// clang-format off +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::vision::hand_landmarker::MultipleHandLandmarksDetectorGraph); // NOLINT +// clang-format on +} // namespace hand_landmarker } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc similarity index 96% rename from mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc rename to mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc index 1c2bc6da7..d1e928ce7 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc @@ -39,12 +39,13 @@ 
limitations under the License. #include "mediapipe/tasks/cc/core/proto/base_options.pb.h" #include "mediapipe/tasks/cc/core/proto/external_file.pb.h" #include "mediapipe/tasks/cc/core/task_runner.h" -#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" namespace mediapipe { namespace tasks { namespace vision { +namespace hand_landmarker { namespace { using ::file::Defaults; @@ -57,7 +58,7 @@ using ::mediapipe::file::JoinPath; using ::mediapipe::tasks::core::TaskRunner; using ::mediapipe::tasks::vision::DecodeImageFromFile; using ::mediapipe::tasks::vision::hand_landmarker::proto:: - HandLandmarkerSubgraphOptions; + HandLandmarksDetectorGraphOptions; using ::testing::ElementsAreArray; using ::testing::EqualsProto; using ::testing::Pointwise; @@ -112,13 +113,14 @@ absl::StatusOr> CreateSingleHandTaskRunner( absl::string_view model_name) { Graph graph; - auto& hand_landmark_detection = - graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph"); + auto& hand_landmark_detection = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker." + "SingleHandLandmarksDetectorGraph"); - auto options = std::make_unique(); + auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( JoinPath("./", kTestDataDirectory, model_name)); - hand_landmark_detection.GetOptions().Swap( + hand_landmark_detection.GetOptions().Swap( options.get()); graph[Input(kImageTag)].SetName(kImageName) >> @@ -151,13 +153,14 @@ absl::StatusOr> CreateMultiHandTaskRunner( absl::string_view model_name) { Graph graph; - auto& multi_hand_landmark_detection = - graph.AddNode("mediapipe.tasks.vision.HandLandmarkerSubgraph"); + auto& multi_hand_landmark_detection = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker." 
+ "MultipleHandLandmarksDetectorGraph"); - auto options = std::make_unique(); + auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( JoinPath("./", kTestDataDirectory, model_name)); - multi_hand_landmark_detection.GetOptions() + multi_hand_landmark_detection.GetOptions() .Swap(options.get()); graph[Input(kImageTag)].SetName(kImageName) >> @@ -462,6 +465,7 @@ INSTANTIATE_TEST_SUITE_P( }); } // namespace +} // namespace hand_landmarker } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD index 8cc984c47..945b12f3e 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD @@ -21,8 +21,8 @@ package(default_visibility = [ licenses(["notice"]) mediapipe_proto_library( - name = "hand_landmarker_subgraph_options_proto", - srcs = ["hand_landmarker_subgraph_options.proto"], + name = "hand_landmarks_detector_graph_options_proto", + srcs = ["hand_landmarks_detector_graph_options.proto"], deps = [ "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", @@ -31,13 +31,13 @@ mediapipe_proto_library( ) mediapipe_proto_library( - name = "hand_landmarker_options_proto", - srcs = ["hand_landmarker_options.proto"], + name = "hand_landmarker_graph_options_proto", + srcs = ["hand_landmarker_graph_options.proto"], deps = [ - ":hand_landmarker_subgraph_options_proto", + ":hand_landmarks_detector_graph_options_proto", "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", "//mediapipe/tasks/cc/core/proto:base_options_proto", - "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_proto", + "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_proto", ], ) diff --git 
a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_options.proto b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto similarity index 67% rename from mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_options.proto rename to mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto index b3d82eda4..7f3536b09 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_options.proto +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto @@ -19,22 +19,26 @@ package mediapipe.tasks.vision.hand_landmarker.proto; import "mediapipe/framework/calculator.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto"; -import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto"; -import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto"; +import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto"; +import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto"; -message HandLandmarkerOptions { +message HandLandmarkerGraphOptions { extend mediapipe.CalculatorOptions { - optional HandLandmarkerOptions ext = 462713202; + optional HandLandmarkerGraphOptions ext = 462713202; } // Base options for configuring MediaPipe Tasks, such as specifying the TfLite // model file with metadata, accelerator options, etc. optional core.proto.BaseOptions base_options = 1; - // The locale to use for display names specified through the TFLite Model - // Metadata, if any. Defaults to English. - optional string display_names_locale = 2 [default = "en"]; + // Options for hand detector graph. + optional hand_detector.proto.HandDetectorGraphOptions + hand_detector_graph_options = 2; - optional hand_detector.proto.HandDetectorOptions hand_detector_options = 3; + // Options for hand landmarks detector graph. 
+ optional HandLandmarksDetectorGraphOptions + hand_landmarks_detector_graph_options = 3; - optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 4; + // Minimum confidence for hand landmarks tracking to be considered + // successful. + optional float min_tracking_confidence = 4 [default = 0.5]; } diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto similarity index 77% rename from mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto rename to mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto index 9e93384d6..8c0fc66f2 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto @@ -20,19 +20,15 @@ package mediapipe.tasks.vision.hand_landmarker.proto; import "mediapipe/framework/calculator.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto"; -message HandLandmarkerSubgraphOptions { +message HandLandmarksDetectorGraphOptions { extend mediapipe.CalculatorOptions { - optional HandLandmarkerSubgraphOptions ext = 474472470; + optional HandLandmarksDetectorGraphOptions ext = 474472470; } // Base options for configuring MediaPipe Tasks, such as specifying the TfLite // model file with metadata, accelerator options, etc. optional core.proto.BaseOptions base_options = 1; - // The locale to use for display names specified through the TFLite Model - // Metadata, if any. Defaults to English. - optional string display_names_locale = 2 [default = "en"]; - // Minimum confidence value ([0.0, 1.0]) for hand presence score to be // considered successfully detecting a hand in the image. 
- optional float min_detection_confidence = 3 [default = 0.5]; + optional float min_detection_confidence = 2 [default = 0.5]; }