From cfd0f3e79fa631692ac4e809f4619d6ff53d4421 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 3 Oct 2022 13:48:47 -0700 Subject: [PATCH 01/10] Add HandLandmarkerGraph which connect HandDetectorGraph and HandLandmarkerSubgraph with landmarks tracking. PiperOrigin-RevId: 478596004 --- mediapipe/tasks/cc/vision/hand_detector/BUILD | 2 +- .../hand_detector/hand_detector_graph.cc | 63 ++-- .../hand_detector/hand_detector_graph_test.cc | 17 +- .../tasks/cc/vision/hand_detector/proto/BUILD | 4 +- ...roto => hand_detector_graph_options.proto} | 14 +- .../tasks/cc/vision/hand_landmarker/BUILD | 39 +++ .../hand_landmarker/hand_landmarker_graph.cc | 284 ++++++++++++++++++ .../hand_landmarker_graph_test.cc | 167 ++++++++++ .../hand_landmarker_subgraph.cc | 68 ++--- .../hand_landmarker_subgraph_test.cc | 10 +- .../cc/vision/hand_landmarker/proto/BUILD | 6 +- ...to => hand_landmarker_graph_options.proto} | 19 +- .../hand_landmarker_subgraph_options.proto | 6 +- 13 files changed, 600 insertions(+), 99 deletions(-) rename mediapipe/tasks/cc/vision/hand_detector/proto/{hand_detector_options.proto => hand_detector_graph_options.proto} (76%) create mode 100644 mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc create mode 100644 mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc rename mediapipe/tasks/cc/vision/hand_landmarker/proto/{hand_landmarker_options.proto => hand_landmarker_graph_options.proto} (74%) diff --git a/mediapipe/tasks/cc/vision/hand_detector/BUILD b/mediapipe/tasks/cc/vision/hand_detector/BUILD index c87cc50a6..433a30471 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/BUILD +++ b/mediapipe/tasks/cc/vision/hand_detector/BUILD @@ -51,7 +51,7 @@ cc_library( "//mediapipe/tasks/cc/core:model_task_graph", "//mediapipe/tasks/cc/core:utils", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", - "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_cc_proto", + 
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto", "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", diff --git a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc index 7ead21bad..7ef8d62f5 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc @@ -40,7 +40,7 @@ limitations under the License. #include "mediapipe/tasks/cc/core/model_task_graph.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/utils.h" -#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" namespace mediapipe { @@ -53,18 +53,23 @@ using ::mediapipe::api2::Input; using ::mediapipe::api2::Output; using ::mediapipe::api2::builder::Graph; using ::mediapipe::api2::builder::Source; -using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions; +using ::mediapipe::tasks::vision::hand_detector::proto:: + HandDetectorGraphOptions; constexpr char kImageTag[] = "IMAGE"; -constexpr char kDetectionsTag[] = "DETECTIONS"; -constexpr char kNormRectsTag[] = "NORM_RECTS"; +constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS"; +constexpr char kHandRectsTag[] = "HAND_RECTS"; +constexpr char kPalmRectsTag[] = "PALM_RECTS"; struct HandDetectionOuts { Source> palm_detections; Source> hand_rects; + Source> palm_rects; + Source image; }; void ConfigureTensorsToDetectionsCalculator( + const HandDetectorGraphOptions& tasks_options, mediapipe::TensorsToDetectionsCalculatorOptions* options) { // TODO use metadata to configure these fields. 
options->set_num_classes(1); @@ -77,7 +82,7 @@ void ConfigureTensorsToDetectionsCalculator( options->set_sigmoid_score(true); options->set_score_clipping_thresh(100.0); options->set_reverse_output_order(true); - options->set_min_score_thresh(0.5); + options->set_min_score_thresh(tasks_options.min_detection_confidence()); options->set_x_scale(192.0); options->set_y_scale(192.0); options->set_w_scale(192.0); @@ -144,19 +149,26 @@ void ConfigureRectTransformationCalculator( // Image to perform detection on. // // Outputs: -// DETECTIONS - std::vector +// PALM_DETECTIONS - std::vector // Detected palms with maximum `num_hands` specified in options. -// NORM_RECTS - std::vector +// HAND_RECTS - std::vector // Detected hand bounding boxes in normalized coordinates. +// PALM_RECTS - std::vector +// Detected palm bounding boxes in normalized coordinates. +// IMAGE - Image +// The input image that the hand detector runs on and has the pixel data +// stored on the target storage (CPU vs GPU). // // Example: // node { // calculator: "mediapipe.tasks.vision.HandDetectorGraph" // input_stream: "IMAGE:image" -// output_stream: "DETECTIONS:palm_detections" -// output_stream: "NORM_RECTS:hand_rects_from_palm_detections" +// output_stream: "PALM_DETECTIONS:palm_detections" +// output_stream: "HAND_RECTS:hand_rects_from_palm_detections" +// output_stream: "PALM_RECTS:palm_rects" +// output_stream: "IMAGE:image_out" // options { -// [mediapipe.tasks.hand_detector.proto.HandDetectorOptions.ext] { +// [mediapipe.tasks.hand_detector.proto.HandDetectorGraphOptions.ext] { // base_options { // model_asset { // file_name: "palm_detection.tflite" @@ -173,16 +185,20 @@ class HandDetectorGraph : public core::ModelTaskGraph { absl::StatusOr GetConfig( SubgraphContext* sc) override { ASSIGN_OR_RETURN(const auto* model_resources, - CreateModelResources(sc)); + CreateModelResources(sc)); Graph graph; - ASSIGN_OR_RETURN(auto hand_detection_outs, - BuildHandDetectionSubgraph( - sc->Options(), 
*model_resources, - graph[Input(kImageTag)], graph)); + ASSIGN_OR_RETURN( + auto hand_detection_outs, + BuildHandDetectionSubgraph(sc->Options(), + *model_resources, + graph[Input(kImageTag)], graph)); hand_detection_outs.palm_detections >> - graph[Output>(kDetectionsTag)]; + graph[Output>(kPalmDetectionsTag)]; hand_detection_outs.hand_rects >> - graph[Output>(kNormRectsTag)]; + graph[Output>(kHandRectsTag)]; + hand_detection_outs.palm_rects >> + graph[Output>(kPalmRectsTag)]; + hand_detection_outs.image >> graph[Output(kImageTag)]; return graph.GetConfig(); } @@ -196,7 +212,7 @@ class HandDetectorGraph : public core::ModelTaskGraph { // image_in: image stream to run hand detection on. // graph: the mediapipe builder::Graph instance to be updated. absl::StatusOr BuildHandDetectionSubgraph( - const HandDetectorOptions& subgraph_options, + const HandDetectorGraphOptions& subgraph_options, const core::ModelResources& model_resources, Source image_in, Graph& graph) { // Add image preprocessing subgraph. The model expects aspect ratio @@ -235,6 +251,7 @@ class HandDetectorGraph : public core::ModelTaskGraph { auto& tensors_to_detections = graph.AddNode("TensorsToDetectionsCalculator"); ConfigureTensorsToDetectionsCalculator( + subgraph_options, &tensors_to_detections .GetOptions()); model_output_tensors >> tensors_to_detections.In("TENSORS"); @@ -281,7 +298,8 @@ class HandDetectorGraph : public core::ModelTaskGraph { .GetOptions()); palm_detections >> detections_to_rects.In("DETECTIONS"); image_size >> detections_to_rects.In("IMAGE_SIZE"); - auto palm_rects = detections_to_rects.Out("NORM_RECTS"); + auto palm_rects = + detections_to_rects[Output>("NORM_RECTS")]; // Expands and shifts the rectangle that contains the palm so that it's // likely to cover the entire hand. 
@@ -308,8 +326,11 @@ class HandDetectorGraph : public core::ModelTaskGraph { clip_normalized_rect_vector_size[Output>( "")]; - return HandDetectionOuts{.palm_detections = palm_detections, - .hand_rects = clipped_hand_rects}; + return HandDetectionOuts{ + /* palm_detections= */ palm_detections, + /* hand_rects= */ clipped_hand_rects, + /* palm_rects= */ palm_rects, + /* image= */ preprocessing[Output(kImageTag)]}; } }; diff --git a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc index 3fa97664e..850ff2732 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc +++ b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc @@ -40,7 +40,7 @@ limitations under the License. #include "mediapipe/tasks/cc/core/proto/base_options.pb.h" #include "mediapipe/tasks/cc/core/proto/external_file.pb.h" #include "mediapipe/tasks/cc/core/task_runner.h" -#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" @@ -60,7 +60,8 @@ using ::mediapipe::tasks::core::ModelResources; using ::mediapipe::tasks::core::TaskRunner; using ::mediapipe::tasks::core::proto::ExternalFile; using ::mediapipe::tasks::vision::DecodeImageFromFile; -using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions; +using ::mediapipe::tasks::vision::hand_detector::proto:: + HandDetectorGraphOptions; using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorResult; using ::testing::EqualsProto; using ::testing::TestParamInfo; @@ -80,9 +81,9 @@ constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt"; constexpr char kImageTag[] = "IMAGE"; constexpr char kImageName[] = "image"; -constexpr char 
kPalmDetectionsTag[] = "DETECTIONS"; +constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS"; constexpr char kPalmDetectionsName[] = "palm_detections"; -constexpr char kHandNormRectsTag[] = "NORM_RECTS"; +constexpr char kHandRectsTag[] = "HAND_RECTS"; constexpr char kHandNormRectsName[] = "hand_norm_rects"; constexpr float kPalmDetectionBboxMaxDiff = 0.01; @@ -106,20 +107,20 @@ absl::StatusOr> CreateTaskRunner( auto& hand_detection = graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph"); - auto options = std::make_unique(); + auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( JoinPath("./", kTestDataDirectory, model_name)); options->set_min_detection_confidence(0.5); options->set_num_hands(num_hands); - hand_detection.GetOptions().Swap(options.get()); + hand_detection.GetOptions().Swap(options.get()); graph[Input(kImageTag)].SetName(kImageName) >> hand_detection.In(kImageTag); hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >> graph[Output>(kPalmDetectionsTag)]; - hand_detection.Out(kHandNormRectsTag).SetName(kHandNormRectsName) >> - graph[Output>(kHandNormRectsTag)]; + hand_detection.Out(kHandRectsTag).SetName(kHandNormRectsName) >> + graph[Output>(kHandRectsTag)]; return TaskRunner::Create( graph.GetConfig(), std::make_unique()); diff --git a/mediapipe/tasks/cc/vision/hand_detector/proto/BUILD b/mediapipe/tasks/cc/vision/hand_detector/proto/BUILD index 2d22aab10..77f3b2649 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/proto/BUILD +++ b/mediapipe/tasks/cc/vision/hand_detector/proto/BUILD @@ -21,8 +21,8 @@ package(default_visibility = [ licenses(["notice"]) mediapipe_proto_library( - name = "hand_detector_options_proto", - srcs = ["hand_detector_options.proto"], + name = "hand_detector_graph_options_proto", + srcs = ["hand_detector_graph_options.proto"], deps = [ "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", diff --git 
a/mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto b/mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto similarity index 76% rename from mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto rename to mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto index ae22c7991..be20583d0 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto +++ b/mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto @@ -21,24 +21,20 @@ import "mediapipe/framework/calculator.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto"; option java_package = "com.google.mediapipe.tasks.vision.handdetector"; -option java_outer_classname = "HandDetectorOptionsProto"; +option java_outer_classname = "HandDetectorGraphOptionsProto"; -message HandDetectorOptions { +message HandDetectorGraphOptions { extend mediapipe.CalculatorOptions { - optional HandDetectorOptions ext = 464864288; + optional HandDetectorGraphOptions ext = 464864288; } // Base options for configuring Task library, such as specifying the TfLite // model file with metadata, accelerator options, etc. optional core.proto.BaseOptions base_options = 1; - // The locale to use for display names specified through the TFLite Model - // Metadata, if any. Defaults to English. - optional string display_names_locale = 2 [default = "en"]; - // Minimum confidence value ([0.0, 1.0]) for confidence score to be considered // successfully detecting a hand in the image. - optional float min_detection_confidence = 3 [default = 0.5]; + optional float min_detection_confidence = 2 [default = 0.5]; // The maximum number of hands output by the detector. 
- optional int32 num_hands = 4; + optional int32 num_hands = 3; } diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD index 653976b96..c968c17fa 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD @@ -51,6 +51,7 @@ cc_library( # TODO: move calculators in modules/hand_landmark/calculators to tasks dir. "//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator", "//mediapipe/tasks/cc:common", + "//mediapipe/tasks/cc/components/utils:gate", "//mediapipe/tasks/cc/components:image_preprocessing", "//mediapipe/tasks/cc/core:model_resources", "//mediapipe/tasks/cc/core:model_task_graph", @@ -66,3 +67,41 @@ cc_library( ) # TODO: Enable this test + +cc_library( + name = "hand_landmarker_graph", + srcs = ["hand_landmarker_graph.cc"], + deps = [ + ":hand_landmarker_subgraph", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator_cc_proto", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:gate_calculator_cc_proto", + "//mediapipe/calculators/core:pass_through_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto", + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2:port", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/formats:tensor", + "//mediapipe/tasks/cc:common", + "//mediapipe/tasks/cc/components/utils:gate", + "//mediapipe/tasks/cc/core:model_task_graph", + 
"//mediapipe/tasks/cc/core:utils", + "//mediapipe/tasks/cc/vision/hand_detector:hand_detector_graph", + "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator", + "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_subgraph_options_cc_proto", + ], + alwayslink = 1, +) + +# TODO: Enable this test diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc new file mode 100644 index 000000000..6041d528f --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc @@ -0,0 +1,284 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include "mediapipe/calculators/core/clip_vector_size_calculator.pb.h" +#include "mediapipe/calculators/core/gate_calculator.pb.h" +#include "mediapipe/calculators/util/collection_has_min_size_calculator.pb.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/tasks/cc/common.h" +#include "mediapipe/tasks/cc/components/utils/gate.h" +#include "mediapipe/tasks/cc/core/model_task_graph.h" +#include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace hand_landmarker { + +namespace { + +using ::mediapipe::api2::Input; +using ::mediapipe::api2::Output; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Source; +using ::mediapipe::tasks::components::utils::DisallowIf; +using ::mediapipe::tasks::vision::hand_detector::proto:: + HandDetectorGraphOptions; +using ::mediapipe::tasks::vision::hand_landmarker::proto:: + HandLandmarkerGraphOptions; +using ::mediapipe::tasks::vision::hand_landmarker::proto:: + HandLandmarkerSubgraphOptions; + +constexpr char kImageTag[] = "IMAGE"; +constexpr char kLandmarksTag[] = "LANDMARKS"; 
+constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS"; +constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME"; +constexpr char kHandednessTag[] = "HANDEDNESS"; +constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS"; +constexpr char kPalmRectsTag[] = "PALM_RECTS"; +constexpr char kPreviousLoopbackCalculatorName[] = "PreviousLoopbackCalculator"; + +struct HandLandmarkerOutputs { + Source> landmark_lists; + Source> world_landmark_lists; + Source> hand_rects_next_frame; + Source> handednesses; + Source> palm_rects; + Source> palm_detections; + Source image; +}; + +} // namespace + +// A "mediapipe.tasks.vision.HandLandmarkerGraph" performs hand +// landmarks detection. The HandLandmarkerGraph consists of two subgraphs: +// HandDetectorGraph and HandLandmarkerSubgraph. HandLandmarkerSubgraph detects +// landmarks from bounding boxes produced by HandDetectorGraph. +// HandLandmarkerGraph tracks the landmarks over time, and skips the +// HandDetectorGraph. If the tracking is lost or the detected hands are +// less than configured max number hands, HandDetectorGraph would be triggered +// to detect hands. +// +// Accepts CPU input images and outputs Landmarks on CPU. +// +// Inputs: +// IMAGE - Image +// Image to perform hand landmarks detection on. +// +// Outputs: +// LANDMARKS - std::vector +// Vector of detected hand landmarks. +// WORLD_LANDMARKS - std::vector +// Vector of detected hand landmarks in world coordinates. +// HAND_RECT_NEXT_FRAME - std::vector +// Vector of the predicted rects enclosing the same hand RoI for landmark +// detection on the next frame. +// HANDEDNESS - std::vector +// Vector of classification of handedness. +// PALM_RECTS - std::vector +// Detected palm bounding boxes in normalized coordinates. +// PALM_DETECTIONS - std::vector +// Detected palms with maximum `num_hands` specified in options. 
+// IMAGE - Image +// The input image that the hand landmarker runs on and has the pixel data +// stored on the target storage (CPU vs GPU). +// +// Example: +// node { +// calculator: "mediapipe.tasks.vision.HandLandmarkerGraph" +// input_stream: "IMAGE:image_in" +// output_stream: "LANDMARKS:hand_landmarks" +// output_stream: "WORLD_LANDMARKS:world_hand_landmarks" +// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame" +// output_stream: "HANDEDNESS:handedness" +// output_stream: "PALM_RECTS:palm_rects" +// output_stream: "PALM_DETECTIONS:palm_detections" +// output_stream: "IMAGE:image_out" +// options { +// [mediapipe.tasks.hand_landmarker.proto.HandLandmarkerGraphOptions.ext] { +// base_options { +// model_asset { +// file_name: "hand_landmarker.task" +// } +// } +// hand_detector_graph_options { +// base_options { +// model_asset { +// file_name: "palm_detection.tflite" +// } +// } +// min_detection_confidence: 0.5 +// num_hands: 2 +// } +// hand_landmarker_subgraph_options { +// base_options { +// model_asset { +// file_name: "hand_landmark_lite.tflite" +// } +// } +// min_detection_confidence: 0.5 +// } +// } +// } +// } +class HandLandmarkerGraph : public core::ModelTaskGraph { + public: + absl::StatusOr GetConfig( + SubgraphContext* sc) override { + Graph graph; + ASSIGN_OR_RETURN( + auto hand_landmarker_outputs, + BuildHandLandmarkerGraph(sc->Options(), + graph[Input(kImageTag)], graph)); + hand_landmarker_outputs.landmark_lists >> + graph[Output>(kLandmarksTag)]; + hand_landmarker_outputs.world_landmark_lists >> + graph[Output>(kWorldLandmarksTag)]; + hand_landmarker_outputs.hand_rects_next_frame >> + graph[Output>(kHandRectNextFrameTag)]; + hand_landmarker_outputs.handednesses >> + graph[Output>(kHandednessTag)]; + hand_landmarker_outputs.palm_rects >> + graph[Output>(kPalmRectsTag)]; + hand_landmarker_outputs.palm_detections >> + graph[Output>(kPalmDetectionsTag)]; + hand_landmarker_outputs.image >> graph[Output(kImageTag)]; + + // TODO remove 
when support is fixed. + // As mediapipe GraphBuilder currently doesn't support configuring + // InputStreamInfo, modifying the CalculatorGraphConfig proto directly. + CalculatorGraphConfig config = graph.GetConfig(); + for (int i = 0; i < config.node_size(); ++i) { + if (config.node(i).calculator() == kPreviousLoopbackCalculatorName) { + auto* info = config.mutable_node(i)->add_input_stream_info(); + info->set_tag_index("LOOP"); + info->set_back_edge(true); + break; + } + } + return config; + } + + private: + // Adds a mediapipe hand landmark detection graph into the provided + // builder::Graph instance. + // + // tasks_options: the mediapipe tasks module HandLandmarkerGraphOptions. + // image_in: (mediapipe::Image) stream to run hand landmark detection on. + // graph: the mediapipe graph instance to be updated. + absl::StatusOr BuildHandLandmarkerGraph( + const HandLandmarkerGraphOptions& tasks_options, Source image_in, + Graph& graph) { + const int max_num_hands = + tasks_options.hand_detector_graph_options().num_hands(); + + auto& previous_loopback = graph.AddNode(kPreviousLoopbackCalculatorName); + image_in >> previous_loopback.In("MAIN"); + auto prev_hand_rects_from_landmarks = + previous_loopback[Output>("PREV_LOOP")]; + + auto& min_size_node = + graph.AddNode("NormalizedRectVectorHasMinSizeCalculator"); + prev_hand_rects_from_landmarks >> min_size_node.In("ITERABLE"); + min_size_node.GetOptions() + .set_min_size(max_num_hands); + auto has_enough_hands = min_size_node.Out("").Cast(); + + auto image_for_hand_detector = + DisallowIf(image_in, has_enough_hands, graph); + + auto& hand_detector = + graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph"); + hand_detector.GetOptions().CopyFrom( + tasks_options.hand_detector_graph_options()); + image_for_hand_detector >> hand_detector.In("IMAGE"); + auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS"); + + auto& hand_association = graph.AddNode("HandAssociationCalculator"); + 
hand_association.GetOptions() + .set_min_similarity_threshold(tasks_options.min_tracking_confidence()); + prev_hand_rects_from_landmarks >> + hand_association[Input>::Multiple("")][0]; + hand_rects_from_hand_detector >> + hand_association[Input>::Multiple("")][1]; + auto hand_rects = hand_association.Out(""); + + auto& clip_hand_rects = + graph.AddNode("ClipNormalizedRectVectorSizeCalculator"); + clip_hand_rects.GetOptions() + .set_max_vec_size(max_num_hands); + hand_rects >> clip_hand_rects.In(""); + auto clipped_hand_rects = clip_hand_rects.Out(""); + + auto& hand_landmarker_subgraph = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph"); + hand_landmarker_subgraph.GetOptions() + .CopyFrom(tasks_options.hand_landmarker_subgraph_options()); + image_in >> hand_landmarker_subgraph.In("IMAGE"); + clipped_hand_rects >> hand_landmarker_subgraph.In("HAND_RECT"); + + auto hand_rects_for_next_frame = + hand_landmarker_subgraph[Output>( + kHandRectNextFrameTag)]; + // Back edge. + hand_rects_for_next_frame >> previous_loopback.In("LOOP"); + + // TODO: Replace PassThroughCalculator with a calculator that + // converts the pixel data to be stored on the target storage (CPU vs GPU). 
+ auto& pass_through = graph.AddNode("PassThroughCalculator"); + image_in >> pass_through.In(""); + + return {{ + /* landmark_lists= */ hand_landmarker_subgraph + [Output>(kLandmarksTag)], + /* world_landmark_lists= */ + hand_landmarker_subgraph[Output>( + kWorldLandmarksTag)], + /* hand_rects_next_frame= */ hand_rects_for_next_frame, + hand_landmarker_subgraph[Output>( + kHandednessTag)], + /* palm_rects= */ + hand_detector[Output>(kPalmRectsTag)], + /* palm_detections */ + hand_detector[Output>(kPalmDetectionsTag)], + /* image */ + pass_through[Output("")], + }}; + } +}; + +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerGraph); + +} // namespace hand_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc new file mode 100644 index 000000000..413af68ff --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc @@ -0,0 +1,167 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h" +#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" +#include "mediapipe/tasks/cc/core/proto/external_file.pb.h" +#include "mediapipe/tasks/cc/core/task_runner.h" +#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/core/shims/cc/shims_test_util.h" + +namespace mediapipe { +namespace tasks { +namespace vision { +namespace hand_landmarker { + +namespace { + +using ::file::Defaults; +using ::file::GetTextProto; +using ::mediapipe::api2::Input; +using ::mediapipe::api2::Output; +using ::mediapipe::api2::builder::Graph; +using ::mediapipe::api2::builder::Source; +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::core::TaskRunner; +using ::mediapipe::tasks::vision::hand_landmarker::proto:: + HandLandmarkerGraphOptions; +using ::testing::EqualsProto; +using ::testing::proto::Approximately; 
+using ::testing::proto::Partially; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite"; +constexpr char kHandLandmarkerFullModel[] = "hand_landmark_full.tflite"; +constexpr char kLeftHandsImage[] = "left_hands.jpg"; + +constexpr char kImageTag[] = "IMAGE"; +constexpr char kImageName[] = "image_in"; +constexpr char kLandmarksTag[] = "LANDMARKS"; +constexpr char kLandmarksName[] = "landmarks"; +constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS"; +constexpr char kWorldLandmarksName[] = "world_landmarks"; +constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME"; +constexpr char kHandRectNextFrameName[] = "hand_rect_next_frame"; +constexpr char kHandednessTag[] = "HANDEDNESS"; +constexpr char kHandednessName[] = "handedness"; + +// Expected hand landmarks positions, in text proto format. +constexpr char kExpectedLeftUpHandLandmarksFilename[] = + "expected_left_up_hand_landmarks.prototxt"; +constexpr char kExpectedLeftDownHandLandmarksFilename[] = + "expected_left_down_hand_landmarks.prototxt"; + +constexpr float kFullModelFractionDiff = 0.03; // percentage +constexpr float kAbsMargin = 0.03; +constexpr int kMaxNumHands = 2; +constexpr float kMinTrackingConfidence = 0.5; + +NormalizedLandmarkList GetExpectedLandmarkList(absl::string_view filename) { + NormalizedLandmarkList expected_landmark_list; + MP_EXPECT_OK(GetTextProto(file::JoinPath("./", kTestDataDirectory, filename), + &expected_landmark_list, Defaults())); + return expected_landmark_list; +} + +// Helper function to create a Hand Landmarker TaskRunner. 
+absl::StatusOr> CreateTaskRunner() { + Graph graph; + auto& hand_landmarker_graph = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"); + auto& options = + hand_landmarker_graph.GetOptions(); + options.mutable_hand_detector_graph_options() + ->mutable_base_options() + ->mutable_model_asset() + ->set_file_name(JoinPath("./", kTestDataDirectory, kPalmDetectionModel)); + options.mutable_hand_detector_graph_options()->mutable_base_options(); + options.mutable_hand_detector_graph_options()->set_num_hands(kMaxNumHands); + options.mutable_hand_landmarker_subgraph_options() + ->mutable_base_options() + ->mutable_model_asset() + ->set_file_name( + JoinPath("./", kTestDataDirectory, kHandLandmarkerFullModel)); + options.set_min_tracking_confidence(kMinTrackingConfidence); + + graph[Input(kImageTag)].SetName(kImageName) >> + hand_landmarker_graph.In(kImageTag); + hand_landmarker_graph.Out(kLandmarksTag).SetName(kLandmarksName) >> + graph[Output>(kLandmarksTag)]; + hand_landmarker_graph.Out(kWorldLandmarksTag).SetName(kWorldLandmarksName) >> + graph[Output>(kWorldLandmarksTag)]; + hand_landmarker_graph.Out(kHandednessTag).SetName(kHandednessName) >> + graph[Output>(kHandednessTag)]; + hand_landmarker_graph.Out(kHandRectNextFrameTag) + .SetName(kHandRectNextFrameName) >> + graph[Output>(kHandRectNextFrameTag)]; + return TaskRunner::Create( + graph.GetConfig(), absl::make_unique()); +} + +class HandLandmarkerTest : public tflite_shims::testing::Test {}; + +TEST_F(HandLandmarkerTest, Succeeds) { + MP_ASSERT_OK_AND_ASSIGN( + Image image, + DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kLeftHandsImage))); + MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner()); + auto output_packets = + task_runner->Process({{kImageName, MakePacket(std::move(image))}}); + const auto& landmarks = (*output_packets)[kLandmarksName] + .Get>(); + ASSERT_EQ(landmarks.size(), kMaxNumHands); + std::vector expected_landmarks = { + 
GetExpectedLandmarkList(kExpectedLeftUpHandLandmarksFilename), + GetExpectedLandmarkList(kExpectedLeftDownHandLandmarksFilename)}; + + EXPECT_THAT(landmarks[0], + Approximately(Partially(EqualsProto(expected_landmarks[0])), + /*margin=*/kAbsMargin, + /*fraction=*/kFullModelFractionDiff)); + EXPECT_THAT(landmarks[1], + Approximately(Partially(EqualsProto(expected_landmarks[1])), + /*margin=*/kAbsMargin, + /*fraction=*/kFullModelFractionDiff)); +} + +} // namespace + +} // namespace hand_landmarker +} // namespace vision +} // namespace tasks +} // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc index fff4ae0d4..0ac4686b7 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc @@ -34,6 +34,7 @@ limitations under the License. #include "mediapipe/framework/formats/tensor.h" #include "mediapipe/tasks/cc/common.h" #include "mediapipe/tasks/cc/components/image_preprocessing.h" +#include "mediapipe/tasks/cc/components/utils/gate.h" #include "mediapipe/tasks/cc/core/model_resources.h" #include "mediapipe/tasks/cc/core/model_task_graph.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" @@ -48,6 +49,7 @@ limitations under the License. 
namespace mediapipe { namespace tasks { namespace vision { +namespace hand_landmarker { namespace { @@ -55,6 +57,7 @@ using ::mediapipe::api2::Input; using ::mediapipe::api2::Output; using ::mediapipe::api2::builder::Graph; using ::mediapipe::api2::builder::Source; +using ::mediapipe::tasks::components::utils::AllowIf; using ::mediapipe::tasks::core::ModelResources; using ::mediapipe::tasks::vision::hand_landmarker::proto:: HandLandmarkerSubgraphOptions; @@ -82,7 +85,6 @@ struct SingleHandLandmarkerOutputs { Source hand_presence; Source hand_presence_score; Source handedness; - Source> image_size; }; struct HandLandmarkerOutputs { @@ -92,7 +94,6 @@ struct HandLandmarkerOutputs { Source> presences; Source> presence_scores; Source> handednesses; - Source> image_size; }; absl::Status SanityCheckOptions(const HandLandmarkerSubgraphOptions& options) { @@ -208,8 +209,6 @@ void ConfigureHandRectTransformationCalculator( // Float value indicates the probability that the hand is present. // HANDEDNESS - ClassificationList // Classification of handedness. -// IMAGE_SIZE - std::vector -// The size of input image. // // Example: // node { @@ -221,8 +220,6 @@ void ConfigureHandRectTransformationCalculator( // output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame" // output_stream: "PRESENCE:hand_presence" // output_stream: "PRESENCE_SCORE:hand_presence_score" -// output_stream: "HANDEDNESS:handedness" -// output_stream: "IMAGE_SIZE:image_size" // options { // [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext] // { @@ -259,8 +256,6 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { graph[Output(kPresenceScoreTag)]; hand_landmark_detection_outs.handedness >> graph[Output(kHandednessTag)]; - hand_landmark_detection_outs.image_size >> - graph[Output>(kImageSizeTag)]; return graph.GetConfig(); } @@ -332,18 +327,7 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { // score of hand presence. 
auto& tensors_to_hand_presence = graph.AddNode("TensorsToFloatsCalculator"); hand_flag_tensors >> tensors_to_hand_presence.In("TENSORS"); - - // Converts the handedness tensor into a float that represents the - // classification score of handedness. - auto& tensors_to_handedness = - graph.AddNode("TensorsToClassificationCalculator"); - ConfigureTensorsToHandednessCalculator( - &tensors_to_handedness.GetOptions< - mediapipe::TensorsToClassificationCalculatorOptions>()); - handedness_tensors >> tensors_to_handedness.In("TENSORS"); auto hand_presence_score = tensors_to_hand_presence[Output("FLOAT")]; - auto handedness = - tensors_to_handedness[Output("CLASSIFICATIONS")]; // Applies a threshold to the confidence score to determine whether a // hand is present. @@ -354,6 +338,18 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { hand_presence_score >> hand_presence_thresholding.In("FLOAT"); auto hand_presence = hand_presence_thresholding[Output("FLAG")]; + // Converts the handedness tensor into a float that represents the + // classification score of handedness. 
+ auto& tensors_to_handedness = + graph.AddNode("TensorsToClassificationCalculator"); + ConfigureTensorsToHandednessCalculator( + &tensors_to_handedness.GetOptions< + mediapipe::TensorsToClassificationCalculatorOptions>()); + handedness_tensors >> tensors_to_handedness.In("TENSORS"); + auto handedness = AllowIf( + tensors_to_handedness[Output("CLASSIFICATIONS")], + hand_presence, graph); + // Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed // hand image (after image transformation with the FIT scale mode) to the // corresponding locations on the same image with the letterbox removed @@ -371,8 +367,9 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { landmark_letterbox_removal.Out("LANDMARKS") >> landmark_projection.In("NORM_LANDMARKS"); hand_rect >> landmark_projection.In("NORM_RECT"); - auto projected_landmarks = - landmark_projection[Output("NORM_LANDMARKS")]; + auto projected_landmarks = AllowIf( + landmark_projection[Output("NORM_LANDMARKS")], + hand_presence, graph); // Projects the world landmarks from the cropped hand image to the // corresponding locations on the full image before cropping (input to the @@ -383,7 +380,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { world_landmark_projection.In("LANDMARKS"); hand_rect >> world_landmark_projection.In("NORM_RECT"); auto projected_world_landmarks = - world_landmark_projection[Output("LANDMARKS")]; + AllowIf(world_landmark_projection[Output("LANDMARKS")], + hand_presence, graph); // Converts the hand landmarks into a rectangle (normalized by image size) // that encloses the hand. 
@@ -403,7 +401,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { hand_landmarks_to_rect.Out("NORM_RECT") >> hand_rect_transformation.In("NORM_RECT"); auto hand_rect_next_frame = - hand_rect_transformation[Output("")]; + AllowIf(hand_rect_transformation[Output("")], + hand_presence, graph); return {{ /* hand_landmarks= */ projected_landmarks, @@ -412,16 +411,15 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { /* hand_presence= */ hand_presence, /* hand_presence_score= */ hand_presence_score, /* handedness= */ handedness, - /* image_size= */ image_size, }}; } }; REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::SingleHandLandmarkerSubgraph); + ::mediapipe::tasks::vision::hand_landmarker::SingleHandLandmarkerSubgraph); -// A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi -// hand landmark detection. +// A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi hand +// landmark detection. // - Accepts CPU input image and a vector of hand rect RoIs to detect the // multiple hands landmarks enclosed by the RoIs. Output vectors of // hand landmarks related results, where each element in the vectors @@ -449,8 +447,6 @@ REGISTER_MEDIAPIPE_GRAPH( // Vector of float value indicates the probability that the hand is present. // HANDEDNESS - std::vector // Vector of classification of handedness. -// IMAGE_SIZE - std::vector -// The size of input image. 
// // Example: // node { @@ -463,7 +459,6 @@ REGISTER_MEDIAPIPE_GRAPH( // output_stream: "PRESENCE:hand_presence" // output_stream: "PRESENCE_SCORE:hand_presence_score" // output_stream: "HANDEDNESS:handedness" -// output_stream: "IMAGE_SIZE:image_size" // options { // [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext] // { @@ -499,8 +494,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { graph[Output>(kPresenceScoreTag)]; hand_landmark_detection_outputs.handednesses >> graph[Output>(kHandednessTag)]; - hand_landmark_detection_outputs.image_size >> - graph[Output>(kImageSizeTag)]; return graph.GetConfig(); } @@ -510,8 +503,8 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { const HandLandmarkerSubgraphOptions& subgraph_options, Source image_in, Source> multi_hand_rects, Graph& graph) { - auto& hand_landmark_subgraph = - graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph"); + auto& hand_landmark_subgraph = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarkerSubgraph"); hand_landmark_subgraph.GetOptions().CopyFrom( subgraph_options); @@ -533,8 +526,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { hand_landmark_subgraph.Out("HAND_RECT_NEXT_FRAME"); auto landmarks = hand_landmark_subgraph.Out("LANDMARKS"); auto world_landmarks = hand_landmark_subgraph.Out("WORLD_LANDMARKS"); - auto image_size = - hand_landmark_subgraph[Output>("IMAGE_SIZE")]; auto& end_loop_handedness = graph.AddNode("EndLoopClassificationListCalculator"); @@ -585,13 +576,14 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { /* presences= */ presences, /* presence_scores= */ presence_scores, /* handednesses= */ handednesses, - /* image_size= */ image_size, }}; } }; -REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandLandmarkerSubgraph); +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerSubgraph); +} // namespace hand_landmarker } 
// namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc index 1c2bc6da7..7d91dc3c7 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc @@ -45,6 +45,7 @@ limitations under the License. namespace mediapipe { namespace tasks { namespace vision { +namespace hand_landmarker { namespace { using ::file::Defaults; @@ -112,8 +113,8 @@ absl::StatusOr> CreateSingleHandTaskRunner( absl::string_view model_name) { Graph graph; - auto& hand_landmark_detection = - graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph"); + auto& hand_landmark_detection = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarkerSubgraph"); auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( @@ -151,8 +152,8 @@ absl::StatusOr> CreateMultiHandTaskRunner( absl::string_view model_name) { Graph graph; - auto& multi_hand_landmark_detection = - graph.AddNode("mediapipe.tasks.vision.HandLandmarkerSubgraph"); + auto& multi_hand_landmark_detection = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph"); auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( @@ -462,6 +463,7 @@ INSTANTIATE_TEST_SUITE_P( }); } // namespace +} // namespace hand_landmarker } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD index 8cc984c47..9d1ba6f90 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD @@ -31,13 +31,13 @@ mediapipe_proto_library( ) 
mediapipe_proto_library( - name = "hand_landmarker_options_proto", - srcs = ["hand_landmarker_options.proto"], + name = "hand_landmarker_graph_options_proto", + srcs = ["hand_landmarker_graph_options.proto"], deps = [ ":hand_landmarker_subgraph_options_proto", "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", "//mediapipe/tasks/cc/core/proto:base_options_proto", - "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_proto", + "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_proto", ], ) diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_options.proto b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto similarity index 74% rename from mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_options.proto rename to mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto index b3d82eda4..13849ec5e 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_options.proto +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto @@ -19,22 +19,25 @@ package mediapipe.tasks.vision.hand_landmarker.proto; import "mediapipe/framework/calculator.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto"; -import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto"; +import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto"; import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto"; -message HandLandmarkerOptions { +message HandLandmarkerGraphOptions { extend mediapipe.CalculatorOptions { - optional HandLandmarkerOptions ext = 462713202; + optional HandLandmarkerGraphOptions ext = 462713202; } // Base options for configuring MediaPipe Tasks, such as specifying the TfLite // model file with metadata, accelerator options, etc. 
optional core.proto.BaseOptions base_options = 1; - // The locale to use for display names specified through the TFLite Model - // Metadata, if any. Defaults to English. - optional string display_names_locale = 2 [default = "en"]; + // Options for hand detector graph. + optional hand_detector.proto.HandDetectorGraphOptions + hand_detector_graph_options = 2; - optional hand_detector.proto.HandDetectorOptions hand_detector_options = 3; + // Options for hand landmarker subgraph. + optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 3; - optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 4; + // Minimum confidence for hand landmarks tracking to be considered + // successfully. + optional float min_tracking_confidence = 4 [default = 0.5]; } diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto index 9e93384d6..02d18e8ab 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto @@ -28,11 +28,7 @@ message HandLandmarkerSubgraphOptions { // model file with metadata, accelerator options, etc. optional core.proto.BaseOptions base_options = 1; - // The locale to use for display names specified through the TFLite Model - // Metadata, if any. Defaults to English. - optional string display_names_locale = 2 [default = "en"]; - // Minimum confidence value ([0.0, 1.0]) for hand presence score to be // considered successfully detecting a hand in the image. 
- optional float min_detection_confidence = 3 [default = 0.5]; + optional float min_detection_confidence = 2 [default = 0.5]; } From f7fa3dc9bea39a621a139fc312ba1d6695958b17 Mon Sep 17 00:00:00 2001 From: Hadon Nash Date: Mon, 3 Oct 2022 22:04:29 -0700 Subject: [PATCH 02/10] Explaining "Graph Options" in the MediaPipe user guide. PiperOrigin-RevId: 478688026 --- docs/framework_concepts/graphs.md | 92 +++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/docs/framework_concepts/graphs.md b/docs/framework_concepts/graphs.md index f951b506d..b20a87467 100644 --- a/docs/framework_concepts/graphs.md +++ b/docs/framework_concepts/graphs.md @@ -143,6 +143,98 @@ Below is an example of how to create a subgraph named `TwoPassThroughSubgraph`. } ``` +## Graph Options + +It is possible to specify a "graph options" protobuf for a MediaPipe graph +similar to the [`Calculator Options`](calculators.md#calculator-options) +protobuf specified for a MediaPipe calculator. These "graph options" can be +specified where a graph is invoked, and used to populate calculator options and +subgraph options within the graph. 
+ +In a CalculatorGraphConfig, graph options can be specified for a subgraph +exactly like calculator options, as shown below: + +``` +node { + calculator: "FlowLimiterCalculator" + input_stream: "image" + output_stream: "throttled_image" + node_options: { + [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] { + max_in_flight: 1 + } + } +} + +node { + calculator: "FaceDetectionSubgraph" + input_stream: "IMAGE:throttled_image" + node_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] { + tensor_width: 192 + tensor_height: 192 + } + } +} +``` + +In a CalculatorGraphConfig, graph options can be accepted and used to populate +calculator options, as shown below: + +``` +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + node_options: { + [type.googleapis.com/mediapipe.ImageToTensorCalculatorOptions] { + keep_aspect_ratio: true + border_mode: BORDER_ZERO + } + } + option_value: "output_tensor_width:options/tensor_width" + option_value: "output_tensor_height:options/tensor_height" +} + +node { + calculator: "InferenceCalculator" + node_options: { + [type.googleapis.com/mediapipe.InferenceCalculatorOptions] {} + } + option_value: "delegate:options/delegate" + option_value: "model_path:options/model_path" +} +``` + +In this example, the `FaceDetectionSubgraph` accepts graph option protobuf +`FaceDetectionOptions`. The `FaceDetectionOptions` is used to define some field +values in the calculator options `ImageToTensorCalculatorOptions` and some field +values in the subgraph options `InferenceCalculatorOptions`. The field values +are defined using the `option_value:` syntax. + +In the `CalculatorGraphConfig::Node` protobuf, the fields `node_options:` and +`option_value:` together define the option values for a calculator such as +`ImageToTensorCalculator`. 
The `node_options:` field defines a set of literal +constant values using the text protobuf syntax. Each `option_value:` field +defines the value for one protobuf field using information from the enclosing +graph, specifically from field values of the graph options of the enclosing +graph. In the example above, the `option_value:` +`"output_tensor_width:options/tensor_width"` defines the field +`ImageToTensorCalculatorOptions.output_tensor_width` using the value of +`FaceDetectionOptions.tensor_width`. + +The syntax of `option_value:` is similar to the syntax of `input_stream:`. The +syntax is `option_value: "LHS:RHS"`. The LHS identifies a calculator option +field and the RHS identifies a graph option field. More specifically, the LHS +and RHS each consists of a series of protobuf field names identifying nested +protobuf messages and fields separated by '/'. This is known as the "ProtoPath" +syntax. Nested messages that are referenced in the LHS or RHS must already be +defined in the enclosing protobuf in order to be traversed using +`option_value:`. + ## Cycles From 2cb9ebb5e341c2419c9580ba01aff8d28a508f01 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 3 Oct 2022 23:43:13 -0700 Subject: [PATCH 03/10] Rename HandGestureRecognizer to GestureRecognizer and update namespace for Tasks C++ conventions. 
PiperOrigin-RevId: 478700907 --- .../BUILD | 12 +-- .../calculators/BUILD | 16 ++- .../handedness_to_matrix_calculator.cc | 13 ++- .../handedness_to_matrix_calculator_test.cc | 4 - .../landmarks_to_matrix_calculator.cc | 13 +-- .../landmarks_to_matrix_calculator.proto | 2 +- .../landmarks_to_matrix_calculator_test.cc | 10 +- .../hand_gesture_recognizer_graph.cc} | 100 +++++++++--------- .../handedness_util.cc | 4 +- .../handedness_util.h | 8 +- .../handedness_util_test.cc | 4 +- .../proto/BUILD | 13 +-- ...nd_gesture_recognizer_graph_options.proto} | 6 +- .../hand_detector/hand_detector_graph.cc | 16 +-- .../hand_detector/hand_detector_graph_test.cc | 4 +- .../hand_landmarker/hand_landmarker_graph.cc | 2 +- 16 files changed, 114 insertions(+), 113 deletions(-) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/BUILD (83%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/calculators/BUILD (84%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/calculators/handedness_to_matrix_calculator.cc (90%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/calculators/handedness_to_matrix_calculator_test.cc (97%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/calculators/landmarks_to_matrix_calculator.cc (96%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer/proto => gesture_recognizer/calculators}/landmarks_to_matrix_calculator.proto (97%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/calculators/landmarks_to_matrix_calculator_test.cc (96%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer/hand_gesture_recognizer_subgraph.cc => gesture_recognizer/hand_gesture_recognizer_graph.cc} (80%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/handedness_util.cc (93%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => 
gesture_recognizer}/handedness_util.h (79%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/handedness_util_test.cc (94%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer => gesture_recognizer}/proto/BUILD (73%) rename mediapipe/tasks/cc/vision/{hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.proto => gesture_recognizer/proto/hand_gesture_recognizer_graph_options.proto} (89%) diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/BUILD b/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD similarity index 83% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/BUILD rename to mediapipe/tasks/cc/vision/gesture_recognizer/BUILD index 9e2d9bd17..cb392873e 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/BUILD +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD @@ -41,8 +41,8 @@ cc_test( ) cc_library( - name = "hand_gesture_recognizer_subgraph", - srcs = ["hand_gesture_recognizer_subgraph.cc"], + name = "hand_gesture_recognizer_graph", + srcs = ["hand_gesture_recognizer_graph.cc"], deps = [ "//mediapipe/calculators/core:concatenate_vector_calculator", "//mediapipe/calculators/tensor:tensor_converter_calculator", @@ -62,10 +62,10 @@ cc_library( "//mediapipe/tasks/cc/core:model_task_graph", "//mediapipe/tasks/cc/core:utils", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators:handedness_to_matrix_calculator", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators:landmarks_to_matrix_calculator", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:hand_gesture_recognizer_subgraph_options_cc_proto", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:landmarks_to_matrix_calculator_cc_proto", + "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:handedness_to_matrix_calculator", + 
"//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator", + "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator_cc_proto", + "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_cc_proto", "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_subgraph", "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "//mediapipe/tasks/metadata:metadata_schema_cc", diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/BUILD b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/BUILD similarity index 84% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/BUILD rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/BUILD index 4863c8682..a6de4f950 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/BUILD +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/BUILD @@ -12,11 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") + package(default_visibility = [ "//mediapipe/app/xeno:__subpackages__", "//mediapipe/tasks:internal", ]) +mediapipe_proto_library( + name = "landmarks_to_matrix_calculator_proto", + srcs = ["landmarks_to_matrix_calculator.proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + "//mediapipe/tasks/cc/core/proto:base_options_proto", + ], +) + cc_library( name = "handedness_to_matrix_calculator", srcs = ["handedness_to_matrix_calculator.cc"], @@ -25,7 +37,7 @@ cc_library( "//mediapipe/framework/formats:classification_cc_proto", "//mediapipe/framework/formats:matrix", "//mediapipe/framework/port:ret_check", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer:handedness_util", + "//mediapipe/tasks/cc/vision/gesture_recognizer:handedness_util", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", @@ -53,11 +65,11 @@ cc_library( name = "landmarks_to_matrix_calculator", srcs = ["landmarks_to_matrix_calculator.cc"], deps = [ + ":landmarks_to_matrix_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:matrix", "//mediapipe/framework/port:ret_check", - "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:landmarks_to_matrix_calculator_cc_proto", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator.cc similarity index 90% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator.cc rename to 
mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator.cc index 746293d21..b6c973a1b 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator.cc @@ -26,14 +26,16 @@ limitations under the License. #include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/formats/matrix.h" #include "mediapipe/framework/port/ret_check.h" -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h" +// TODO Update to use API2 namespace mediapipe { -namespace tasks { -namespace vision { +namespace api2 { namespace { +using ::mediapipe::tasks::vision::gesture_recognizer::GetLeftHandScore; + constexpr char kHandednessTag[] = "HANDEDNESS"; constexpr char kHandednessMatrixTag[] = "HANDEDNESS_MATRIX"; @@ -71,6 +73,8 @@ class HandednessToMatrixCalculator : public CalculatorBase { return absl::OkStatus(); } + // TODO remove this after change to API2, because Setting offset + // to 0 is the default in API2 absl::Status Open(CalculatorContext* cc) override { cc->SetOffset(TimestampDiff(0)); return absl::OkStatus(); @@ -95,6 +99,5 @@ absl::Status HandednessToMatrixCalculator::Process(CalculatorContext* cc) { return absl::OkStatus(); } -} // namespace vision -} // namespace tasks +} // namespace api2 } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc similarity index 97% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc index 
c93c48ac5..17b16bf80 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/handedness_to_matrix_calculator_test.cc @@ -28,8 +28,6 @@ limitations under the License. #include "mediapipe/framework/port/status_matchers.h" namespace mediapipe { -namespace tasks { -namespace vision { namespace { @@ -95,6 +93,4 @@ INSTANTIATE_TEST_CASE_P( } // namespace -} // namespace vision -} // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc similarity index 96% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc index 990e99920..b70689eaf 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc @@ -27,13 +27,11 @@ limitations under the License. 
#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/matrix.h" #include "mediapipe/framework/port/ret_check.h" -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h" +// TODO Update to use API2 namespace mediapipe { -namespace tasks { -namespace vision { - -using proto::LandmarksToMatrixCalculatorOptions; +namespace api2 { namespace { @@ -175,7 +173,7 @@ absl::Status ProcessLandmarks(LandmarkListT landmarks, CalculatorContext* cc) { // input_stream: "IMAGE_SIZE:image_size" // output_stream: "LANDMARKS_MATRIX:landmarks_matrix" // options { -// [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions.ext] { +// [mediapipe.LandmarksToMatrixCalculatorOptions.ext] { // object_normalization: true // object_normalization_origin_offset: 0 // } @@ -221,6 +219,5 @@ absl::Status LandmarksToMatrixCalculator::Process(CalculatorContext* cc) { return absl::OkStatus(); } -} // namespace vision -} // namespace tasks +} // namespace api2 } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.proto b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.proto similarity index 97% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.proto rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.proto index 6b004e203..10b034447 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.proto +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.proto @@ -15,7 +15,7 @@ limitations under the License. 
syntax = "proto2"; -package mediapipe.tasks.vision.proto; +package mediapipe; import "mediapipe/framework/calculator.proto"; diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc similarity index 96% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc index 05d238f66..8a68d8dae 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc @@ -28,8 +28,6 @@ limitations under the License. #include "mediapipe/framework/port/status_matchers.h" namespace mediapipe { -namespace tasks { -namespace vision { namespace { @@ -72,8 +70,7 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) { input_stream: "IMAGE_SIZE:image_size" output_stream: "LANDMARKS_MATRIX:landmarks_matrix" options { - [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions - .ext] { + [mediapipe.LandmarksToMatrixCalculatorOptions.ext] { object_normalization: $0 object_normalization_origin_offset: $1 } @@ -145,8 +142,7 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) { input_stream: "IMAGE_SIZE:image_size" output_stream: "LANDMARKS_MATRIX:landmarks_matrix" options { - [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions - .ext] { + [mediapipe.LandmarksToMatrixCalculatorOptions.ext] { object_normalization: $0 object_normalization_origin_offset: $1 } @@ -202,6 +198,4 @@ INSTANTIATE_TEST_CASE_P( } // namespace -} // namespace vision -} // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/hand_gesture_recognizer_subgraph.cc 
b/mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc similarity index 80% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/hand_gesture_recognizer_subgraph.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc index 247d8453d..05bc607ae 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/hand_gesture_recognizer_subgraph.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc @@ -34,14 +34,15 @@ limitations under the License. #include "mediapipe/tasks/cc/core/model_task_graph.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/utils.h" -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.pb.h" -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" #include "mediapipe/tasks/metadata/metadata_schema_generated.h" namespace mediapipe { namespace tasks { namespace vision { +namespace gesture_recognizer { namespace { @@ -50,9 +51,8 @@ using ::mediapipe::api2::Output; using ::mediapipe::api2::builder::Graph; using ::mediapipe::api2::builder::Source; using ::mediapipe::tasks::components::containers::proto::ClassificationResult; -using ::mediapipe::tasks::vision::hand_gesture_recognizer::proto:: - HandGestureRecognizerSubgraphOptions; -using ::mediapipe::tasks::vision::proto::LandmarksToMatrixCalculatorOptions; +using ::mediapipe::tasks::vision::gesture_recognizer::proto:: + HandGestureRecognizerGraphOptions; constexpr char kHandednessTag[] = "HANDEDNESS"; constexpr char kLandmarksTag[] = "LANDMARKS"; @@ -70,18 +70,6 @@ constexpr char 
kIndexTag[] = "INDEX"; constexpr char kIterableTag[] = "ITERABLE"; constexpr char kBatchEndTag[] = "BATCH_END"; -absl::Status SanityCheckOptions( - const HandGestureRecognizerSubgraphOptions& options) { - if (options.min_tracking_confidence() < 0 || - options.min_tracking_confidence() > 1) { - return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument, - "Invalid `min_tracking_confidence` option: " - "value must be in the range [0.0, 1.0]", - MediaPipeTasksStatus::kInvalidArgumentError); - } - return absl::OkStatus(); -} - Source> ConvertMatrixToTensor(Source matrix, Graph& graph) { auto& node = graph.AddNode("TensorConverterCalculator"); @@ -91,9 +79,10 @@ Source> ConvertMatrixToTensor(Source matrix, } // namespace -// A "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph" performs -// single hand gesture recognition. This graph is used as a building block for -// mediapipe.tasks.vision.HandGestureRecognizerGraph. +// A +// "mediapipe.tasks.vision.gesture_recognizer.SingleHandGestureRecognizerGraph" +// performs single hand gesture recognition. This graph is used as a building +// block for mediapipe.tasks.vision.GestureRecognizerGraph. 
// // Inputs: // HANDEDNESS - ClassificationList @@ -113,14 +102,15 @@ Source> ConvertMatrixToTensor(Source matrix, // // Example: // node { -// calculator: "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph" +// calculator: +// "mediapipe.tasks.vision.gesture_recognizer.SingleHandGestureRecognizerGraph" // input_stream: "HANDEDNESS:handedness" // input_stream: "LANDMARKS:landmarks" // input_stream: "WORLD_LANDMARKS:world_landmarks" // input_stream: "IMAGE_SIZE:image_size" // output_stream: "HAND_GESTURES:hand_gestures" // options { -// [mediapipe.tasks.vision.hand_gesture_recognizer.proto.HandGestureRecognizerSubgraphOptions.ext] +// [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext] // { // base_options { // model_asset { @@ -130,19 +120,19 @@ Source> ConvertMatrixToTensor(Source matrix, // } // } // } -class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph { +class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph { public: absl::StatusOr GetConfig( SubgraphContext* sc) override { ASSIGN_OR_RETURN( const auto* model_resources, - CreateModelResources(sc)); + CreateModelResources(sc)); Graph graph; ASSIGN_OR_RETURN( auto hand_gestures, - BuildHandGestureRecognizerGraph( - sc->Options(), - *model_resources, graph[Input(kHandednessTag)], + BuildGestureRecognizerGraph( + sc->Options(), *model_resources, + graph[Input(kHandednessTag)], graph[Input(kLandmarksTag)], graph[Input(kWorldLandmarksTag)], graph[Input>(kImageSizeTag)], graph)); @@ -151,15 +141,13 @@ class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph { } private: - absl::StatusOr> BuildHandGestureRecognizerGraph( - const HandGestureRecognizerSubgraphOptions& graph_options, + absl::StatusOr> BuildGestureRecognizerGraph( + const HandGestureRecognizerGraphOptions& graph_options, const core::ModelResources& model_resources, Source handedness, Source hand_landmarks, Source hand_world_landmarks, Source> image_size, Graph& 
graph) { - MP_RETURN_IF_ERROR(SanityCheckOptions(graph_options)); - // Converts the ClassificationList to a matrix. auto& handedness_to_matrix = graph.AddNode("HandednessToMatrixCalculator"); handedness >> handedness_to_matrix.In(kHandednessTag); @@ -235,12 +223,15 @@ class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph { } }; +// clang-format off REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::SingleHandGestureRecognizerSubgraph); + ::mediapipe::tasks::vision::gesture_recognizer::SingleHandGestureRecognizerGraph); // NOLINT +// clang-format on -// A "mediapipe.tasks.vision.HandGestureRecognizerSubgraph" performs multi -// hand gesture recognition. This graph is used as a building block for -// mediapipe.tasks.vision.HandGestureRecognizerGraph. +// A +// "mediapipe.tasks.vision.gesture_recognizer.MultipleHandGestureRecognizerGraph" +// performs multi hand gesture recognition. This graph is used as a building +// block for mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph. 
// // Inputs: // HANDEDNESS - std::vector @@ -263,7 +254,8 @@ REGISTER_MEDIAPIPE_GRAPH( // // Example: // node { -// calculator: "mediapipe.tasks.vision.HandGestureRecognizerSubgraph" +// calculator: +// "mediapipe.tasks.vision.gesture_recognizer.MultipleHandGestureRecognizerGraph" // input_stream: "HANDEDNESS:handedness" // input_stream: "LANDMARKS:landmarks" // input_stream: "WORLD_LANDMARKS:world_landmarks" @@ -271,7 +263,7 @@ REGISTER_MEDIAPIPE_GRAPH( // input_stream: "HAND_TRACKING_IDS:hand_tracking_ids" // output_stream: "HAND_GESTURES:hand_gestures" // options { -// [mediapipe.tasks.vision.hand_gesture_recognizer.proto.HandGestureRecognizerSubgraph.ext] +// [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext] // { // base_options { // model_asset { @@ -281,15 +273,15 @@ REGISTER_MEDIAPIPE_GRAPH( // } // } // } -class HandGestureRecognizerSubgraph : public core::ModelTaskGraph { +class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph { public: absl::StatusOr GetConfig( SubgraphContext* sc) override { Graph graph; ASSIGN_OR_RETURN( auto multi_hand_gestures, - BuildMultiHandGestureRecognizerSubraph( - sc->Options(), + BuildMultiGestureRecognizerSubraph( + sc->Options(), graph[Input>(kHandednessTag)], graph[Input>(kLandmarksTag)], graph[Input>(kWorldLandmarksTag)], @@ -302,8 +294,8 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph { private: absl::StatusOr>> - BuildMultiHandGestureRecognizerSubraph( - const HandGestureRecognizerSubgraphOptions& graph_options, + BuildMultiGestureRecognizerSubraph( + const HandGestureRecognizerGraphOptions& graph_options, Source> multi_handedness, Source> multi_hand_landmarks, Source> multi_hand_world_landmarks, @@ -341,17 +333,18 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph { hand_tracking_id >> get_world_landmarks_at_index.In(kIndexTag); auto hand_world_landmarks = get_world_landmarks_at_index.Out(kItemTag); - auto&
hand_gesture_recognizer_subgraph = graph.AddNode( - "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph"); - hand_gesture_recognizer_subgraph - .GetOptions() + auto& hand_gesture_recognizer_graph = graph.AddNode( + "mediapipe.tasks.vision.gesture_recognizer." + "SingleHandGestureRecognizerGraph"); + hand_gesture_recognizer_graph + .GetOptions() .CopyFrom(graph_options); - handedness >> hand_gesture_recognizer_subgraph.In(kHandednessTag); - hand_landmarks >> hand_gesture_recognizer_subgraph.In(kLandmarksTag); + handedness >> hand_gesture_recognizer_graph.In(kHandednessTag); + hand_landmarks >> hand_gesture_recognizer_graph.In(kLandmarksTag); hand_world_landmarks >> - hand_gesture_recognizer_subgraph.In(kWorldLandmarksTag); - image_size_clone >> hand_gesture_recognizer_subgraph.In(kImageSizeTag); - auto hand_gestures = hand_gesture_recognizer_subgraph.Out(kHandGesturesTag); + hand_gesture_recognizer_graph.In(kWorldLandmarksTag); + image_size_clone >> hand_gesture_recognizer_graph.In(kImageSizeTag); + auto hand_gestures = hand_gesture_recognizer_graph.Out(kHandGesturesTag); auto& end_loop_classification_results = graph.AddNode("mediapipe.tasks.EndLoopClassificationResultCalculator"); @@ -364,9 +357,12 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph { } }; +// clang-format off REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::HandGestureRecognizerSubgraph); + ::mediapipe::tasks::vision::gesture_recognizer::MultipleHandGestureRecognizerGraph); // NOLINT +// clang-format on +} // namespace gesture_recognizer } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.cc similarity index 93% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.cc index 00e19cdb5..60ccae92c 100644 --- 
a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h" #include @@ -25,6 +25,7 @@ limitations under the License. namespace mediapipe { namespace tasks { namespace vision { +namespace gesture_recognizer { namespace {} // namespace @@ -58,6 +59,7 @@ absl::StatusOr GetLeftHandScore( } } +} // namespace gesture_recognizer } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h similarity index 79% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h rename to mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h index 74e04b8cc..ae4137d0f 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ -#define MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ +#ifndef MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ +#define MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ #include "absl/status/statusor.h" #include "mediapipe/framework/formats/classification.pb.h" @@ -22,6 +22,7 @@ limitations under the License. 
namespace mediapipe { namespace tasks { namespace vision { +namespace gesture_recognizer { bool IsLeftHand(const mediapipe::Classification& c); @@ -30,8 +31,9 @@ bool IsRightHand(const mediapipe::Classification& c); absl::StatusOr GetLeftHandScore( const mediapipe::ClassificationList& classification_list); +} // namespace gesture_recognizer } // namespace vision } // namespace tasks } // namespace mediapipe -#endif // MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ +#endif // MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_ diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util_test.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util_test.cc similarity index 94% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util_test.cc rename to mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util_test.cc index 51dfb5dea..40a201ae8 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util_test.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h" +#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h" #include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/port/gmock.h" @@ -23,6 +23,7 @@ limitations under the License. 
namespace mediapipe { namespace tasks { namespace vision { +namespace gesture_recognizer { namespace { TEST(GetLeftHandScore, SingleLeftHandClassification) { @@ -72,6 +73,7 @@ TEST(GetLeftHandScore, LeftAndRightLowerCaseHandClassification) { } } // namespace +} // namespace gesture_recognizer } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/BUILD b/mediapipe/tasks/cc/vision/gesture_recognizer/proto/BUILD similarity index 73% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/BUILD rename to mediapipe/tasks/cc/vision/gesture_recognizer/proto/BUILD index 44ec611b2..cb6ec8289 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/BUILD +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/proto/BUILD @@ -21,8 +21,8 @@ package(default_visibility = [ licenses(["notice"]) mediapipe_proto_library( - name = "hand_gesture_recognizer_subgraph_options_proto", - srcs = ["hand_gesture_recognizer_subgraph_options.proto"], + name = "hand_gesture_recognizer_graph_options_proto", + srcs = ["hand_gesture_recognizer_graph_options.proto"], deps = [ "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", @@ -30,12 +30,3 @@ mediapipe_proto_library( "//mediapipe/tasks/cc/core/proto:base_options_proto", ], ) - -mediapipe_proto_library( - name = "landmarks_to_matrix_calculator_proto", - srcs = ["landmarks_to_matrix_calculator.proto"], - deps = [ - "//mediapipe/framework:calculator_options_proto", - "//mediapipe/framework:calculator_proto", - ], -) diff --git a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.proto b/mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.proto similarity index 89% rename from mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.proto rename to 
mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.proto index d8ee95037..ac8cda15c 100644 --- a/mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.proto +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.proto @@ -15,15 +15,15 @@ limitations under the License. // TODO Refactor naming and class structure of hand related Tasks. syntax = "proto2"; -package mediapipe.tasks.vision.hand_gesture_recognizer.proto; +package mediapipe.tasks.vision.gesture_recognizer.proto; import "mediapipe/framework/calculator.proto"; import "mediapipe/tasks/cc/components/processors/proto/classifier_options.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto"; -message HandGestureRecognizerSubgraphOptions { +message HandGestureRecognizerGraphOptions { extend mediapipe.CalculatorOptions { - optional HandGestureRecognizerSubgraphOptions ext = 463370452; + optional HandGestureRecognizerGraphOptions ext = 463370452; } // Base options for configuring hand gesture recognition subgraph, such as // specifying the TfLite model file with metadata, accelerator options, etc. diff --git a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc index 7ef8d62f5..8573d718f 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc @@ -46,6 +46,7 @@ limitations under the License. namespace mediapipe { namespace tasks { namespace vision { +namespace hand_detector { namespace { @@ -139,9 +140,9 @@ void ConfigureRectTransformationCalculator( } // namespace -// A "mediapipe.tasks.vision.HandDetectorGraph" performs hand detection. The -// Hand Detection Graph is based on palm detection model, and scale the detected -// palm bounding box to enclose the detected whole hand. 
+// A "mediapipe.tasks.vision.hand_detector.HandDetectorGraph" performs hand +// detection. The Hand Detection Graph is based on palm detection model, and +// scale the detected palm bounding box to enclose the detected whole hand. // Accepts CPU input images and outputs Landmark on CPU. // // Inputs: @@ -161,14 +162,15 @@ void ConfigureRectTransformationCalculator( // // Example: // node { -// calculator: "mediapipe.tasks.vision.HandDetectorGraph" +// calculator: "mediapipe.tasks.vision.hand_detector.HandDetectorGraph" // input_stream: "IMAGE:image" // output_stream: "PALM_DETECTIONS:palm_detections" // output_stream: "HAND_RECTS:hand_rects_from_palm_detections" // output_stream: "PALM_RECTS:palm_rects" // output_stream: "IMAGE:image_out" // options { -// [mediapipe.tasks.hand_detector.proto.HandDetectorGraphOptions.ext] { +// [mediapipe.tasks.vision.hand_detector.proto.HandDetectorGraphOptions.ext] +// { // base_options { // model_asset { // file_name: "palm_detection.tflite" @@ -334,8 +336,10 @@ class HandDetectorGraph : public core::ModelTaskGraph { } }; -REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandDetectorGraph); +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::vision::hand_detector::HandDetectorGraph); +} // namespace hand_detector } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc index 850ff2732..11cfc3026 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc +++ b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc @@ -47,6 +47,7 @@ limitations under the License. 
namespace mediapipe { namespace tasks { namespace vision { +namespace hand_detector { namespace { using ::file::Defaults; @@ -105,7 +106,7 @@ absl::StatusOr> CreateTaskRunner( Graph graph; auto& hand_detection = - graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph"); + graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph"); auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( @@ -201,6 +202,7 @@ INSTANTIATE_TEST_SUITE_P( }); } // namespace +} // namespace hand_detector } // namespace vision } // namespace tasks } // namespace mediapipe diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc index 6041d528f..ab3403d53 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc @@ -216,7 +216,7 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { DisallowIf(image_in, has_enough_hands, graph); auto& hand_detector = - graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph"); + graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph"); hand_detector.GetOptions().CopyFrom( tasks_options.hand_detector_graph_options()); image_for_hand_detector >> hand_detector.In("IMAGE"); From 25e424baaf2a0399584fee2a00d1468c1b479154 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Tue, 4 Oct 2022 00:22:35 -0700 Subject: [PATCH 04/10] Rename hand landmarker related graphs. PiperOrigin-RevId: 478706652 --- .../tasks/cc/vision/gesture_recognizer/BUILD | 2 +- .../tasks/cc/vision/hand_landmarker/BUILD | 10 +-- .../hand_landmarker/hand_landmarker_graph.cc | 44 ++++++------ .../hand_landmarker_graph_test.cc | 4 +- ...ph.cc => hand_landmarks_detector_graph.cc} | 71 +++++++++++-------- ... 
=> hand_landmarks_detector_graph_test.cc} | 18 ++--- .../cc/vision/hand_landmarker/proto/BUILD | 6 +- .../proto/hand_landmarker_graph_options.proto | 5 +- ...nd_landmarks_detector_graph_options.proto} | 4 +- 9 files changed, 90 insertions(+), 74 deletions(-) rename mediapipe/tasks/cc/vision/hand_landmarker/{hand_landmarker_subgraph.cc => hand_landmarks_detector_graph.cc} (91%) rename mediapipe/tasks/cc/vision/hand_landmarker/{hand_landmarker_subgraph_test.cc => hand_landmarks_detector_graph_test.cc} (97%) rename mediapipe/tasks/cc/vision/hand_landmarker/proto/{hand_landmarker_subgraph_options.proto => hand_landmarks_detector_graph_options.proto} (92%) diff --git a/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD b/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD index cb392873e..c9319e946 100644 --- a/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD @@ -66,7 +66,7 @@ cc_library( "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator", "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator_cc_proto", "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_cc_proto", - "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_subgraph", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarks_detector_graph", "//mediapipe/tasks/cc/vision/utils:image_tensor_specs", "//mediapipe/tasks/metadata:metadata_schema_cc", "@com_google_absl//absl/status", diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD index c968c17fa..a2bb458db 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD @@ -19,10 +19,10 @@ package(default_visibility = [ licenses(["notice"]) cc_library( - name = "hand_landmarker_subgraph", - srcs = ["hand_landmarker_subgraph.cc"], + name = 
"hand_landmarks_detector_graph", + srcs = ["hand_landmarks_detector_graph.cc"], deps = [ - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_subgraph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "//mediapipe/calculators/core:split_vector_calculator", @@ -72,7 +72,7 @@ cc_library( name = "hand_landmarker_graph", srcs = ["hand_landmarker_graph.cc"], deps = [ - ":hand_landmarker_subgraph", + ":hand_landmarks_detector_graph", "//mediapipe/calculators/core:begin_loop_calculator", "//mediapipe/calculators/core:clip_vector_size_calculator_cc_proto", "//mediapipe/calculators/core:end_loop_calculator", @@ -99,7 +99,7 @@ cc_library( "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator", "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto", - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_subgraph_options_cc_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto", ], alwayslink = 1, ) diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc index ab3403d53..949c06520 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc @@ -36,7 +36,7 @@ limitations under the License. 
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h" #include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h" -#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" namespace mediapipe { namespace tasks { @@ -55,7 +55,7 @@ using ::mediapipe::tasks::vision::hand_detector::proto:: using ::mediapipe::tasks::vision::hand_landmarker::proto:: HandLandmarkerGraphOptions; using ::mediapipe::tasks::vision::hand_landmarker::proto:: - HandLandmarkerSubgraphOptions; + HandLandmarksDetectorGraphOptions; constexpr char kImageTag[] = "IMAGE"; constexpr char kLandmarksTag[] = "LANDMARKS"; @@ -78,14 +78,14 @@ struct HandLandmarkerOutputs { } // namespace -// A "mediapipe.tasks.vision.HandLandmarkerGraph" performs hand +// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand // landmarks detection. The HandLandmarkerGraph consists of two subgraphs: -// HandDetectorGraph and HandLandmarkerSubgraph. HandLandmarkerSubgraph detects -// landmarks from bounding boxes produced by HandDetectorGraph. -// HandLandmarkerGraph tracks the landmarks over time, and skips the -// HandDetectorGraph. If the tracking is lost or the detectd hands are -// less than configured max number hands, HandDetectorGraph would be triggered -// to detect hands. +// HandDetectorGraph and MultipleHandLandmarksDetectorGraph. +// MultipleHandLandmarksDetectorGraph detects landmarks from bounding boxes +// produced by HandDetectorGraph. HandLandmarkerGraph tracks the landmarks over +// time, and skips the HandDetectorGraph. If the tracking is lost or the +// detected hands are less than configured max number hands, HandDetectorGraph +// would be triggered to detect hands.
// // Accepts CPU input images and outputs Landmarks on CPU. // @@ -113,7 +113,7 @@ struct HandLandmarkerOutputs { // // Example: // node { -// calculator: "mediapipe.tasks.vision.HandLandmarkerGraph" +// calculator: "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" // input_stream: "IMAGE:image_in" // output_stream: "LANDMARKS:hand_landmarks" // output_stream: "WORLD_LANDMARKS:world_hand_landmarks" @@ -138,7 +138,7 @@ struct HandLandmarkerOutputs { // min_detection_confidence: 0.5 // num_hands: 2 // } -// hand_landmarker_subgraph_options { +// hand_landmarks_detector_graph_options { // base_options { // model_asset { // file_name: "hand_landmark_lite.tflite" @@ -238,15 +238,17 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { hand_rects >> clip_hand_rects.In(""); auto clipped_hand_rects = clip_hand_rects.Out(""); - auto& hand_landmarker_subgraph = graph.AddNode( - "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph"); - hand_landmarker_subgraph.GetOptions() - .CopyFrom(tasks_options.hand_landmarker_subgraph_options()); - image_in >> hand_landmarker_subgraph.In("IMAGE"); - clipped_hand_rects >> hand_landmarker_subgraph.In("HAND_RECT"); + auto& hand_landmarks_detector_graph = graph.AddNode( + "mediapipe.tasks.vision.hand_landmarker." + "MultipleHandLandmarksDetectorGraph"); + hand_landmarks_detector_graph + .GetOptions() + .CopyFrom(tasks_options.hand_landmarks_detector_graph_options()); + image_in >> hand_landmarks_detector_graph.In("IMAGE"); + clipped_hand_rects >> hand_landmarks_detector_graph.In("HAND_RECT"); auto hand_rects_for_next_frame = - hand_landmarker_subgraph[Output>( + hand_landmarks_detector_graph[Output>( kHandRectNextFrameTag)]; // Back edge. 
hand_rects_for_next_frame >> previous_loopback.In("LOOP"); @@ -257,13 +259,13 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { image_in >> pass_through.In(""); return {{ - /* landmark_lists= */ hand_landmarker_subgraph + /* landmark_lists= */ hand_landmarks_detector_graph [Output>(kLandmarksTag)], /* world_landmark_lists= */ - hand_landmarker_subgraph[Output>( + hand_landmarks_detector_graph[Output>( kWorldLandmarksTag)], /* hand_rects_next_frame= */ hand_rects_for_next_frame, - hand_landmarker_subgraph[Output>( + hand_landmarks_detector_graph[Output>( kHandednessTag)], /* palm_rects= */ hand_detector[Output>(kPalmRectsTag)], diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc index 413af68ff..bce5613ff 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph_test.cc @@ -38,7 +38,7 @@ limitations under the License. 
#include "mediapipe/tasks/cc/core/task_runner.h" #include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h" -#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/shims/cc/shims_test_util.h" @@ -111,7 +111,7 @@ absl::StatusOr> CreateTaskRunner() { ->set_file_name(JoinPath("./", kTestDataDirectory, kPalmDetectionModel)); options.mutable_hand_detector_graph_options()->mutable_base_options(); options.mutable_hand_detector_graph_options()->set_num_hands(kMaxNumHands); - options.mutable_hand_landmarker_subgraph_options() + options.mutable_hand_landmarks_detector_graph_options() ->mutable_base_options() ->mutable_model_asset() ->set_file_name( diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc similarity index 91% rename from mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc rename to mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc index 0ac4686b7..23521790d 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc @@ -40,7 +40,7 @@ limitations under the License. 
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/utils.h" #include "mediapipe/tasks/cc/metadata/metadata_extractor.h" -#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h" #include "mediapipe/tasks/metadata/metadata_schema_generated.h" #include "mediapipe/util/label_map.pb.h" @@ -60,7 +60,7 @@ using ::mediapipe::api2::builder::Source; using ::mediapipe::tasks::components::utils::AllowIf; using ::mediapipe::tasks::core::ModelResources; using ::mediapipe::tasks::vision::hand_landmarker::proto:: - HandLandmarkerSubgraphOptions; + HandLandmarksDetectorGraphOptions; using LabelItems = mediapipe::proto_ns::Map; constexpr char kImageTag[] = "IMAGE"; @@ -96,7 +96,8 @@ struct HandLandmarkerOutputs { Source> handednesses; }; -absl::Status SanityCheckOptions(const HandLandmarkerSubgraphOptions& options) { +absl::Status SanityCheckOptions( + const HandLandmarksDetectorGraphOptions& options) { if (options.min_detection_confidence() < 0 || options.min_detection_confidence() > 1) { return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument, @@ -183,8 +184,8 @@ void ConfigureHandRectTransformationCalculator( } // namespace -// A "mediapipe.tasks.vision.SingleHandLandmarkerSubgraph" performs hand -// landmark detection. +// A "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarksDetectorGraph" +// performs hand landmarks detection. // - Accepts CPU input images and outputs Landmark on CPU. 
// // Inputs: @@ -212,7 +213,8 @@ void ConfigureHandRectTransformationCalculator( // // Example: // node { -// calculator: "mediapipe.tasks.vision.SingleHandLandmarkerSubgraph" +// calculator: +// "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarksDetectorGraph" // input_stream: "IMAGE:input_image" // input_stream: "HAND_RECT:hand_rect" // output_stream: "LANDMARKS:hand_landmarks" @@ -221,7 +223,7 @@ void ConfigureHandRectTransformationCalculator( // output_stream: "PRESENCE:hand_presence" // output_stream: "PRESENCE_SCORE:hand_presence_score" // options { -// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext] +// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarksDetectorGraphOptions.ext] // { // base_options { // model_asset { @@ -232,16 +234,17 @@ void ConfigureHandRectTransformationCalculator( // } // } // } -class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { +class SingleHandLandmarksDetectorGraph : public core::ModelTaskGraph { public: absl::StatusOr GetConfig( SubgraphContext* sc) override { - ASSIGN_OR_RETURN(const auto* model_resources, - CreateModelResources(sc)); + ASSIGN_OR_RETURN( + const auto* model_resources, + CreateModelResources(sc)); Graph graph; ASSIGN_OR_RETURN(auto hand_landmark_detection_outs, - BuildSingleHandLandmarkerSubgraph( - sc->Options(), + BuildSingleHandLandmarksDetectorGraph( + sc->Options(), *model_resources, graph[Input(kImageTag)], graph[Input(kHandRectTag)], graph)); hand_landmark_detection_outs.hand_landmarks >> @@ -264,14 +267,16 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { // Adds a mediapipe hand landmark detection graph into the provided // builder::Graph instance. // - // subgraph_options: the mediapipe tasks module HandLandmarkerSubgraphOptions. - // model_resources: the ModelSources object initialized from a hand landmark + // subgraph_options: the mediapipe tasks module + // HandLandmarksDetectorGraphOptions. 
+ // model_resources: the ModelResources object initialized from a hand landmark // detection model file with model metadata. // image_in: (mediapipe::Image) stream to run hand landmark detection on. // rect: (NormalizedRect) stream to run on the RoI of image. // graph: the mediapipe graph instance to be updated. - absl::StatusOr BuildSingleHandLandmarkerSubgraph( - const HandLandmarkerSubgraphOptions& subgraph_options, + absl::StatusOr + BuildSingleHandLandmarksDetectorGraph( + const HandLandmarksDetectorGraphOptions& subgraph_options, const core::ModelResources& model_resources, Source image_in, Source hand_rect, Graph& graph) { MP_RETURN_IF_ERROR(SanityCheckOptions(subgraph_options)); @@ -415,11 +420,13 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph { } }; +// clang-format off REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::hand_landmarker::SingleHandLandmarkerSubgraph); + ::mediapipe::tasks::vision::hand_landmarker::SingleHandLandmarksDetectorGraph); // NOLINT +// clang-format on -// A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi hand -// landmark detection. +// A "mediapipe.tasks.vision.hand_landmarker.MultipleHandLandmarksDetectorGraph" +// performs multi hand landmark detection. // - Accepts CPU input image and a vector of hand rect RoIs to detect the // multiple hands landmarks enclosed by the RoIs.
Output vectors of // hand landmarks related results, where each element in the vectors @@ -450,7 +457,8 @@ REGISTER_MEDIAPIPE_GRAPH( // // Example: // node { -// calculator: "mediapipe.tasks.vision.HandLandmarkerSubgraph" +// calculator: +// "mediapipe.tasks.vision.hand_landmarker.MultipleHandLandmarksDetectorGraph" // input_stream: "IMAGE:input_image" // input_stream: "HAND_RECT:hand_rect" // output_stream: "LANDMARKS:hand_landmarks" @@ -460,7 +468,7 @@ REGISTER_MEDIAPIPE_GRAPH( // output_stream: "PRESENCE_SCORE:hand_presence_score" // output_stream: "HANDEDNESS:handedness" // options { -// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext] +// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarksDetectorGraphOptions.ext] // { // base_options { // model_asset { @@ -471,15 +479,15 @@ REGISTER_MEDIAPIPE_GRAPH( // } // } // } -class HandLandmarkerSubgraph : public core::ModelTaskGraph { +class MultipleHandLandmarksDetectorGraph : public core::ModelTaskGraph { public: absl::StatusOr GetConfig( SubgraphContext* sc) override { Graph graph; ASSIGN_OR_RETURN( auto hand_landmark_detection_outputs, - BuildHandLandmarkerSubgraph( - sc->Options(), + BuildHandLandmarksDetectorGraph( + sc->Options(), graph[Input(kImageTag)], graph[Input>(kHandRectTag)], graph)); hand_landmark_detection_outputs.landmark_lists >> @@ -499,14 +507,15 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { } private: - absl::StatusOr BuildHandLandmarkerSubgraph( - const HandLandmarkerSubgraphOptions& subgraph_options, + absl::StatusOr BuildHandLandmarksDetectorGraph( + const HandLandmarksDetectorGraphOptions& subgraph_options, Source image_in, Source> multi_hand_rects, Graph& graph) { auto& hand_landmark_subgraph = graph.AddNode( - "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarkerSubgraph"); - hand_landmark_subgraph.GetOptions().CopyFrom( - subgraph_options); + "mediapipe.tasks.vision.hand_landmarker." 
+ "SingleHandLandmarksDetectorGraph"); + hand_landmark_subgraph.GetOptions() + .CopyFrom(subgraph_options); auto& begin_loop_multi_hand_rects = graph.AddNode("BeginLoopNormalizedRectCalculator"); @@ -580,8 +589,10 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph { } }; +// clang-format off REGISTER_MEDIAPIPE_GRAPH( - ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerSubgraph); + ::mediapipe::tasks::vision::hand_landmarker::MultipleHandLandmarksDetectorGraph); // NOLINT +// clang-format on } // namespace hand_landmarker } // namespace vision diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc similarity index 97% rename from mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc rename to mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc index 7d91dc3c7..d1e928ce7 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_subgraph_test.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc @@ -39,7 +39,7 @@ limitations under the License. 
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" #include "mediapipe/tasks/cc/core/proto/external_file.pb.h" #include "mediapipe/tasks/cc/core/task_runner.h" -#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" namespace mediapipe { @@ -58,7 +58,7 @@ using ::mediapipe::file::JoinPath; using ::mediapipe::tasks::core::TaskRunner; using ::mediapipe::tasks::vision::DecodeImageFromFile; using ::mediapipe::tasks::vision::hand_landmarker::proto:: - HandLandmarkerSubgraphOptions; + HandLandmarksDetectorGraphOptions; using ::testing::ElementsAreArray; using ::testing::EqualsProto; using ::testing::Pointwise; @@ -114,12 +114,13 @@ absl::StatusOr> CreateSingleHandTaskRunner( Graph graph; auto& hand_landmark_detection = graph.AddNode( - "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarkerSubgraph"); + "mediapipe.tasks.vision.hand_landmarker." + "SingleHandLandmarksDetectorGraph"); - auto options = std::make_unique(); + auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( JoinPath("./", kTestDataDirectory, model_name)); - hand_landmark_detection.GetOptions().Swap( + hand_landmark_detection.GetOptions().Swap( options.get()); graph[Input(kImageTag)].SetName(kImageName) >> @@ -153,12 +154,13 @@ absl::StatusOr> CreateMultiHandTaskRunner( Graph graph; auto& multi_hand_landmark_detection = graph.AddNode( - "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerSubgraph"); + "mediapipe.tasks.vision.hand_landmarker." 
+ "MultipleHandLandmarksDetectorGraph"); - auto options = std::make_unique(); + auto options = std::make_unique(); options->mutable_base_options()->mutable_model_asset()->set_file_name( JoinPath("./", kTestDataDirectory, model_name)); - multi_hand_landmark_detection.GetOptions() + multi_hand_landmark_detection.GetOptions() .Swap(options.get()); graph[Input(kImageTag)].SetName(kImageName) >> diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD index 9d1ba6f90..945b12f3e 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/BUILD @@ -21,8 +21,8 @@ package(default_visibility = [ licenses(["notice"]) mediapipe_proto_library( - name = "hand_landmarker_subgraph_options_proto", - srcs = ["hand_landmarker_subgraph_options.proto"], + name = "hand_landmarks_detector_graph_options_proto", + srcs = ["hand_landmarks_detector_graph_options.proto"], deps = [ "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", @@ -34,7 +34,7 @@ mediapipe_proto_library( name = "hand_landmarker_graph_options_proto", srcs = ["hand_landmarker_graph_options.proto"], deps = [ - ":hand_landmarker_subgraph_options_proto", + ":hand_landmarks_detector_graph_options_proto", "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", "//mediapipe/tasks/cc/core/proto:base_options_proto", diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto index 13849ec5e..7f3536b09 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.proto @@ -20,7 +20,7 @@ package mediapipe.tasks.vision.hand_landmarker.proto; import "mediapipe/framework/calculator.proto"; import 
"mediapipe/tasks/cc/core/proto/base_options.proto"; import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto"; -import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto"; +import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto"; message HandLandmarkerGraphOptions { extend mediapipe.CalculatorOptions { @@ -35,7 +35,8 @@ message HandLandmarkerGraphOptions { hand_detector_graph_options = 2; // Options for hand landmarker subgraph. - optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 3; + optional HandLandmarksDetectorGraphOptions + hand_landmarks_detector_graph_options = 3; // Minimum confidence for hand landmarks tracking to be considered // successfully. diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto similarity index 92% rename from mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto rename to mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto index 02d18e8ab..8c0fc66f2 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto @@ -20,9 +20,9 @@ package mediapipe.tasks.vision.hand_landmarker.proto; import "mediapipe/framework/calculator.proto"; import "mediapipe/tasks/cc/core/proto/base_options.proto"; -message HandLandmarkerSubgraphOptions { +message HandLandmarksDetectorGraphOptions { extend mediapipe.CalculatorOptions { - optional HandLandmarkerSubgraphOptions ext = 474472470; + optional HandLandmarksDetectorGraphOptions ext = 474472470; } // Base options for configuring MediaPipe Tasks, such as specifying the TfLite // model file with metadata, accelerator options, etc. 
From 14eb6fe62220eb38886c141db72b677b1c281c98 Mon Sep 17 00:00:00 2001 From: Jiuqiang Tang Date: Tue, 4 Oct 2022 02:32:11 -0700 Subject: [PATCH 05/10] Ensure that the REGISTER_DRISHTI_GRAPH argument is fit on one line in the OSS version. PiperOrigin-RevId: 478729958 --- .../processors/classification_postprocessing_graph.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.cc b/mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.cc index 35adab687..cd5933ee6 100644 --- a/mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.cc +++ b/mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.cc @@ -507,8 +507,11 @@ class ClassificationPostprocessingGraph : public mediapipe::Subgraph { } }; -REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::components::processors:: - ClassificationPostprocessingGraph); // NOLINT +// REGISTER_MEDIAPIPE_GRAPH argument has to fit on one line to work properly. +// clang-format off +REGISTER_MEDIAPIPE_GRAPH( + ::mediapipe::tasks::components::processors::ClassificationPostprocessingGraph); // NOLINT +// clang-format on } // namespace processors } // namespace components From 05209a43923001ca04acee9ded9d4fc7a593f9ee Mon Sep 17 00:00:00 2001 From: Jiuqiang Tang Date: Tue, 4 Oct 2022 04:36:56 -0700 Subject: [PATCH 06/10] Refactor mediapipe_aar.bzl to expose `mediapipe_java_proto_srcs`, `mediapipe_logging_java_proto_srcs`, and `mediapipe_java_proto_src_extractor`. 
PiperOrigin-RevId: 478750184 --- .../com/google/mediapipe/mediapipe_aar.bzl | 194 +++++++++--------- 1 file changed, 92 insertions(+), 102 deletions(-) diff --git a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl index ed1686954..7f2cb146c 100644 --- a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl +++ b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl @@ -89,10 +89,6 @@ def mediapipe_aar( calculators = calculators, ) - _mediapipe_proto( - name = name + "_proto", - ) - native.genrule( name = name + "_aar_manifest_generator", outs = ["AndroidManifest.xml"], @@ -115,19 +111,10 @@ EOF "//mediapipe/java/com/google/mediapipe/components:java_src", "//mediapipe/java/com/google/mediapipe/framework:java_src", "//mediapipe/java/com/google/mediapipe/glutil:java_src", - "com/google/mediapipe/formats/annotation/proto/RasterizationProto.java", - "com/google/mediapipe/formats/proto/ClassificationProto.java", - "com/google/mediapipe/formats/proto/DetectionProto.java", - "com/google/mediapipe/formats/proto/LandmarkProto.java", - "com/google/mediapipe/formats/proto/LocationDataProto.java", - "com/google/mediapipe/proto/CalculatorProto.java", - ] + + ] + mediapipe_java_proto_srcs() + select({ "//conditions:default": [], - "enable_stats_logging": [ - "com/google/mediapipe/proto/MediaPipeLoggingProto.java", - "com/google/mediapipe/proto/MediaPipeLoggingEnumsProto.java", - ], + "enable_stats_logging": mediapipe_logging_java_proto_srcs(), }), manifest = "AndroidManifest.xml", proguard_specs = ["//mediapipe/java/com/google/mediapipe/framework:proguard.pgcfg"], @@ -179,93 +166,6 @@ EOF _aar_with_jni(name, name + "_android_lib") -def _mediapipe_proto(name): - """Generates MediaPipe java proto libraries. - - Args: - name: the name of the target. 
- """ - _proto_java_src_generator( - name = "mediapipe_log_extension_proto", - proto_src = "mediapipe/util/analytics/mediapipe_log_extension.proto", - java_lite_out = "com/google/mediapipe/proto/MediaPipeLoggingProto.java", - srcs = ["//mediapipe/util/analytics:protos_src"], - ) - - _proto_java_src_generator( - name = "mediapipe_logging_enums_proto", - proto_src = "mediapipe/util/analytics/mediapipe_logging_enums.proto", - java_lite_out = "com/google/mediapipe/proto/MediaPipeLoggingEnumsProto.java", - srcs = ["//mediapipe/util/analytics:protos_src"], - ) - - _proto_java_src_generator( - name = "calculator_proto", - proto_src = "mediapipe/framework/calculator.proto", - java_lite_out = "com/google/mediapipe/proto/CalculatorProto.java", - srcs = ["//mediapipe/framework:protos_src"], - ) - - _proto_java_src_generator( - name = "landmark_proto", - proto_src = "mediapipe/framework/formats/landmark.proto", - java_lite_out = "com/google/mediapipe/formats/proto/LandmarkProto.java", - srcs = ["//mediapipe/framework/formats:protos_src"], - ) - - _proto_java_src_generator( - name = "rasterization_proto", - proto_src = "mediapipe/framework/formats/annotation/rasterization.proto", - java_lite_out = "com/google/mediapipe/formats/annotation/proto/RasterizationProto.java", - srcs = ["//mediapipe/framework/formats/annotation:protos_src"], - ) - - _proto_java_src_generator( - name = "location_data_proto", - proto_src = "mediapipe/framework/formats/location_data.proto", - java_lite_out = "com/google/mediapipe/formats/proto/LocationDataProto.java", - srcs = [ - "//mediapipe/framework/formats:protos_src", - "//mediapipe/framework/formats/annotation:protos_src", - ], - ) - - _proto_java_src_generator( - name = "detection_proto", - proto_src = "mediapipe/framework/formats/detection.proto", - java_lite_out = "com/google/mediapipe/formats/proto/DetectionProto.java", - srcs = [ - "//mediapipe/framework/formats:protos_src", - "//mediapipe/framework/formats/annotation:protos_src", - ], - ) - - 
_proto_java_src_generator( - name = "classification_proto", - proto_src = "mediapipe/framework/formats/classification.proto", - java_lite_out = "com/google/mediapipe/formats/proto/ClassificationProto.java", - srcs = [ - "//mediapipe/framework/formats:protos_src", - ], - ) - -def _proto_java_src_generator(name, proto_src, java_lite_out, srcs = []): - native.genrule( - name = name + "_proto_java_src_generator", - srcs = srcs + [ - "@com_google_protobuf//:lite_well_known_protos", - ], - outs = [java_lite_out], - cmd = "$(location @com_google_protobuf//:protoc) " + - "--proto_path=. --proto_path=$(GENDIR) " + - "--proto_path=$$(pwd)/external/com_google_protobuf/src " + - "--java_out=lite:$(GENDIR) " + proto_src + " && " + - "mv $(GENDIR)/" + java_lite_out + " $$(dirname $(location " + java_lite_out + "))", - tools = [ - "@com_google_protobuf//:protoc", - ], - ) - def _mediapipe_jni(name, gen_libmediapipe, calculators = []): """Generates MediaPipe jni library. @@ -345,3 +245,93 @@ cp -r lib jni zip -r $$origdir/$(location :{}.aar) jni/*/*.so """.format(android_library, name, name, name, name), ) + +def mediapipe_java_proto_src_extractor(target, src_out, name = ""): + """Extracts the generated MediaPipe java proto source code from the target. + + Args: + target: The java proto lite target to be built and extracted. + src_out: The output java proto src code path. + name: The optional bazel target name. + + Returns: + The output java proto src code path. 
+ """ + + if not name: + name = target.split(":")[-1] + "_proto_java_src_extractor" + src_jar = target.replace("_java_proto_lite", "_proto-lite-src.jar").replace(":", "/").replace("//", "") + native.genrule( + name = name + "_proto_java_src_extractor", + srcs = [target], + outs = [src_out], + cmd = "unzip $(GENDIR)/" + src_jar + " -d $(GENDIR) && mv $(GENDIR)/" + + src_out + " $$(dirname $(location " + src_out + "))", + ) + return src_out + +def mediapipe_java_proto_srcs(name = ""): + """Extracts the generated MediaPipe framework java proto source code. + + Args: + name: The optional bazel target name. + + Returns: + The list of the extracted MediaPipe java proto source code. + """ + + proto_src_list = [] + + proto_src_list.append(mediapipe_java_proto_src_extractor( + target = "//mediapipe/framework:calculator_java_proto_lite", + src_out = "com/google/mediapipe/proto/CalculatorProto.java", + )) + + proto_src_list.append(mediapipe_java_proto_src_extractor( + target = "//mediapipe/framework/formats:landmark_java_proto_lite", + src_out = "com/google/mediapipe/formats/proto/LandmarkProto.java", + )) + + proto_src_list.append(mediapipe_java_proto_src_extractor( + target = "//mediapipe/framework/formats/annotation:rasterization_java_proto_lite", + src_out = "com/google/mediapipe/formats/annotation/proto/RasterizationProto.java", + )) + + proto_src_list.append(mediapipe_java_proto_src_extractor( + target = "//mediapipe/framework/formats:location_data_java_proto_lite", + src_out = "com/google/mediapipe/formats/proto/LocationDataProto.java", + )) + + proto_src_list.append(mediapipe_java_proto_src_extractor( + target = "//mediapipe/framework/formats:detection_java_proto_lite", + src_out = "com/google/mediapipe/formats/proto/DetectionProto.java", + )) + + proto_src_list.append(mediapipe_java_proto_src_extractor( + target = "//mediapipe/framework/formats:classification_java_proto_lite", + src_out = "com/google/mediapipe/formats/proto/ClassificationProto.java", + )) + return
proto_src_list + +def mediapipe_logging_java_proto_srcs(name = ""): + """Extracts the generated logging-related MediaPipe java proto source code. + + Args: + name: The optional bazel target name. + + Returns: + The list of the extracted MediaPipe logging-related java proto source code. + """ + + proto_src_list = [] + + proto_src_list.append(mediapipe_java_proto_src_extractor( + target = "//mediapipe/util/analytics:mediapipe_log_extension_java_proto_lite", + src_out = "com/google/mediapipe/proto/MediaPipeLoggingProto.java", + )) + + proto_src_list.append(mediapipe_java_proto_src_extractor( + target = "//mediapipe/util/analytics:mediapipe_logging_enums_java_proto_lite", + src_out = "com/google/mediapipe/proto/MediaPipeLoggingEnumsProto.java", + )) + return proto_src_list From 8d5cf9bbedb2ee97651f57feeb273f0da48e4f78 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 4 Oct 2022 09:40:03 -0700 Subject: [PATCH 07/10] Open source MediaPipe object detector task reference app prototype.
PiperOrigin-RevId: 478811683 --- .../instantmotiontracking/GIFEditText.java | 2 +- mediapipe/tasks/examples/android/BUILD | 21 ++ .../src/main/AndroidManifest.xml | 37 +++ .../android/objectdetector/src/main/BUILD | 48 ++++ .../examples/objectdetector/MainActivity.java | 236 ++++++++++++++++++ .../ObjectDetectionResultImageView.java | 77 ++++++ .../drawable-v24/ic_launcher_foreground.xml | 34 +++ .../res/drawable/ic_launcher_background.xml | 74 ++++++ .../android/res/layout/activity_main.xml | 40 +++ .../res/mipmap-anydpi-v26/ic_launcher.xml | 5 + .../mipmap-anydpi-v26/ic_launcher_round.xml | 5 + .../android/res/mipmap-hdpi/ic_launcher.png | Bin 0 -> 1354 bytes .../mipmap-hdpi/ic_launcher_foreground.png | Bin 0 -> 2257 bytes .../res/mipmap-hdpi/ic_launcher_round.png | Bin 0 -> 3246 bytes .../android/res/mipmap-mdpi/ic_launcher.png | Bin 0 -> 959 bytes .../mipmap-mdpi/ic_launcher_foreground.png | Bin 0 -> 900 bytes .../res/mipmap-mdpi/ic_launcher_round.png | Bin 0 -> 1955 bytes .../android/res/mipmap-xhdpi/ic_launcher.png | Bin 0 -> 1971 bytes .../mipmap-xhdpi/ic_launcher_foreground.png | Bin 0 -> 1845 bytes .../res/mipmap-xhdpi/ic_launcher_round.png | Bin 0 -> 4658 bytes .../android/res/mipmap-xxhdpi/ic_launcher.png | Bin 0 -> 3562 bytes .../mipmap-xxhdpi/ic_launcher_foreground.png | Bin 0 -> 5655 bytes .../res/mipmap-xxhdpi/ic_launcher_round.png | Bin 0 -> 7745 bytes .../res/mipmap-xxxhdpi/ic_launcher.png | Bin 0 -> 5004 bytes .../mipmap-xxxhdpi/ic_launcher_foreground.png | Bin 0 -> 8278 bytes .../res/mipmap-xxxhdpi/ic_launcher_round.png | Bin 0 -> 11062 bytes .../examples/android/res/values/colors.xml | 6 + .../examples/android/res/values/strings.xml | 6 + .../examples/android/res/values/styles.xml | 11 + 29 files changed, 601 insertions(+), 1 deletion(-) create mode 100644 mediapipe/tasks/examples/android/BUILD create mode 100644 mediapipe/tasks/examples/android/objectdetector/src/main/AndroidManifest.xml create mode 100644 
mediapipe/tasks/examples/android/objectdetector/src/main/BUILD create mode 100644 mediapipe/tasks/examples/android/objectdetector/src/main/java/com/google/mediapipe/tasks/examples/objectdetector/MainActivity.java create mode 100644 mediapipe/tasks/examples/android/objectdetector/src/main/java/com/google/mediapipe/tasks/examples/objectdetector/ObjectDetectionResultImageView.java create mode 100644 mediapipe/tasks/examples/android/res/drawable-v24/ic_launcher_foreground.xml create mode 100644 mediapipe/tasks/examples/android/res/drawable/ic_launcher_background.xml create mode 100644 mediapipe/tasks/examples/android/res/layout/activity_main.xml create mode 100644 mediapipe/tasks/examples/android/res/mipmap-anydpi-v26/ic_launcher.xml create mode 100644 mediapipe/tasks/examples/android/res/mipmap-anydpi-v26/ic_launcher_round.xml create mode 100644 mediapipe/tasks/examples/android/res/mipmap-hdpi/ic_launcher.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-hdpi/ic_launcher_foreground.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-hdpi/ic_launcher_round.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-mdpi/ic_launcher.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-mdpi/ic_launcher_foreground.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-mdpi/ic_launcher_round.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-xhdpi/ic_launcher.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-xhdpi/ic_launcher_foreground.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-xhdpi/ic_launcher_round.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-xxhdpi/ic_launcher.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-xxhdpi/ic_launcher_foreground.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-xxhdpi/ic_launcher_round.png create mode 100644 
mediapipe/tasks/examples/android/res/mipmap-xxxhdpi/ic_launcher.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-xxxhdpi/ic_launcher_foreground.png create mode 100644 mediapipe/tasks/examples/android/res/mipmap-xxxhdpi/ic_launcher_round.png create mode 100644 mediapipe/tasks/examples/android/res/values/colors.xml create mode 100644 mediapipe/tasks/examples/android/res/values/strings.xml create mode 100644 mediapipe/tasks/examples/android/res/values/styles.xml diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/GIFEditText.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/GIFEditText.java index 1b733ed82..10e6422ba 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/GIFEditText.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/GIFEditText.java @@ -18,7 +18,7 @@ import android.content.ClipDescription; import android.content.Context; import android.net.Uri; import android.os.Bundle; -import androidx.appcompat.widget.AppCompatEditText; +import android.support.v7.widget.AppCompatEditText; import android.util.AttributeSet; import android.util.Log; import android.view.inputmethod.EditorInfo; diff --git a/mediapipe/tasks/examples/android/BUILD b/mediapipe/tasks/examples/android/BUILD new file mode 100644 index 000000000..c07af2d2c --- /dev/null +++ b/mediapipe/tasks/examples/android/BUILD @@ -0,0 +1,21 @@ +# Copyright 2022 The MediaPipe Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) + +filegroup( + name = "resource_files", + srcs = glob(["res/**"]), + visibility = ["//mediapipe/tasks/examples/android:__subpackages__"], +) diff --git a/mediapipe/tasks/examples/android/objectdetector/src/main/AndroidManifest.xml b/mediapipe/tasks/examples/android/objectdetector/src/main/AndroidManifest.xml new file mode 100644 index 000000000..5c53dc269 --- /dev/null +++ b/mediapipe/tasks/examples/android/objectdetector/src/main/AndroidManifest.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mediapipe/tasks/examples/android/objectdetector/src/main/BUILD b/mediapipe/tasks/examples/android/objectdetector/src/main/BUILD new file mode 100644 index 000000000..65b98d647 --- /dev/null +++ b/mediapipe/tasks/examples/android/objectdetector/src/main/BUILD @@ -0,0 +1,48 @@ +# Copyright 2022 The MediaPipe Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +licenses(["notice"]) + +package(default_visibility = ["//visibility:private"]) + +android_binary( + name = "objectdetector", + srcs = glob(["**/*.java"]), + assets = [ + "//mediapipe/tasks/testdata/vision:test_models", + ], + assets_dir = "", + custom_package = "com.google.mediapipe.tasks.examples.objectdetector", + manifest = "AndroidManifest.xml", + manifest_values = { + "applicationId": "com.google.mediapipe.tasks.examples.objectdetector", + }, + multidex = "native", + resource_files = ["//mediapipe/tasks/examples/android:resource_files"], + deps = [ + "//mediapipe/java/com/google/mediapipe/framework/image", + "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:detection", + "//mediapipe/tasks/java/com/google/mediapipe/tasks/core", + "//mediapipe/tasks/java/com/google/mediapipe/tasks/vision/core", + "//mediapipe/tasks/java/com/google/mediapipe/tasks/vision/objectdetector", + "//third_party:androidx_appcompat", + "//third_party:androidx_constraint_layout", + "//third_party:opencv", + "@maven//:androidx_activity_activity", + "@maven//:androidx_concurrent_concurrent_futures", + "@maven//:androidx_exifinterface_exifinterface", + "@maven//:androidx_fragment_fragment", + "@maven//:com_google_guava_guava", + ], +) diff --git a/mediapipe/tasks/examples/android/objectdetector/src/main/java/com/google/mediapipe/tasks/examples/objectdetector/MainActivity.java b/mediapipe/tasks/examples/android/objectdetector/src/main/java/com/google/mediapipe/tasks/examples/objectdetector/MainActivity.java new file mode 100644 index 000000000..7f7ec1389 --- /dev/null +++ b/mediapipe/tasks/examples/android/objectdetector/src/main/java/com/google/mediapipe/tasks/examples/objectdetector/MainActivity.java @@ -0,0 +1,236 @@ +// Copyright 2022 The MediaPipe Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.tasks.examples.objectdetector; + +import android.content.Intent; +import android.graphics.Bitmap; +import android.graphics.Matrix; +import android.media.MediaMetadataRetriever; +import android.os.Bundle; +import android.provider.MediaStore; +import androidx.appcompat.app.AppCompatActivity; +import android.util.Log; +import android.view.View; +import android.widget.Button; +import android.widget.FrameLayout; +import androidx.activity.result.ActivityResultLauncher; +import androidx.activity.result.contract.ActivityResultContracts; +import androidx.exifinterface.media.ExifInterface; +// ContentResolver dependency +import com.google.mediapipe.framework.image.BitmapImageBuilder; +import com.google.mediapipe.framework.image.Image; +import com.google.mediapipe.tasks.core.BaseOptions; +import com.google.mediapipe.tasks.vision.core.RunningMode; +import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetectionResult; +import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetector; +import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetector.ObjectDetectorOptions; +import java.io.IOException; +import java.io.InputStream; + +/** Main activity of MediaPipe Task Object Detector reference app. 
*/ +public class MainActivity extends AppCompatActivity { + private static final String TAG = "MainActivity"; + private static final String MODEL_FILE = "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.tflite"; + + private ObjectDetector objectDetector; + + private enum InputSource { + UNKNOWN, + IMAGE, + VIDEO, + CAMERA, + } + + private InputSource inputSource = InputSource.UNKNOWN; + + // Image mode demo component. + private ActivityResultLauncher imageGetter; + // Video mode demo component. + private ActivityResultLauncher videoGetter; + private ObjectDetectionResultImageView imageView; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + setupImageModeDemo(); + setupVideoModeDemo(); + // TODO: Adds live camera demo. + } + + /** Sets up the image mode demo. */ + private void setupImageModeDemo() { + imageView = new ObjectDetectionResultImageView(this); + // The Intent to access gallery and read images as bitmap. 
+ imageGetter = + registerForActivityResult( + new ActivityResultContracts.StartActivityForResult(), + result -> { + Intent resultIntent = result.getData(); + if (resultIntent != null) { + if (result.getResultCode() == RESULT_OK) { + Bitmap bitmap = null; + try { + bitmap = + downscaleBitmap( + MediaStore.Images.Media.getBitmap( + this.getContentResolver(), resultIntent.getData())); + } catch (IOException e) { + Log.e(TAG, "Bitmap reading error:" + e); + } + try { + InputStream imageData = + this.getContentResolver().openInputStream(resultIntent.getData()); + bitmap = rotateBitmap(bitmap, imageData); + } catch (IOException e) { + Log.e(TAG, "Bitmap rotation error:" + e); + } + if (bitmap != null) { + Image image = new BitmapImageBuilder(bitmap).build(); + ObjectDetectionResult detectionResult = objectDetector.detect(image); + imageView.setData(image, detectionResult); + runOnUiThread(() -> imageView.update()); + } + } + } + }); + Button loadImageButton = findViewById(R.id.button_load_picture); + loadImageButton.setOnClickListener( + v -> { + if (inputSource != InputSource.IMAGE) { + createObjectDetector(RunningMode.IMAGE); + this.inputSource = InputSource.IMAGE; + updateLayout(); + } + // Reads images from gallery. + Intent pickImageIntent = new Intent(Intent.ACTION_PICK); + pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*"); + imageGetter.launch(pickImageIntent); + }); + } + + /** Sets up the video mode demo. */ + private void setupVideoModeDemo() { + imageView = new ObjectDetectionResultImageView(this); + // The Intent to access gallery and read a video file. 
+ videoGetter = + registerForActivityResult( + new ActivityResultContracts.StartActivityForResult(), + result -> { + Intent resultIntent = result.getData(); + if (resultIntent != null) { + if (result.getResultCode() == RESULT_OK) { + MediaMetadataRetriever metaRetriever = new MediaMetadataRetriever(); + metaRetriever.setDataSource(this, resultIntent.getData()); + long duration = + Long.parseLong( + metaRetriever.extractMetadata( + MediaMetadataRetriever.METADATA_KEY_DURATION)); + int numFrames = + Integer.parseInt( + metaRetriever.extractMetadata( + MediaMetadataRetriever.METADATA_KEY_VIDEO_FRAME_COUNT)); + long frameIntervalMs = duration / numFrames; + for (int i = 0; i < numFrames; ++i) { + Image image = new BitmapImageBuilder(metaRetriever.getFrameAtIndex(i)).build(); + ObjectDetectionResult detectionResult = + objectDetector.detectForVideo(image, frameIntervalMs * i); + // Currently only annotates the detection result on the first video frame and + // display it to verify the correctness. + // TODO: Annotates the detection result on every frame, save the + // annotated frames as a video file, and play back the video afterwards. + if (i == 0) { + imageView.setData(image, detectionResult); + runOnUiThread(() -> imageView.update()); + } + } + } + } + }); + Button loadVideoButton = findViewById(R.id.button_load_video); + loadVideoButton.setOnClickListener( + v -> { + createObjectDetector(RunningMode.VIDEO); + updateLayout(); + this.inputSource = InputSource.VIDEO; + + // Reads a video from gallery. 
+ Intent pickVideoIntent = new Intent(Intent.ACTION_PICK); + pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*"); + videoGetter.launch(pickVideoIntent); + }); + } + + private void createObjectDetector(RunningMode mode) { + if (objectDetector != null) { + objectDetector.close(); + } + // Initializes a new MediaPipe ObjectDetector instance + ObjectDetectorOptions options = + ObjectDetectorOptions.builder() + .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build()) + .setScoreThreshold(0.5f) + .setMaxResults(5) + .setRunningMode(mode) + .build(); + objectDetector = ObjectDetector.createFromOptions(this, options); + } + + private void updateLayout() { + // Updates the preview layout. + FrameLayout frameLayout = findViewById(R.id.preview_display_layout); + frameLayout.removeAllViewsInLayout(); + imageView.setImageDrawable(null); + frameLayout.addView(imageView); + imageView.setVisibility(View.VISIBLE); + } + + private Bitmap downscaleBitmap(Bitmap originalBitmap) { + double aspectRatio = (double) originalBitmap.getWidth() / originalBitmap.getHeight(); + int width = imageView.getWidth(); + int height = imageView.getHeight(); + if (((double) imageView.getWidth() / imageView.getHeight()) > aspectRatio) { + width = (int) (height * aspectRatio); + } else { + height = (int) (width / aspectRatio); + } + return Bitmap.createScaledBitmap(originalBitmap, width, height, false); + } + + private Bitmap rotateBitmap(Bitmap inputBitmap, InputStream imageData) throws IOException { + int orientation = + new ExifInterface(imageData) + .getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL); + if (orientation == ExifInterface.ORIENTATION_NORMAL) { + return inputBitmap; + } + Matrix matrix = new Matrix(); + switch (orientation) { + case ExifInterface.ORIENTATION_ROTATE_90: + matrix.postRotate(90); + break; + case ExifInterface.ORIENTATION_ROTATE_180: + matrix.postRotate(180); + break; + case 
ExifInterface.ORIENTATION_ROTATE_270: + matrix.postRotate(270); + break; + default: + matrix.postRotate(0); + } + return Bitmap.createBitmap( + inputBitmap, 0, 0, inputBitmap.getWidth(), inputBitmap.getHeight(), matrix, true); + } +} diff --git a/mediapipe/tasks/examples/android/objectdetector/src/main/java/com/google/mediapipe/tasks/examples/objectdetector/ObjectDetectionResultImageView.java b/mediapipe/tasks/examples/android/objectdetector/src/main/java/com/google/mediapipe/tasks/examples/objectdetector/ObjectDetectionResultImageView.java new file mode 100644 index 000000000..94a4a90dc --- /dev/null +++ b/mediapipe/tasks/examples/android/objectdetector/src/main/java/com/google/mediapipe/tasks/examples/objectdetector/ObjectDetectionResultImageView.java @@ -0,0 +1,77 @@ +// Copyright 2022 The MediaPipe Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.google.mediapipe.tasks.examples.objectdetector; + +import android.content.Context; +import android.graphics.Bitmap; +import android.graphics.Canvas; +import android.graphics.Color; +import android.graphics.Matrix; +import android.graphics.Paint; +import androidx.appcompat.widget.AppCompatImageView; +import com.google.mediapipe.framework.image.BitmapExtractor; +import com.google.mediapipe.framework.image.Image; +import com.google.mediapipe.tasks.components.containers.Detection; +import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetectionResult; + +/** An ImageView implementation for displaying {@link ObjectDetectionResult}. */ +public class ObjectDetectionResultImageView extends AppCompatImageView { + private static final String TAG = "ObjectDetectionResultImageView"; + + private static final int BBOX_COLOR = Color.GREEN; + private static final int BBOX_THICKNESS = 5; // Pixels + private Bitmap latest; + + public ObjectDetectionResultImageView(Context context) { + super(context); + setScaleType(AppCompatImageView.ScaleType.FIT_CENTER); + } + + /** + * Sets an {@link Image} and an {@link ObjectDetectionResult} to render. + * + * @param image an {@link Image} object for annotation. + * @param result an {@link ObjectDetectionResult} object that contains the detection result. + */ + public void setData(Image image, ObjectDetectionResult result) { + if (image == null || result == null) { + return; + } + latest = BitmapExtractor.extract(image); + Canvas canvas = new Canvas(latest); + canvas.drawBitmap(latest, new Matrix(), null); + for (int i = 0; i < result.detections().size(); ++i) { + drawDetectionOnCanvas(result.detections().get(i), canvas); + } + } + + /** Updates the image view with the latest {@link ObjectDetectionResult}. 
*/ + public void update() { + postInvalidate(); + if (latest != null) { + setImageBitmap(latest); + } + } + + private void drawDetectionOnCanvas(Detection detection, Canvas canvas) { + // TODO: Draws the category and the score per bounding box. + // Draws bounding box. + Paint bboxPaint = new Paint(); + bboxPaint.setColor(BBOX_COLOR); + bboxPaint.setStyle(Paint.Style.STROKE); + bboxPaint.setStrokeWidth(BBOX_THICKNESS); + canvas.drawRect(detection.boundingBox(), bboxPaint); + } +} diff --git a/mediapipe/tasks/examples/android/res/drawable-v24/ic_launcher_foreground.xml b/mediapipe/tasks/examples/android/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 000000000..c7bd21dbd --- /dev/null +++ b/mediapipe/tasks/examples/android/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + diff --git a/mediapipe/tasks/examples/android/res/drawable/ic_launcher_background.xml b/mediapipe/tasks/examples/android/res/drawable/ic_launcher_background.xml new file mode 100644 index 000000000..01f0af0ad --- /dev/null +++ b/mediapipe/tasks/examples/android/res/drawable/ic_launcher_background.xml @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mediapipe/tasks/examples/android/res/layout/activity_main.xml b/mediapipe/tasks/examples/android/res/layout/activity_main.xml new file mode 100644 index 000000000..834e9a3e6 --- /dev/null +++ b/mediapipe/tasks/examples/android/res/layout/activity_main.xml @@ -0,0 +1,40 @@ + + + +