Merge branch 'master' into image-classification-python-impl

commit b0d855daad
Kinar R, 2022-10-04 16:53:33 +05:30 (committed by GitHub)
28 changed files with 865 additions and 252 deletions

View File

@@ -143,6 +143,98 @@ Below is an example of how to create a subgraph named `TwoPassThroughSubgraph`.
 }
 ```
+## Graph Options
+
+It is possible to specify a "graph options" protobuf for a MediaPipe graph
+similar to the [`Calculator Options`](calculators.md#calculator-options)
+protobuf specified for a MediaPipe calculator. These "graph options" can be
+specified where a graph is invoked, and used to populate calculator options and
+subgraph options within the graph.
+
+In a CalculatorGraphConfig, graph options can be specified for a subgraph
+exactly like calculator options, as shown below:
+
+```
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "image"
+  output_stream: "throttled_image"
+  node_options: {
+    [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
+      max_in_flight: 1
+    }
+  }
+}
+
+node {
+  calculator: "FaceDetectionSubgraph"
+  input_stream: "IMAGE:throttled_image"
+  node_options: {
+    [type.googleapis.com/mediapipe.FaceDetectionOptions] {
+      tensor_width: 192
+      tensor_height: 192
+    }
+  }
+}
+```
+
+In a CalculatorGraphConfig, graph options can be accepted and used to populate
+calculator options, as shown below:
+
+```
+graph_options: {
+  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
+}
+
+node: {
+  calculator: "ImageToTensorCalculator"
+  input_stream: "IMAGE:multi_backend_image"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageToTensorCalculatorOptions] {
+      keep_aspect_ratio: true
+      border_mode: BORDER_ZERO
+    }
+  }
+  option_value: "output_tensor_width:options/tensor_width"
+  option_value: "output_tensor_height:options/tensor_height"
+}
+
+node {
+  calculator: "InferenceCalculator"
+  node_options: {
+    [type.googleapis.com/mediapipe.InferenceCalculatorOptions] {}
+  }
+  option_value: "delegate:options/delegate"
+  option_value: "model_path:options/model_path"
+}
+```
+
+In this example, the `FaceDetectionSubgraph` accepts the graph options
+protobuf `FaceDetectionOptions`. `FaceDetectionOptions` is used to define some
+field values in the calculator options `ImageToTensorCalculatorOptions` and
+some field values in the subgraph options `InferenceCalculatorOptions`. The
+field values are defined using the `option_value:` syntax.
+
+In the `CalculatorGraphConfig::Node` protobuf, the fields `node_options:` and
+`option_value:` together define the option values for a calculator such as
+`ImageToTensorCalculator`. The `node_options:` field defines a set of literal
+constant values using the text protobuf syntax. Each `option_value:` field
+defines the value for one protobuf field using information from the enclosing
+graph, specifically from the field values of the graph options of the
+enclosing graph. In the example above, the `option_value:`
+`"output_tensor_width:options/tensor_width"` defines the field
+`ImageToTensorCalculatorOptions.output_tensor_width` using the value of
+`FaceDetectionOptions.tensor_width`.
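
For illustration, applying the graph options from the first example
(`tensor_width: 192`, `tensor_height: 192`) to the two `option_value:` mappings
above resolves them as if the node had been written with literal values. The
following is a conceptual sketch of the expanded configuration, not output
generated by MediaPipe:

```
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  node_options: {
    [type.googleapis.com/mediapipe.ImageToTensorCalculatorOptions] {
      keep_aspect_ratio: true
      border_mode: BORDER_ZERO
      output_tensor_width: 192   # resolved from FaceDetectionOptions.tensor_width
      output_tensor_height: 192  # resolved from FaceDetectionOptions.tensor_height
    }
  }
}
```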
+
+The syntax of `option_value:` is similar to the syntax of `input_stream:`. The
+syntax is `option_value: "LHS:RHS"`. The LHS identifies a calculator option
+field and the RHS identifies a graph option field. More specifically, the LHS
+and RHS each consist of a series of protobuf field names, separated by '/',
+that identify nested protobuf messages and fields. This is known as the
+"ProtoPath" syntax. Nested messages that are referenced in the LHS or RHS must
+already be defined in the enclosing protobuf in order to be traversed using
+`option_value:`.
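
Each `/`-separated segment names one protobuf field along the path. The sketch
below only illustrates the `LHS:RHS` shape with nested messages; the nested
field names are hypothetical and are not taken from the real
`InferenceCalculatorOptions` or `FaceDetectionOptions` protos:

```
# Hypothetical nested ProtoPath (illustrative field names only).
# LHS: walks the calculator options -> delegate.gpu.cache_dir
# RHS: walks the graph options      -> acceleration.gpu.cache_dir
option_value: "delegate/gpu/cache_dir:acceleration/gpu/cache_dir"
```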
 ## Cycles
 <!-- TODO: add discussion of PreviousLoopbackCalculator -->

View File

@@ -507,8 +507,11 @@ class ClassificationPostprocessingGraph : public mediapipe::Subgraph {
   }
 };
 
+// REGISTER_MEDIAPIPE_GRAPH argument has to fit on one line to work properly.
+// clang-format off
 REGISTER_MEDIAPIPE_GRAPH(
     ::mediapipe::tasks::components::processors::ClassificationPostprocessingGraph);  // NOLINT
+// clang-format on
 
 }  // namespace processors
 }  // namespace components

View File

@@ -41,8 +41,8 @@ cc_test(
 )
 
 cc_library(
-    name = "hand_gesture_recognizer_subgraph",
-    srcs = ["hand_gesture_recognizer_subgraph.cc"],
+    name = "hand_gesture_recognizer_graph",
+    srcs = ["hand_gesture_recognizer_graph.cc"],
     deps = [
         "//mediapipe/calculators/core:concatenate_vector_calculator",
         "//mediapipe/calculators/tensor:tensor_converter_calculator",
@@ -62,11 +62,11 @@ cc_library(
         "//mediapipe/tasks/cc/core:model_task_graph",
         "//mediapipe/tasks/cc/core:utils",
         "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
-        "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators:handedness_to_matrix_calculator",
-        "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators:landmarks_to_matrix_calculator",
-        "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:hand_gesture_recognizer_subgraph_options_cc_proto",
-        "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:landmarks_to_matrix_calculator_cc_proto",
-        "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_subgraph",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:handedness_to_matrix_calculator",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator_cc_proto",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_cc_proto",
+        "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarks_detector_graph",
         "//mediapipe/tasks/cc/vision/utils:image_tensor_specs",
         "//mediapipe/tasks/metadata:metadata_schema_cc",
         "@com_google_absl//absl/status",

View File

@@ -12,11 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
+
 package(default_visibility = [
     "//mediapipe/app/xeno:__subpackages__",
     "//mediapipe/tasks:internal",
 ])
 
+mediapipe_proto_library(
+    name = "landmarks_to_matrix_calculator_proto",
+    srcs = ["landmarks_to_matrix_calculator.proto"],
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
+        "//mediapipe/tasks/cc/core/proto:base_options_proto",
+    ],
+)
+
 cc_library(
     name = "handedness_to_matrix_calculator",
     srcs = ["handedness_to_matrix_calculator.cc"],
@@ -25,7 +37,7 @@ cc_library(
         "//mediapipe/framework/formats:classification_cc_proto",
         "//mediapipe/framework/formats:matrix",
         "//mediapipe/framework/port:ret_check",
-        "//mediapipe/tasks/cc/vision/hand_gesture_recognizer:handedness_util",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer:handedness_util",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",
@@ -53,11 +65,11 @@ cc_library(
     name = "landmarks_to_matrix_calculator",
     srcs = ["landmarks_to_matrix_calculator.cc"],
     deps = [
+        ":landmarks_to_matrix_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/formats:matrix",
         "//mediapipe/framework/port:ret_check",
-        "//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:landmarks_to_matrix_calculator_cc_proto",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",
         "@com_google_absl//absl/strings",

View File

@@ -26,14 +26,16 @@ limitations under the License.
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/matrix.h"
 #include "mediapipe/framework/port/ret_check.h"
-#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h"
+#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h"
 
+// TODO Update to use API2
 namespace mediapipe {
-namespace tasks {
-namespace vision {
+namespace api2 {
 
 namespace {
 
+using ::mediapipe::tasks::vision::gesture_recognizer::GetLeftHandScore;
+
 constexpr char kHandednessTag[] = "HANDEDNESS";
 constexpr char kHandednessMatrixTag[] = "HANDEDNESS_MATRIX";
@@ -71,6 +73,8 @@ class HandednessToMatrixCalculator : public CalculatorBase {
     return absl::OkStatus();
   }
 
+  // TODO remove this after change to API2, because Setting offset
+  // to 0 is the default in API2
   absl::Status Open(CalculatorContext* cc) override {
     cc->SetOffset(TimestampDiff(0));
     return absl::OkStatus();
@@ -95,6 +99,5 @@ absl::Status HandednessToMatrixCalculator::Process(CalculatorContext* cc) {
   return absl::OkStatus();
 }
 
-}  // namespace vision
-}  // namespace tasks
+}  // namespace api2
 }  // namespace mediapipe

View File

@@ -28,8 +28,6 @@ limitations under the License.
 #include "mediapipe/framework/port/status_matchers.h"
 
 namespace mediapipe {
-namespace tasks {
-namespace vision {
 
 namespace {
@@ -95,6 +93,4 @@ INSTANTIATE_TEST_CASE_P(
 }  // namespace
 
-}  // namespace vision
-}  // namespace tasks
 }  // namespace mediapipe

View File

@@ -27,13 +27,11 @@ limitations under the License.
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/matrix.h"
 #include "mediapipe/framework/port/ret_check.h"
-#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.pb.h"
+#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h"
 
+// TODO Update to use API2
 namespace mediapipe {
-namespace tasks {
-namespace vision {
-
-using proto::LandmarksToMatrixCalculatorOptions;
+namespace api2 {
 
 namespace {
@@ -175,7 +173,7 @@ absl::Status ProcessLandmarks(LandmarkListT landmarks, CalculatorContext* cc) {
 //   input_stream: "IMAGE_SIZE:image_size"
 //   output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
 //   options {
-//     [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions.ext] {
+//     [mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
 //       object_normalization: true
 //       object_normalization_origin_offset: 0
 //     }
@@ -221,6 +219,5 @@ absl::Status LandmarksToMatrixCalculator::Process(CalculatorContext* cc) {
   return absl::OkStatus();
 }
 
-}  // namespace vision
-}  // namespace tasks
+}  // namespace api2
 }  // namespace mediapipe

View File

@@ -15,7 +15,7 @@ limitations under the License.
 syntax = "proto2";
 
-package mediapipe.tasks.vision.proto;
+package mediapipe;
 
 import "mediapipe/framework/calculator.proto";

View File

@@ -28,8 +28,6 @@ limitations under the License.
 #include "mediapipe/framework/port/status_matchers.h"
 
 namespace mediapipe {
-namespace tasks {
-namespace vision {
 
 namespace {
@@ -72,8 +70,7 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
         input_stream: "IMAGE_SIZE:image_size"
         output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
         options {
-          [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions
-              .ext] {
+          [mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
            object_normalization: $0
            object_normalization_origin_offset: $1
          }
@@ -145,8 +142,7 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
         input_stream: "IMAGE_SIZE:image_size"
         output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
         options {
-          [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions
-              .ext] {
+          [mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
            object_normalization: $0
            object_normalization_origin_offset: $1
          }
@@ -202,6 +198,4 @@ INSTANTIATE_TEST_CASE_P(
 }  // namespace
 
-}  // namespace vision
-}  // namespace tasks
 }  // namespace mediapipe

View File

@@ -34,14 +34,15 @@ limitations under the License.
 #include "mediapipe/tasks/cc/core/model_task_graph.h"
 #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
 #include "mediapipe/tasks/cc/core/utils.h"
-#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.pb.h"
-#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.pb.h"
+#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h"
+#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
 #include "mediapipe/tasks/metadata/metadata_schema_generated.h"
 
 namespace mediapipe {
 namespace tasks {
 namespace vision {
+namespace gesture_recognizer {
 
 namespace {
@@ -50,9 +51,8 @@ using ::mediapipe::api2::Output;
 using ::mediapipe::api2::builder::Graph;
 using ::mediapipe::api2::builder::Source;
 using ::mediapipe::tasks::components::containers::proto::ClassificationResult;
-using ::mediapipe::tasks::vision::hand_gesture_recognizer::proto::
-    HandGestureRecognizerSubgraphOptions;
-using ::mediapipe::tasks::vision::proto::LandmarksToMatrixCalculatorOptions;
+using ::mediapipe::tasks::vision::gesture_recognizer::proto::
+    HandGestureRecognizerGraphOptions;
 
 constexpr char kHandednessTag[] = "HANDEDNESS";
 constexpr char kLandmarksTag[] = "LANDMARKS";
@@ -70,18 +70,6 @@ constexpr char kIndexTag[] = "INDEX";
 constexpr char kIterableTag[] = "ITERABLE";
 constexpr char kBatchEndTag[] = "BATCH_END";
 
-absl::Status SanityCheckOptions(
-    const HandGestureRecognizerSubgraphOptions& options) {
-  if (options.min_tracking_confidence() < 0 ||
-      options.min_tracking_confidence() > 1) {
-    return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument,
-                                   "Invalid `min_tracking_confidence` option: "
-                                   "value must be in the range [0.0, 1.0]",
-                                   MediaPipeTasksStatus::kInvalidArgumentError);
-  }
-  return absl::OkStatus();
-}
-
 Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
                                                   Graph& graph) {
   auto& node = graph.AddNode("TensorConverterCalculator");
@@ -91,9 +79,10 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
 
 }  // namespace
 
-// A "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph" performs
-// single hand gesture recognition. This graph is used as a building block for
-// mediapipe.tasks.vision.HandGestureRecognizerGraph.
+// A
+// "mediapipe.tasks.vision.gesture_recognizer.SingleHandGestureRecognizerGraph"
+// performs single hand gesture recognition. This graph is used as a building
+// block for mediapipe.tasks.vision.GestureRecognizerGraph.
 //
 // Inputs:
 //   HANDEDNESS - ClassificationList
@@ -113,14 +102,15 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
 //
 // Example:
 // node {
-//   calculator: "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph"
+//   calculator:
+//   "mediapipe.tasks.vision.gesture_recognizer.SingleHandGestureRecognizerGraph"
 //   input_stream: "HANDEDNESS:handedness"
 //   input_stream: "LANDMARKS:landmarks"
 //   input_stream: "WORLD_LANDMARKS:world_landmarks"
 //   input_stream: "IMAGE_SIZE:image_size"
 //   output_stream: "HAND_GESTURES:hand_gestures"
 //   options {
-//     [mediapipe.tasks.vision.hand_gesture_recognizer.proto.HandGestureRecognizerSubgraphOptions.ext]
+//     [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext]
 //     {
 //       base_options {
 //         model_asset {
@@ -130,19 +120,19 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
 //     }
 //   }
 // }
-class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph {
+class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
  public:
   absl::StatusOr<CalculatorGraphConfig> GetConfig(
       SubgraphContext* sc) override {
     ASSIGN_OR_RETURN(
         const auto* model_resources,
-        CreateModelResources<HandGestureRecognizerSubgraphOptions>(sc));
+        CreateModelResources<HandGestureRecognizerGraphOptions>(sc));
     Graph graph;
     ASSIGN_OR_RETURN(
         auto hand_gestures,
-        BuildHandGestureRecognizerGraph(
-            sc->Options<HandGestureRecognizerSubgraphOptions>(),
-            *model_resources, graph[Input<ClassificationList>(kHandednessTag)],
+        BuildGestureRecognizerGraph(
+            sc->Options<HandGestureRecognizerGraphOptions>(), *model_resources,
+            graph[Input<ClassificationList>(kHandednessTag)],
             graph[Input<NormalizedLandmarkList>(kLandmarksTag)],
             graph[Input<LandmarkList>(kWorldLandmarksTag)],
             graph[Input<std::pair<int, int>>(kImageSizeTag)], graph));
@@ -151,15 +141,13 @@ class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph {
   }
 
  private:
-  absl::StatusOr<Source<ClassificationResult>> BuildHandGestureRecognizerGraph(
-      const HandGestureRecognizerSubgraphOptions& graph_options,
+  absl::StatusOr<Source<ClassificationResult>> BuildGestureRecognizerGraph(
+      const HandGestureRecognizerGraphOptions& graph_options,
       const core::ModelResources& model_resources,
       Source<ClassificationList> handedness,
       Source<NormalizedLandmarkList> hand_landmarks,
      Source<LandmarkList> hand_world_landmarks,
      Source<std::pair<int, int>> image_size, Graph& graph) {
-    MP_RETURN_IF_ERROR(SanityCheckOptions(graph_options));
-
     // Converts the ClassificationList to a matrix.
     auto& handedness_to_matrix = graph.AddNode("HandednessToMatrixCalculator");
     handedness >> handedness_to_matrix.In(kHandednessTag);
@@ -235,12 +223,15 @@ class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph {
   }
 };
 
+// clang-format off
 REGISTER_MEDIAPIPE_GRAPH(
-    ::mediapipe::tasks::vision::SingleHandGestureRecognizerSubgraph);
+    ::mediapipe::tasks::vision::gesture_recognizer::SingleHandGestureRecognizerGraph);  // NOLINT
+// clang-format on
 
-// A "mediapipe.tasks.vision.HandGestureRecognizerSubgraph" performs multi
-// hand gesture recognition. This graph is used as a building block for
-// mediapipe.tasks.vision.HandGestureRecognizerGraph.
+// A
+// "mediapipe.tasks.vision.gesture_recognizer.MultipleHandGestureRecognizerGraph"
+// performs multi hand gesture recognition. This graph is used as a building
+// block for mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph.
 //
 // Inputs:
 //   HANDEDNESS - std::vector<ClassificationList>
@@ -263,7 +254,8 @@ REGISTER_MEDIAPIPE_GRAPH(
 //
 // Example:
 // node {
-//   calculator: "mediapipe.tasks.vision.HandGestureRecognizerSubgraph"
+//   calculator:
+//   "mediapipe.tasks.vision.gesture_recognizer.MultipleHandGestureRecognizerGraph"
 //   input_stream: "HANDEDNESS:handedness"
 //   input_stream: "LANDMARKS:landmarks"
 //   input_stream: "WORLD_LANDMARKS:world_landmarks"
@@ -271,7 +263,7 @@ REGISTER_MEDIAPIPE_GRAPH(
 //   input_stream: "HAND_TRACKING_IDS:hand_tracking_ids"
 //   output_stream: "HAND_GESTURES:hand_gestures"
 //   options {
-//     [mediapipe.tasks.vision.hand_gesture_recognizer.proto.HandGestureRecognizerSubgraph.ext]
+//     [mediapipe.tasks.vision.gesture_recognizer.proto.MultipleHandGestureRecognizerGraph.ext]
 //     {
 //       base_options {
 //         model_asset {
@@ -281,15 +273,15 @@ REGISTER_MEDIAPIPE_GRAPH(
 //     }
 //   }
 // }
-class HandGestureRecognizerSubgraph : public core::ModelTaskGraph {
+class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph {
  public:
   absl::StatusOr<CalculatorGraphConfig> GetConfig(
       SubgraphContext* sc) override {
     Graph graph;
     ASSIGN_OR_RETURN(
         auto multi_hand_gestures,
-        BuildMultiHandGestureRecognizerSubraph(
-            sc->Options<HandGestureRecognizerSubgraphOptions>(),
+        BuildMultiGestureRecognizerSubraph(
+            sc->Options<HandGestureRecognizerGraphOptions>(),
            graph[Input<std::vector<ClassificationList>>(kHandednessTag)],
            graph[Input<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
            graph[Input<std::vector<LandmarkList>>(kWorldLandmarksTag)],
@@ -302,8 +294,8 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph {
 
  private:
   absl::StatusOr<Source<std::vector<ClassificationResult>>>
-  BuildMultiHandGestureRecognizerSubraph(
-      const HandGestureRecognizerSubgraphOptions& graph_options,
+  BuildMultiGestureRecognizerSubraph(
+      const HandGestureRecognizerGraphOptions& graph_options,
      Source<std::vector<ClassificationList>> multi_handedness,
      Source<std::vector<NormalizedLandmarkList>> multi_hand_landmarks,
      Source<std::vector<LandmarkList>> multi_hand_world_landmarks,
@@ -341,17 +333,18 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph {
     hand_tracking_id >> get_world_landmarks_at_index.In(kIndexTag);
     auto hand_world_landmarks = get_world_landmarks_at_index.Out(kItemTag);
 
-    auto& hand_gesture_recognizer_subgraph = graph.AddNode(
-        "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph");
-    hand_gesture_recognizer_subgraph
-        .GetOptions<HandGestureRecognizerSubgraphOptions>()
-        .CopyFrom(graph_options);
-    handedness >> hand_gesture_recognizer_subgraph.In(kHandednessTag);
-    hand_landmarks >> hand_gesture_recognizer_subgraph.In(kLandmarksTag);
-    hand_world_landmarks >>
-        hand_gesture_recognizer_subgraph.In(kWorldLandmarksTag);
-    image_size_clone >> hand_gesture_recognizer_subgraph.In(kImageSizeTag);
-    auto hand_gestures = hand_gesture_recognizer_subgraph.Out(kHandGesturesTag);
+    auto& hand_gesture_recognizer_graph = graph.AddNode(
+        "mediapipe.tasks.vision.gesture_recognizer."
+        "SingleHandGestureRecognizerGraph");
+    hand_gesture_recognizer_graph
+        .GetOptions<HandGestureRecognizerGraphOptions>()
+        .CopyFrom(graph_options);
+    handedness >> hand_gesture_recognizer_graph.In(kHandednessTag);
+    hand_landmarks >> hand_gesture_recognizer_graph.In(kLandmarksTag);
+    hand_world_landmarks >>
+        hand_gesture_recognizer_graph.In(kWorldLandmarksTag);
+    image_size_clone >> hand_gesture_recognizer_graph.In(kImageSizeTag);
+    auto hand_gestures = hand_gesture_recognizer_graph.Out(kHandGesturesTag);
 
     auto& end_loop_classification_results =
         graph.AddNode("mediapipe.tasks.EndLoopClassificationResultCalculator");
@@ -364,9 +357,12 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph {
   }
 };
 
+// clang-format off
 REGISTER_MEDIAPIPE_GRAPH(
-    ::mediapipe::tasks::vision::HandGestureRecognizerSubgraph);
+    ::mediapipe::tasks::vision::gesture_recognizer::MultipleHandGestureRecognizerGraph);  // NOLINT
+// clang-format on
 
+}  // namespace gesture_recognizer
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe

View File

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h"
+#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h"
 
 #include <algorithm>
@@ -25,6 +25,7 @@ limitations under the License.
 namespace mediapipe {
 namespace tasks {
 namespace vision {
+namespace gesture_recognizer {
 
 namespace {}  // namespace
@@ -58,6 +59,7 @@ absl::StatusOr<float> GetLeftHandScore(
   }
 }
 
+}  // namespace gesture_recognizer
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe

View File

@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
-#define MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
+#ifndef MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
+#define MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
 
 #include "absl/status/statusor.h"
 #include "mediapipe/framework/formats/classification.pb.h"
@@ -22,6 +22,7 @@ limitations under the License.
 namespace mediapipe {
 namespace tasks {
 namespace vision {
+namespace gesture_recognizer {
 
 bool IsLeftHand(const mediapipe::Classification& c);
@@ -30,8 +31,9 @@ bool IsRightHand(const mediapipe::Classification& c);
 absl::StatusOr<float> GetLeftHandScore(
     const mediapipe::ClassificationList& classification_list);
 
+}  // namespace gesture_recognizer
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe
 
-#endif  // MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
+#endif  // MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_

View File

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h"
+#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h"
 
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/port/gmock.h"
@@ -23,6 +23,7 @@ limitations under the License.
 namespace mediapipe {
 namespace tasks {
 namespace vision {
+namespace gesture_recognizer {
 namespace {
 
 TEST(GetLeftHandScore, SingleLeftHandClassification) {
@@ -72,6 +73,7 @@ TEST(GetLeftHandScore, LeftAndRightLowerCaseHandClassification) {
 }
 
 }  // namespace
+}  // namespace gesture_recognizer
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe

View File

@@ -21,8 +21,8 @@ package(default_visibility = [
 licenses(["notice"])
 
 mediapipe_proto_library(
-    name = "hand_gesture_recognizer_subgraph_options_proto",
-    srcs = ["hand_gesture_recognizer_subgraph_options.proto"],
+    name = "hand_gesture_recognizer_graph_options_proto",
+    srcs = ["hand_gesture_recognizer_graph_options.proto"],
     deps = [
         "//mediapipe/framework:calculator_options_proto",
         "//mediapipe/framework:calculator_proto",
@@ -30,12 +30,3 @@ mediapipe_proto_library(
         "//mediapipe/tasks/cc/core/proto:base_options_proto",
     ],
 )
-
-mediapipe_proto_library(
-    name = "landmarks_to_matrix_calculator_proto",
-    srcs = ["landmarks_to_matrix_calculator.proto"],
-    deps = [
-        "//mediapipe/framework:calculator_options_proto",
-        "//mediapipe/framework:calculator_proto",
-    ],
-)

View File

@@ -15,15 +15,15 @@ limitations under the License.
 
 // TODO Refactor naming and class structure of hand related Tasks.
 syntax = "proto2";
 
-package mediapipe.tasks.vision.hand_gesture_recognizer.proto;
+package mediapipe.tasks.vision.gesture_recognizer.proto;
 
 import "mediapipe/framework/calculator.proto";
 import "mediapipe/tasks/cc/components/processors/proto/classifier_options.proto";
 import "mediapipe/tasks/cc/core/proto/base_options.proto";
 
-message HandGestureRecognizerSubgraphOptions {
+message HandGestureRecognizerGraphOptions {
   extend mediapipe.CalculatorOptions {
-    optional HandGestureRecognizerSubgraphOptions ext = 463370452;
+    optional HandGestureRecognizerGraphOptions ext = 463370452;
   }
   // Base options for configuring hand gesture recognition subgraph, such as
   // specifying the TfLite model file with metadata, accelerator options, etc.

View File

@@ -51,7 +51,7 @@ cc_library(
         "//mediapipe/tasks/cc/core:model_task_graph",
         "//mediapipe/tasks/cc/core:utils",
         "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
-        "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_cc_proto",
+        "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
         "//mediapipe/tasks/cc/vision/utils:image_tensor_specs",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",

View File

@@ -40,12 +40,13 @@ limitations under the License.
 #include "mediapipe/tasks/cc/core/model_task_graph.h"
 #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
 #include "mediapipe/tasks/cc/core/utils.h"
-#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h"
+#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
 
 namespace mediapipe {
 namespace tasks {
 namespace vision {
+namespace hand_detector {
 
 namespace {
@@ -53,18 +54,23 @@ using ::mediapipe::api2::Input;
 using ::mediapipe::api2::Output;
 using ::mediapipe::api2::builder::Graph;
 using ::mediapipe::api2::builder::Source;
-using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions;
+using ::mediapipe::tasks::vision::hand_detector::proto::
+    HandDetectorGraphOptions;
 
 constexpr char kImageTag[] = "IMAGE";
-constexpr char kDetectionsTag[] = "DETECTIONS";
-constexpr char kNormRectsTag[] = "NORM_RECTS";
+constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
+constexpr char kHandRectsTag[] = "HAND_RECTS";
+constexpr char kPalmRectsTag[] = "PALM_RECTS";
 
 struct HandDetectionOuts {
   Source<std::vector<Detection>> palm_detections;
   Source<std::vector<NormalizedRect>> hand_rects;
+  Source<std::vector<NormalizedRect>> palm_rects;
+  Source<Image> image;
 };
 
 void ConfigureTensorsToDetectionsCalculator(
+    const HandDetectorGraphOptions& tasks_options,
     mediapipe::TensorsToDetectionsCalculatorOptions* options) {
   // TODO use metadata to configure these fields.
   options->set_num_classes(1);
@@ -77,7 +83,7 @@ void ConfigureTensorsToDetectionsCalculator(
   options->set_sigmoid_score(true);
   options->set_score_clipping_thresh(100.0);
   options->set_reverse_output_order(true);
-  options->set_min_score_thresh(0.5);
+  options->set_min_score_thresh(tasks_options.min_detection_confidence());
   options->set_x_scale(192.0);
   options->set_y_scale(192.0);
   options->set_w_scale(192.0);
@@ -134,9 +140,9 @@ void ConfigureRectTransformationCalculator(
 
 }  // namespace
 
-// A "mediapipe.tasks.vision.HandDetectorGraph" performs hand detection. The
-// Hand Detection Graph is based on palm detection model, and scale the detected
-// palm bounding box to enclose the detected whole hand.
+// A "mediapipe.tasks.vision.hand_detector.HandDetectorGraph" performs hand
+// detection. The Hand Detection Graph is based on palm detection model, and
+// scale the detected palm bounding box to enclose the detected whole hand.
 // Accepts CPU input images and outputs Landmark on CPU.
 //
 // Inputs:
@@ -144,19 +150,27 @@ void ConfigureRectTransformationCalculator(
 //     Image to perform detection on.
 //
 // Outputs:
-//   DETECTIONS - std::vector<Detection>
+//   PALM_DETECTIONS - std::vector<Detection>
 //     Detected palms with maximum `num_hands` specified in options.
-//   NORM_RECTS - std::vector<NormalizedRect>
+//   HAND_RECTS - std::vector<NormalizedRect>
 //     Detected hand bounding boxes in normalized coordinates.
+//   PLAM_RECTS - std::vector<NormalizedRect>
+//     Detected palm bounding boxes in normalized coordinates.
+//   IMAGE - Image
+//     The input image that the hand detector runs on and has the pixel data
+//     stored on the target storage (CPU vs GPU).
 //
 // Example:
 // node {
-//   calculator: "mediapipe.tasks.vision.HandDetectorGraph"
+//   calculator: "mediapipe.tasks.vision.hand_detector.HandDetectorGraph"
 //   input_stream: "IMAGE:image"
-//   output_stream: "DETECTIONS:palm_detections"
-//   output_stream: "NORM_RECTS:hand_rects_from_palm_detections"
+//   output_stream: "PALM_DETECTIONS:palm_detections"
+//   output_stream: "HAND_RECTS:hand_rects_from_palm_detections"
+//   output_stream: "PALM_RECTS:palm_rects"
+//   output_stream: "IMAGE:image_out"
 //   options {
-//     [mediapipe.tasks.hand_detector.proto.HandDetectorOptions.ext] {
+//     [mediapipe.tasks.vision.hand_detector.proto.HandDetectorGraphOptions.ext]
+//     {
 //       base_options {
 //         model_asset {
 //           file_name: "palm_detection.tflite"
@@ -173,16 +187,20 @@ class HandDetectorGraph : public core::ModelTaskGraph {
   absl::StatusOr<CalculatorGraphConfig> GetConfig(
       SubgraphContext* sc) override {
     ASSIGN_OR_RETURN(const auto* model_resources,
-                     CreateModelResources<HandDetectorOptions>(sc));
+                     CreateModelResources<HandDetectorGraphOptions>(sc));
     Graph graph;
-    ASSIGN_OR_RETURN(auto hand_detection_outs,
-                     BuildHandDetectionSubgraph(
-                         sc->Options<HandDetectorOptions>(), *model_resources,
-                         graph[Input<Image>(kImageTag)], graph));
+    ASSIGN_OR_RETURN(
+        auto hand_detection_outs,
+        BuildHandDetectionSubgraph(sc->Options<HandDetectorGraphOptions>(),
+                                   *model_resources,
+                                   graph[Input<Image>(kImageTag)], graph));
     hand_detection_outs.palm_detections >>
-        graph[Output<std::vector<Detection>>(kDetectionsTag)];
+        graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
     hand_detection_outs.hand_rects >>
-        graph[Output<std::vector<NormalizedRect>>(kNormRectsTag)];
+        graph[Output<std::vector<NormalizedRect>>(kHandRectsTag)];
+    hand_detection_outs.palm_rects >>
+        graph[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
+    hand_detection_outs.image >> graph[Output<Image>(kImageTag)];
     return graph.GetConfig();
   }
 
@@ -196,7 +214,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
   //   image_in: image stream to run hand detection on.
   //   graph: the mediapipe builder::Graph instance to be updated.
   absl::StatusOr<HandDetectionOuts> BuildHandDetectionSubgraph(
-      const HandDetectorOptions& subgraph_options,
+      const HandDetectorGraphOptions& subgraph_options,
       const core::ModelResources& model_resources, Source<Image> image_in,
       Graph& graph) {
     // Add image preprocessing subgraph. The model expects aspect ratio
@@ -235,6 +253,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
     auto& tensors_to_detections =
         graph.AddNode("TensorsToDetectionsCalculator");
     ConfigureTensorsToDetectionsCalculator(
+        subgraph_options,
         &tensors_to_detections
              .GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
     model_output_tensors >> tensors_to_detections.In("TENSORS");
@@ -281,7 +300,8 @@ class HandDetectorGraph : public core::ModelTaskGraph {
             .GetOptions<mediapipe::DetectionsToRectsCalculatorOptions>());
     palm_detections >> detections_to_rects.In("DETECTIONS");
     image_size >> detections_to_rects.In("IMAGE_SIZE");
-    auto palm_rects = detections_to_rects.Out("NORM_RECTS");
+    auto palm_rects =
+        detections_to_rects[Output<std::vector<NormalizedRect>>("NORM_RECTS")];
 
     // Expands and shifts the rectangle that contains the palm so that it's
     // likely to cover the entire hand.
@@ -308,13 +328,18 @@ class HandDetectorGraph : public core::ModelTaskGraph {
         clip_normalized_rect_vector_size[Output<std::vector<NormalizedRect>>(
             "")];
 
-    return HandDetectionOuts{.palm_detections = palm_detections,
-                             .hand_rects = clipped_hand_rects};
+    return HandDetectionOuts{
+        /* palm_detections= */ palm_detections,
+        /* hand_rects= */ clipped_hand_rects,
+        /* palm_rects= */ palm_rects,
+        /* image= */ preprocessing[Output<Image>(kImageTag)]};
   }
 };
 
-REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandDetectorGraph);
+REGISTER_MEDIAPIPE_GRAPH(
+    ::mediapipe::tasks::vision::hand_detector::HandDetectorGraph);
 
+}  // namespace hand_detector
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe

View File

@@ -40,13 +40,14 @@ limitations under the License.
 #include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
 #include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
 #include "mediapipe/tasks/cc/core/task_runner.h"
-#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h"
+#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result.pb.h"
 #include "mediapipe/tasks/cc/vision/utils/image_utils.h"
 
 namespace mediapipe {
 namespace tasks {
 namespace vision {
+namespace hand_detector {
 namespace {
 
 using ::file::Defaults;
@@ -60,7 +61,8 @@ using ::mediapipe::tasks::core::ModelResources;
 using ::mediapipe::tasks::core::TaskRunner;
 using ::mediapipe::tasks::core::proto::ExternalFile;
 using ::mediapipe::tasks::vision::DecodeImageFromFile;
-using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions;
+using ::mediapipe::tasks::vision::hand_detector::proto::
+    HandDetectorGraphOptions;
 using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorResult;
 using ::testing::EqualsProto;
 using ::testing::TestParamInfo;
@@ -80,9 +82,9 @@ constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt";
 constexpr char kImageTag[] = "IMAGE";
 constexpr char kImageName[] = "image";
-constexpr char kPalmDetectionsTag[] = "DETECTIONS";
+constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
 constexpr char kPalmDetectionsName[] = "palm_detections";
-constexpr char kHandNormRectsTag[] = "NORM_RECTS";
+constexpr char kHandRectsTag[] = "HAND_RECTS";
 constexpr char kHandNormRectsName[] = "hand_norm_rects";
 
 constexpr float kPalmDetectionBboxMaxDiff = 0.01;
@@ -104,22 +106,22 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner(
   Graph graph;
 
   auto& hand_detection =
-      graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph");
+      graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph");
 
-  auto options = std::make_unique<HandDetectorOptions>();
+  auto options = std::make_unique<HandDetectorGraphOptions>();
   options->mutable_base_options()->mutable_model_asset()->set_file_name(
       JoinPath("./", kTestDataDirectory, model_name));
   options->set_min_detection_confidence(0.5);
   options->set_num_hands(num_hands);
-  hand_detection.GetOptions<HandDetectorOptions>().Swap(options.get());
+  hand_detection.GetOptions<HandDetectorGraphOptions>().Swap(options.get());
 
   graph[Input<Image>(kImageTag)].SetName(kImageName) >>
       hand_detection.In(kImageTag);
 
   hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >>
       graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
-  hand_detection.Out(kHandNormRectsTag).SetName(kHandNormRectsName) >>
-      graph[Output<std::vector<NormalizedRect>>(kHandNormRectsTag)];
+  hand_detection.Out(kHandRectsTag).SetName(kHandNormRectsName) >>
+      graph[Output<std::vector<NormalizedRect>>(kHandRectsTag)];
 
   return TaskRunner::Create(
       graph.GetConfig(), std::make_unique<core::MediaPipeBuiltinOpResolver>());
@@ -200,6 +202,7 @@ INSTANTIATE_TEST_SUITE_P(
     });
 
 }  // namespace
+}  // namespace hand_detector
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe

View File

@@ -21,8 +21,8 @@ package(default_visibility = [
 licenses(["notice"])
 
 mediapipe_proto_library(
-    name = "hand_detector_options_proto",
-    srcs = ["hand_detector_options.proto"],
+    name = "hand_detector_graph_options_proto",
+    srcs = ["hand_detector_graph_options.proto"],
     deps = [
         "//mediapipe/framework:calculator_options_proto",
         "//mediapipe/framework:calculator_proto",

View File

@@ -21,24 +21,20 @@ import "mediapipe/framework/calculator.proto";
 import "mediapipe/tasks/cc/core/proto/base_options.proto";
 
 option java_package = "com.google.mediapipe.tasks.vision.handdetector";
-option java_outer_classname = "HandDetectorOptionsProto";
+option java_outer_classname = "HandDetectorGraphOptionsProto";
 
-message HandDetectorOptions {
+message HandDetectorGraphOptions {
   extend mediapipe.CalculatorOptions {
-    optional HandDetectorOptions ext = 464864288;
+    optional HandDetectorGraphOptions ext = 464864288;
   }
   // Base options for configuring Task library, such as specifying the TfLite
   // model file with metadata, accelerator options, etc.
   optional core.proto.BaseOptions base_options = 1;
 
-  // The locale to use for display names specified through the TFLite Model
-  // Metadata, if any. Defaults to English.
-  optional string display_names_locale = 2 [default = "en"];
-
   // Minimum confidence value ([0.0, 1.0]) for confidence score to be considered
   // successfully detecting a hand in the image.
-  optional float min_detection_confidence = 3 [default = 0.5];
+  optional float min_detection_confidence = 2 [default = 0.5];
 
   // The maximum number of hands output by the detector.
-  optional int32 num_hands = 4;
+  optional int32 num_hands = 3;
 }

View File

@@ -19,10 +19,10 @@ package(default_visibility = [
 licenses(["notice"])
 
 cc_library(
-    name = "hand_landmarker_subgraph",
-    srcs = ["hand_landmarker_subgraph.cc"],
+    name = "hand_landmarks_detector_graph",
+    srcs = ["hand_landmarks_detector_graph.cc"],
     deps = [
-        "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_subgraph_options_cc_proto",
+        "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",
         "//mediapipe/calculators/core:split_vector_calculator",
@@ -51,6 +51,7 @@ cc_library(
         # TODO: move calculators in modules/hand_landmark/calculators to tasks dir.
         "//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator",
         "//mediapipe/tasks/cc:common",
+        "//mediapipe/tasks/cc/components/utils:gate",
         "//mediapipe/tasks/cc/components:image_preprocessing",
         "//mediapipe/tasks/cc/core:model_resources",
         "//mediapipe/tasks/cc/core:model_task_graph",
@@ -66,3 +67,41 @@ cc_library(
 )
 
 # TODO: Enable this test
+
+cc_library(
+    name = "hand_landmarker_graph",
+    srcs = ["hand_landmarker_graph.cc"],
+    deps = [
+        ":hand_landmarks_detector_graph",
+        "//mediapipe/calculators/core:begin_loop_calculator",
+        "//mediapipe/calculators/core:clip_vector_size_calculator_cc_proto",
+        "//mediapipe/calculators/core:end_loop_calculator",
+        "//mediapipe/calculators/core:gate_calculator",
+        "//mediapipe/calculators/core:gate_calculator_cc_proto",
+        "//mediapipe/calculators/core:pass_through_calculator",
+        "//mediapipe/calculators/core:previous_loopback_calculator",
+        "//mediapipe/calculators/util:collection_has_min_size_calculator",
+        "//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto",
+        "//mediapipe/framework/api2:builder",
+        "//mediapipe/framework/api2:port",
+        "//mediapipe/framework/formats:classification_cc_proto",
+        "//mediapipe/framework/formats:detection_cc_proto",
+        "//mediapipe/framework/formats:image",
+        "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:rect_cc_proto",
+        "//mediapipe/framework/formats:tensor",
+        "//mediapipe/tasks/cc:common",
+        "//mediapipe/tasks/cc/components/utils:gate",
+        "//mediapipe/tasks/cc/core:model_task_graph",
+        "//mediapipe/tasks/cc/core:utils",
+        "//mediapipe/tasks/cc/vision/hand_detector:hand_detector_graph",
+        "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
+        "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator",
+        "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto",
+        "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
+        "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
+    ],
+    alwayslink = 1,
+)
+
+# TODO: Enable this test

View File

@ -0,0 +1,286 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
#include "mediapipe/calculators/core/clip_vector_size_calculator.pb.h"
#include "mediapipe/calculators/core/gate_calculator.pb.h"
#include "mediapipe/calculators/util/collection_has_min_size_calculator.pb.h"
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/components/utils/gate.h"
#include "mediapipe/tasks/cc/core/model_task_graph.h"
#include "mediapipe/tasks/cc/core/utils.h"
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
namespace mediapipe {
namespace tasks {
namespace vision {
namespace hand_landmarker {
namespace {
using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source;
using ::mediapipe::tasks::components::utils::DisallowIf;
using ::mediapipe::tasks::vision::hand_detector::proto::
HandDetectorGraphOptions;
using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarkerGraphOptions;
using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarksDetectorGraphOptions;
constexpr char kImageTag[] = "IMAGE";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
constexpr char kHandednessTag[] = "HANDEDNESS";
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
constexpr char kPalmRectsTag[] = "PALM_RECTS";
constexpr char kPreviousLoopbackCalculatorName[] = "PreviousLoopbackCalculator";
struct HandLandmarkerOutputs {
Source<std::vector<NormalizedLandmarkList>> landmark_lists;
Source<std::vector<LandmarkList>> world_landmark_lists;
Source<std::vector<NormalizedRect>> hand_rects_next_frame;
Source<std::vector<ClassificationList>> handednesses;
Source<std::vector<NormalizedRect>> palm_rects;
Source<std::vector<Detection>> palm_detections;
Source<Image> image;
};
} // namespace
// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand
// landmarks detection. The HandLandmarkerGraph consists of two subgraphs:
// HandDetectorGraph and MultipleHandLandmarksDetectorGraph.
// MultipleHandLandmarksDetectorGraph detects landmarks from bounding boxes
// produced by HandDetectorGraph. HandLandmarkerGraph tracks the landmarks over
// time, and skips the HandDetectorGraph. If the tracking is lost or the detectd
// hands are less than configured max number hands, HandDetectorGraph would be
// triggered to detect hands.
//
// Accepts CPU input images and outputs Landmarks on CPU.
//
// Inputs:
// IMAGE - Image
// Image to perform hand landmarks detection on.
//
// Outputs:
// LANDMARKS - std::vector<NormalizedLandmarkList>
// Vector of detected hand landmarks.
// WORLD_LANDMARKS - std::vector<LandmarkList>
// Vector of detected hand landmarks in world coordinates.
// HAND_RECT_NEXT_FRAME - std::vector<NormalizedRect>
// Vector of the predicted rects enclosing the same hand RoI for landmark
// detection on the next frame.
// HANDEDNESS - std::vector<ClassificationList>
// Vector of classification of handedness.
// PALM_RECTS - std::vector<NormalizedRect>
// Detected palm bounding boxes in normalized coordinates.
// PALM_DETECTIONS - std::vector<Detection>
// Detected palms with maximum `num_hands` specified in options.
// IMAGE - Image
// The input image that the hand landmarker runs on and has the pixel data
// stored on the target storage (CPU vs GPU).
//
// Example:
// node {
// calculator: "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"
// input_stream: "IMAGE:image_in"
// output_stream: "LANDMARKS:hand_landmarks"
// output_stream: "WORLD_LANDMARKS:world_hand_landmarks"
// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
// output_stream: "HANDEDNESS:handedness"
// output_stream: "PALM_RECTS:palm_rects"
// output_stream: "PALM_DETECTIONS:palm_detections"
// output_stream: "IMAGE:image_out"
// options {
// [mediapipe.tasks.hand_landmarker.proto.HandLandmarkerGraphOptions.ext] {
// base_options {
// model_asset {
// file_name: "hand_landmarker.task"
// }
// }
// hand_detector_graph_options {
// base_options {
// model_asset {
// file_name: "palm_detection.tflite"
// }
// }
// min_detection_confidence: 0.5
// num_hands: 2
// }
// hand_landmarks_detector_graph_options {
// base_options {
// model_asset {
// file_name: "hand_landmark_lite.tflite"
// }
// }
// min_detection_confidence: 0.5
// }
// }
// }
// }
class HandLandmarkerGraph : public core::ModelTaskGraph {
public:
absl::StatusOr<CalculatorGraphConfig> GetConfig(
SubgraphContext* sc) override {
Graph graph;
ASSIGN_OR_RETURN(
auto hand_landmarker_outputs,
BuildHandLandmarkerGraph(sc->Options<HandLandmarkerGraphOptions>(),
graph[Input<Image>(kImageTag)], graph));
hand_landmarker_outputs.landmark_lists >>
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
hand_landmarker_outputs.world_landmark_lists >>
graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
hand_landmarker_outputs.hand_rects_next_frame >>
graph[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];
hand_landmarker_outputs.handednesses >>
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
hand_landmarker_outputs.palm_rects >>
graph[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
hand_landmarker_outputs.palm_detections >>
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
hand_landmarker_outputs.image >> graph[Output<Image>(kImageTag)];
// TODO: remove when support is fixed.
// As the mediapipe GraphBuilder currently doesn't support configuring
// InputStreamInfo, we modify the CalculatorGraphConfig proto directly.
CalculatorGraphConfig config = graph.GetConfig();
for (int i = 0; i < config.node_size(); ++i) {
if (config.node(i).calculator() == kPreviousLoopbackCalculatorName) {
auto* info = config.mutable_node(i)->add_input_stream_info();
info->set_tag_index("LOOP");
info->set_back_edge(true);
break;
}
}
return config;
}
private:
// Adds a mediapipe hand landmark detection graph into the provided
// builder::Graph instance.
//
// tasks_options: the mediapipe tasks module HandLandmarkerGraphOptions.
// image_in: (mediapipe::Image) stream to run hand landmark detection on.
// graph: the mediapipe graph instance to be updated.
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
Graph& graph) {
const int max_num_hands =
tasks_options.hand_detector_graph_options().num_hands();
auto& previous_loopback = graph.AddNode(kPreviousLoopbackCalculatorName);
image_in >> previous_loopback.In("MAIN");
auto prev_hand_rects_from_landmarks =
previous_loopback[Output<std::vector<NormalizedRect>>("PREV_LOOP")];
auto& min_size_node =
graph.AddNode("NormalizedRectVectorHasMinSizeCalculator");
prev_hand_rects_from_landmarks >> min_size_node.In("ITERABLE");
min_size_node.GetOptions<CollectionHasMinSizeCalculatorOptions>()
.set_min_size(max_num_hands);
auto has_enough_hands = min_size_node.Out("").Cast<bool>();
auto image_for_hand_detector =
DisallowIf(image_in, has_enough_hands, graph);
auto& hand_detector =
graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph");
hand_detector.GetOptions<HandDetectorGraphOptions>().CopyFrom(
tasks_options.hand_detector_graph_options());
image_for_hand_detector >> hand_detector.In("IMAGE");
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
auto& hand_association = graph.AddNode("HandAssociationCalculator");
hand_association.GetOptions<HandAssociationCalculatorOptions>()
.set_min_similarity_threshold(tasks_options.min_tracking_confidence());
prev_hand_rects_from_landmarks >>
hand_association[Input<std::vector<NormalizedRect>>::Multiple("")][0];
hand_rects_from_hand_detector >>
hand_association[Input<std::vector<NormalizedRect>>::Multiple("")][1];
auto hand_rects = hand_association.Out("");
auto& clip_hand_rects =
graph.AddNode("ClipNormalizedRectVectorSizeCalculator");
clip_hand_rects.GetOptions<ClipVectorSizeCalculatorOptions>()
.set_max_vec_size(max_num_hands);
hand_rects >> clip_hand_rects.In("");
auto clipped_hand_rects = clip_hand_rects.Out("");
auto& hand_landmarks_detector_graph = graph.AddNode(
"mediapipe.tasks.vision.hand_landmarker."
"MultipleHandLandmarksDetectorGraph");
hand_landmarks_detector_graph
.GetOptions<HandLandmarksDetectorGraphOptions>()
.CopyFrom(tasks_options.hand_landmarks_detector_graph_options());
image_in >> hand_landmarks_detector_graph.In("IMAGE");
clipped_hand_rects >> hand_landmarks_detector_graph.In("HAND_RECT");
auto hand_rects_for_next_frame =
hand_landmarks_detector_graph[Output<std::vector<NormalizedRect>>(
kHandRectNextFrameTag)];
// Back edge.
hand_rects_for_next_frame >> previous_loopback.In("LOOP");
// TODO: Replace PassThroughCalculator with a calculator that
// converts the pixel data to be stored on the target storage (CPU vs GPU).
auto& pass_through = graph.AddNode("PassThroughCalculator");
image_in >> pass_through.In("");
return {{
/* landmark_lists= */ hand_landmarks_detector_graph
[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
/* world_landmark_lists= */
hand_landmarks_detector_graph[Output<std::vector<LandmarkList>>(
kWorldLandmarksTag)],
/* hand_rects_next_frame= */ hand_rects_for_next_frame,
/* handednesses= */
hand_landmarks_detector_graph[Output<std::vector<ClassificationList>>(
kHandednessTag)],
/* palm_rects= */
hand_detector[Output<std::vector<NormalizedRect>>(kPalmRectsTag)],
/* palm_detections= */
hand_detector[Output<std::vector<Detection>>(kPalmDetectionsTag)],
/* image= */
pass_through[Output<Image>("")],
}};
}
};
REGISTER_MEDIAPIPE_GRAPH(
::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerGraph);
} // namespace hand_landmarker
} // namespace vision
} // namespace tasks
} // namespace mediapipe
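For orientation, below is a minimal sketch, not part of this commit, of hosting the registered `HandLandmarkerGraph` as a node in a plain `CalculatorGraph`. The config mirrors the header comment above; the options block with real model assets is elided, and the stream names are illustrative placeholders.

```
// Sketch only: drives HandLandmarkerGraph with the core CalculatorGraph API.
// The graph options (model assets etc., see the header comment above) are
// elided here and would be required for a real run.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/port/parse_text_proto.h"

absl::Status RunHandLandmarkerOnce(mediapipe::Image image) {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
        input_stream: "image_in"
        output_stream: "hand_landmarks"
        node {
          calculator: "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"
          input_stream: "IMAGE:image_in"
          output_stream: "LANDMARKS:hand_landmarks"
          # options { ... model assets go here ... }
        }
      )pb");
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  // Observe the landmark output before starting the run.
  MP_RETURN_IF_ERROR(graph.ObserveOutputStream(
      "hand_landmarks", [](const mediapipe::Packet& packet) {
        // Each packet holds a std::vector<NormalizedLandmarkList>.
        return absl::OkStatus();
      }));
  MP_RETURN_IF_ERROR(graph.StartRun({}));
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "image_in", mediapipe::MakePacket<mediapipe::Image>(std::move(image))
                      .At(mediapipe::Timestamp(0))));
  MP_RETURN_IF_ERROR(graph.CloseInputStream("image_in"));
  return graph.WaitUntilDone();
}
```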
View File
@ -0,0 +1,167 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include "absl/flags/flag.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/file_helpers.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h"
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
#include "mediapipe/tasks/cc/core/task_runner.h"
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/shims/cc/shims_test_util.h"
namespace mediapipe {
namespace tasks {
namespace vision {
namespace hand_landmarker {
namespace {
using ::file::Defaults;
using ::file::GetTextProto;
using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source;
using ::mediapipe::file::JoinPath;
using ::mediapipe::tasks::core::TaskRunner;
using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarkerGraphOptions;
using ::testing::EqualsProto;
using ::testing::proto::Approximately;
using ::testing::proto::Partially;
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite";
constexpr char kHandLandmarkerFullModel[] = "hand_landmark_full.tflite";
constexpr char kLeftHandsImage[] = "left_hands.jpg";
constexpr char kImageTag[] = "IMAGE";
constexpr char kImageName[] = "image_in";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kLandmarksName[] = "landmarks";
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
constexpr char kWorldLandmarksName[] = "world_landmarks";
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
constexpr char kHandRectNextFrameName[] = "hand_rect_next_frame";
constexpr char kHandednessTag[] = "HANDEDNESS";
constexpr char kHandednessName[] = "handedness";
// Expected hand landmarks positions, in text proto format.
constexpr char kExpectedLeftUpHandLandmarksFilename[] =
"expected_left_up_hand_landmarks.prototxt";
constexpr char kExpectedLeftDownHandLandmarksFilename[] =
"expected_left_down_hand_landmarks.prototxt";
constexpr float kFullModelFractionDiff = 0.03; // percentage
constexpr float kAbsMargin = 0.03;
constexpr int kMaxNumHands = 2;
constexpr float kMinTrackingConfidence = 0.5;
NormalizedLandmarkList GetExpectedLandmarkList(absl::string_view filename) {
NormalizedLandmarkList expected_landmark_list;
MP_EXPECT_OK(GetTextProto(file::JoinPath("./", kTestDataDirectory, filename),
&expected_landmark_list, Defaults()));
return expected_landmark_list;
}
// Helper function to create a Hand Landmarker TaskRunner.
absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner() {
Graph graph;
auto& hand_landmarker_graph = graph.AddNode(
"mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph");
auto& options =
hand_landmarker_graph.GetOptions<HandLandmarkerGraphOptions>();
options.mutable_hand_detector_graph_options()
->mutable_base_options()
->mutable_model_asset()
->set_file_name(JoinPath("./", kTestDataDirectory, kPalmDetectionModel));
options.mutable_hand_detector_graph_options()->mutable_base_options();
options.mutable_hand_detector_graph_options()->set_num_hands(kMaxNumHands);
options.mutable_hand_landmarks_detector_graph_options()
->mutable_base_options()
->mutable_model_asset()
->set_file_name(
JoinPath("./", kTestDataDirectory, kHandLandmarkerFullModel));
options.set_min_tracking_confidence(kMinTrackingConfidence);
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
hand_landmarker_graph.In(kImageTag);
hand_landmarker_graph.Out(kLandmarksTag).SetName(kLandmarksName) >>
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
hand_landmarker_graph.Out(kWorldLandmarksTag).SetName(kWorldLandmarksName) >>
graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
hand_landmarker_graph.Out(kHandednessTag).SetName(kHandednessName) >>
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
hand_landmarker_graph.Out(kHandRectNextFrameTag)
.SetName(kHandRectNextFrameName) >>
graph[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];
return TaskRunner::Create(
graph.GetConfig(), absl::make_unique<core::MediaPipeBuiltinOpResolver>());
}
class HandLandmarkerTest : public tflite_shims::testing::Test {};
TEST_F(HandLandmarkerTest, Succeeds) {
MP_ASSERT_OK_AND_ASSIGN(
Image image,
DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kLeftHandsImage)));
MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner());
auto output_packets =
task_runner->Process({{kImageName, MakePacket<Image>(std::move(image))}});
const auto& landmarks = (*output_packets)[kLandmarksName]
.Get<std::vector<NormalizedLandmarkList>>();
ASSERT_EQ(landmarks.size(), kMaxNumHands);
std::vector<NormalizedLandmarkList> expected_landmarks = {
GetExpectedLandmarkList(kExpectedLeftUpHandLandmarksFilename),
GetExpectedLandmarkList(kExpectedLeftDownHandLandmarksFilename)};
EXPECT_THAT(landmarks[0],
Approximately(Partially(EqualsProto(expected_landmarks[0])),
/*margin=*/kAbsMargin,
/*fraction=*/kFullModelFractionDiff));
EXPECT_THAT(landmarks[1],
Approximately(Partially(EqualsProto(expected_landmarks[1])),
/*margin=*/kAbsMargin,
/*fraction=*/kFullModelFractionDiff));
}
} // namespace
} // namespace hand_landmarker
} // namespace vision
} // namespace tasks
} // namespace mediapipe
View File
@ -34,12 +34,13 @@ limitations under the License.
 #include "mediapipe/framework/formats/tensor.h"
 #include "mediapipe/tasks/cc/common.h"
 #include "mediapipe/tasks/cc/components/image_preprocessing.h"
+#include "mediapipe/tasks/cc/components/utils/gate.h"
 #include "mediapipe/tasks/cc/core/model_resources.h"
 #include "mediapipe/tasks/cc/core/model_task_graph.h"
 #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
 #include "mediapipe/tasks/cc/core/utils.h"
 #include "mediapipe/tasks/cc/metadata/metadata_extractor.h"
-#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h"
+#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
 #include "mediapipe/tasks/metadata/metadata_schema_generated.h"
 #include "mediapipe/util/label_map.pb.h"
@ -48,6 +49,7 @@ limitations under the License.
 namespace mediapipe {
 namespace tasks {
 namespace vision {
+namespace hand_landmarker {

 namespace {
@ -55,9 +57,10 @@ using ::mediapipe::api2::Input;
 using ::mediapipe::api2::Output;
 using ::mediapipe::api2::builder::Graph;
 using ::mediapipe::api2::builder::Source;
+using ::mediapipe::tasks::components::utils::AllowIf;
 using ::mediapipe::tasks::core::ModelResources;
-using ::mediapipe::tasks::vision::hand_landmarker::proto::
-    HandLandmarkerSubgraphOptions;
+using ::mediapipe::tasks::vision::hand_landmarker::proto::
+    HandLandmarksDetectorGraphOptions;
 using LabelItems = mediapipe::proto_ns::Map<int64, ::mediapipe::LabelMapItem>;

 constexpr char kImageTag[] = "IMAGE";
@ -82,7 +85,6 @@ struct SingleHandLandmarkerOutputs {
   Source<bool> hand_presence;
   Source<float> hand_presence_score;
   Source<ClassificationList> handedness;
-  Source<std::pair<int, int>> image_size;
 };

 struct HandLandmarkerOutputs {
@ -92,10 +94,10 @@ struct HandLandmarkerOutputs {
   Source<std::vector<bool>> presences;
   Source<std::vector<float>> presence_scores;
   Source<std::vector<ClassificationList>> handednesses;
-  Source<std::pair<int, int>> image_size;
 };

-absl::Status SanityCheckOptions(const HandLandmarkerSubgraphOptions& options) {
+absl::Status SanityCheckOptions(
+    const HandLandmarksDetectorGraphOptions& options) {
   if (options.min_detection_confidence() < 0 ||
       options.min_detection_confidence() > 1) {
     return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument,
@ -182,8 +184,8 @@ void ConfigureHandRectTransformationCalculator(
 }  // namespace

-// A "mediapipe.tasks.vision.SingleHandLandmarkerSubgraph" performs hand
-// landmark detection.
+// A "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarksDetectorGraph"
+// performs hand landmarks detection.
 // - Accepts CPU input images and outputs Landmark on CPU.
 //
 // Inputs:
@ -208,12 +210,11 @@ void ConfigureHandRectTransformationCalculator(
 //     Float value indicates the probability that the hand is present.
 //   HANDEDNESS - ClassificationList
 //     Classification of handedness.
-//   IMAGE_SIZE - std::vector<int, int>
-//     The size of input image.
 //
 // Example:
 // node {
-//   calculator: "mediapipe.tasks.vision.SingleHandLandmarkerSubgraph"
+//   calculator:
+//   "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarksDetectorGraph"
 //   input_stream: "IMAGE:input_image"
 //   input_stream: "HAND_RECT:hand_rect"
 //   output_stream: "LANDMARKS:hand_landmarks"
@ -221,10 +222,8 @@ void ConfigureHandRectTransformationCalculator(
 //   output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
 //   output_stream: "PRESENCE:hand_presence"
 //   output_stream: "PRESENCE_SCORE:hand_presence_score"
-//   output_stream: "HANDEDNESS:handedness"
-//   output_stream: "IMAGE_SIZE:image_size"
 //   options {
-//     [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext]
+//     [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarksDetectorGraphOptions.ext]
 //     {
 //       base_options {
 //         model_asset {
@ -235,16 +234,17 @@ void ConfigureHandRectTransformationCalculator(
 //       }
 //     }
 //   }
-class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
+class SingleHandLandmarksDetectorGraph : public core::ModelTaskGraph {
  public:
   absl::StatusOr<CalculatorGraphConfig> GetConfig(
       SubgraphContext* sc) override {
-    ASSIGN_OR_RETURN(const auto* model_resources,
-                     CreateModelResources<HandLandmarkerSubgraphOptions>(sc));
+    ASSIGN_OR_RETURN(
+        const auto* model_resources,
+        CreateModelResources<HandLandmarksDetectorGraphOptions>(sc));
     Graph graph;
     ASSIGN_OR_RETURN(auto hand_landmark_detection_outs,
-                     BuildSingleHandLandmarkerSubgraph(
-                         sc->Options<HandLandmarkerSubgraphOptions>(),
+                     BuildSingleHandLandmarksDetectorGraph(
+                         sc->Options<HandLandmarksDetectorGraphOptions>(),
                          *model_resources, graph[Input<Image>(kImageTag)],
                          graph[Input<NormalizedRect>(kHandRectTag)], graph));
     hand_landmark_detection_outs.hand_landmarks >>
@ -259,8 +259,6 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
         graph[Output<float>(kPresenceScoreTag)];
     hand_landmark_detection_outs.handedness >>
         graph[Output<ClassificationList>(kHandednessTag)];
-    hand_landmark_detection_outs.image_size >>
-        graph[Output<std::pair<int, int>>(kImageSizeTag)];
     return graph.GetConfig();
   }
@ -269,14 +267,16 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
   // Adds a mediapipe hand landmark detection graph into the provided
   // builder::Graph instance.
   //
-  // subgraph_options: the mediapipe tasks module HandLandmarkerSubgraphOptions.
-  // model_resources: the ModelSources object initialized from a hand landmark
+  // subgraph_options: the mediapipe tasks module
+  // HandLandmarksDetectorGraphOptions. model_resources: the ModelSources object
+  // initialized from a hand landmark
   // detection model file with model metadata.
   // image_in: (mediapipe::Image) stream to run hand landmark detection on.
   // rect: (NormalizedRect) stream to run on the RoI of image.
   // graph: the mediapipe graph instance to be updated.
-  absl::StatusOr<SingleHandLandmarkerOutputs> BuildSingleHandLandmarkerSubgraph(
-      const HandLandmarkerSubgraphOptions& subgraph_options,
+  absl::StatusOr<SingleHandLandmarkerOutputs>
+  BuildSingleHandLandmarksDetectorGraph(
+      const HandLandmarksDetectorGraphOptions& subgraph_options,
       const core::ModelResources& model_resources, Source<Image> image_in,
       Source<NormalizedRect> hand_rect, Graph& graph) {
     MP_RETURN_IF_ERROR(SanityCheckOptions(subgraph_options));
@ -332,18 +332,7 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
     // score of hand presence.
     auto& tensors_to_hand_presence = graph.AddNode("TensorsToFloatsCalculator");
     hand_flag_tensors >> tensors_to_hand_presence.In("TENSORS");
-    // Converts the handedness tensor into a float that represents the
-    // classification score of handedness.
-    auto& tensors_to_handedness =
-        graph.AddNode("TensorsToClassificationCalculator");
-    ConfigureTensorsToHandednessCalculator(
-        &tensors_to_handedness.GetOptions<
-            mediapipe::TensorsToClassificationCalculatorOptions>());
-    handedness_tensors >> tensors_to_handedness.In("TENSORS");
     auto hand_presence_score = tensors_to_hand_presence[Output<float>("FLOAT")];
-    auto handedness =
-        tensors_to_handedness[Output<ClassificationList>("CLASSIFICATIONS")];

     // Applies a threshold to the confidence score to determine whether a
     // hand is present.
@ -354,6 +343,18 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
     hand_presence_score >> hand_presence_thresholding.In("FLOAT");
     auto hand_presence = hand_presence_thresholding[Output<bool>("FLAG")];

+    // Converts the handedness tensor into a float that represents the
+    // classification score of handedness.
+    auto& tensors_to_handedness =
+        graph.AddNode("TensorsToClassificationCalculator");
+    ConfigureTensorsToHandednessCalculator(
+        &tensors_to_handedness.GetOptions<
+            mediapipe::TensorsToClassificationCalculatorOptions>());
+    handedness_tensors >> tensors_to_handedness.In("TENSORS");
+    auto handedness = AllowIf(
+        tensors_to_handedness[Output<ClassificationList>("CLASSIFICATIONS")],
+        hand_presence, graph);

     // Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed
     // hand image (after image transformation with the FIT scale mode) to the
     // corresponding locations on the same image with the letterbox removed
@ -371,8 +372,9 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
     landmark_letterbox_removal.Out("LANDMARKS") >>
         landmark_projection.In("NORM_LANDMARKS");
     hand_rect >> landmark_projection.In("NORM_RECT");
-    auto projected_landmarks =
-        landmark_projection[Output<NormalizedLandmarkList>("NORM_LANDMARKS")];
+    auto projected_landmarks = AllowIf(
+        landmark_projection[Output<NormalizedLandmarkList>("NORM_LANDMARKS")],
+        hand_presence, graph);

     // Projects the world landmarks from the cropped hand image to the
     // corresponding locations on the full image before cropping (input to the
@ -383,7 +385,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
         world_landmark_projection.In("LANDMARKS");
     hand_rect >> world_landmark_projection.In("NORM_RECT");
     auto projected_world_landmarks =
-        world_landmark_projection[Output<LandmarkList>("LANDMARKS")];
+        AllowIf(world_landmark_projection[Output<LandmarkList>("LANDMARKS")],
+                hand_presence, graph);

     // Converts the hand landmarks into a rectangle (normalized by image size)
     // that encloses the hand.
@ -403,7 +406,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
     hand_landmarks_to_rect.Out("NORM_RECT") >>
         hand_rect_transformation.In("NORM_RECT");
     auto hand_rect_next_frame =
-        hand_rect_transformation[Output<NormalizedRect>("")];
+        AllowIf(hand_rect_transformation[Output<NormalizedRect>("")],
+                hand_presence, graph);

     return {{
         /* hand_landmarks= */ projected_landmarks,
@ -412,16 +416,17 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
         /* hand_presence= */ hand_presence,
         /* hand_presence_score= */ hand_presence_score,
         /* handedness= */ handedness,
-        /* image_size= */ image_size,
     }};
   }
 };

+// clang-format off
 REGISTER_MEDIAPIPE_GRAPH(
-    ::mediapipe::tasks::vision::SingleHandLandmarkerSubgraph);
+    ::mediapipe::tasks::vision::hand_landmarker::SingleHandLandmarksDetectorGraph);  // NOLINT
+// clang-format on

-// A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi
-// hand landmark detection.
+// A "mediapipe.tasks.vision.hand_landmarker.MultipleHandLandmarksDetectorGraph"
+// performs multi hand landmark detection.
 // - Accepts CPU input image and a vector of hand rect RoIs to detect the
 //   multiple hands landmarks enclosed by the RoIs. Output vectors of
 //   hand landmarks related results, where each element in the vectors
@ -449,12 +454,11 @@ REGISTER_MEDIAPIPE_GRAPH(
 //     Vector of float value indicates the probability that the hand is present.
 //   HANDEDNESS - std::vector<ClassificationList>
 //     Vector of classification of handedness.
-//   IMAGE_SIZE - std::vector<int, int>
-//     The size of input image.
 //
 // Example:
 // node {
-//   calculator: "mediapipe.tasks.vision.HandLandmarkerSubgraph"
+//   calculator:
+//   "mediapipe.tasks.vision.hand_landmarker.MultipleHandLandmarksDetectorGraph"
 //   input_stream: "IMAGE:input_image"
 //   input_stream: "HAND_RECT:hand_rect"
 //   output_stream: "LANDMARKS:hand_landmarks"
@ -463,9 +467,8 @@ REGISTER_MEDIAPIPE_GRAPH(
 //   output_stream: "PRESENCE:hand_presence"
 //   output_stream: "PRESENCE_SCORE:hand_presence_score"
 //   output_stream: "HANDEDNESS:handedness"
-//   output_stream: "IMAGE_SIZE:image_size"
 //   options {
-//     [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext]
+//     [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarksDetectorGraphOptions.ext]
 //     {
 //       base_options {
 //         model_asset {
@ -476,15 +479,15 @@ REGISTER_MEDIAPIPE_GRAPH(
 //       }
 //     }
 //   }
-class HandLandmarkerSubgraph : public core::ModelTaskGraph {
+class MultipleHandLandmarksDetectorGraph : public core::ModelTaskGraph {
  public:
   absl::StatusOr<CalculatorGraphConfig> GetConfig(
       SubgraphContext* sc) override {
     Graph graph;
     ASSIGN_OR_RETURN(
         auto hand_landmark_detection_outputs,
-        BuildHandLandmarkerSubgraph(
-            sc->Options<HandLandmarkerSubgraphOptions>(),
+        BuildHandLandmarksDetectorGraph(
+            sc->Options<HandLandmarksDetectorGraphOptions>(),
             graph[Input<Image>(kImageTag)],
             graph[Input<std::vector<NormalizedRect>>(kHandRectTag)], graph));
     hand_landmark_detection_outputs.landmark_lists >>
@ -499,21 +502,20 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
         graph[Output<std::vector<float>>(kPresenceScoreTag)];
     hand_landmark_detection_outputs.handednesses >>
         graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
-    hand_landmark_detection_outputs.image_size >>
-        graph[Output<std::pair<int, int>>(kImageSizeTag)];
     return graph.GetConfig();
   }

  private:
-  absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerSubgraph(
-      const HandLandmarkerSubgraphOptions& subgraph_options,
+  absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarksDetectorGraph(
+      const HandLandmarksDetectorGraphOptions& subgraph_options,
       Source<Image> image_in,
       Source<std::vector<NormalizedRect>> multi_hand_rects, Graph& graph) {
-    auto& hand_landmark_subgraph =
-        graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph");
-    hand_landmark_subgraph.GetOptions<HandLandmarkerSubgraphOptions>().CopyFrom(
-        subgraph_options);
+    auto& hand_landmark_subgraph = graph.AddNode(
+        "mediapipe.tasks.vision.hand_landmarker."
+        "SingleHandLandmarksDetectorGraph");
+    hand_landmark_subgraph.GetOptions<HandLandmarksDetectorGraphOptions>()
+        .CopyFrom(subgraph_options);

     auto& begin_loop_multi_hand_rects =
         graph.AddNode("BeginLoopNormalizedRectCalculator");
@ -533,8 +535,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
         hand_landmark_subgraph.Out("HAND_RECT_NEXT_FRAME");
     auto landmarks = hand_landmark_subgraph.Out("LANDMARKS");
     auto world_landmarks = hand_landmark_subgraph.Out("WORLD_LANDMARKS");
-    auto image_size =
-        hand_landmark_subgraph[Output<std::pair<int, int>>("IMAGE_SIZE")];

     auto& end_loop_handedness =
         graph.AddNode("EndLoopClassificationListCalculator");
@ -585,13 +585,16 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
         /* presences= */ presences,
         /* presence_scores= */ presence_scores,
         /* handednesses= */ handednesses,
-        /* image_size= */ image_size,
     }};
   }
 };

-REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandLandmarkerSubgraph);
+// clang-format off
+REGISTER_MEDIAPIPE_GRAPH(
+    ::mediapipe::tasks::vision::hand_landmarker::MultipleHandLandmarksDetectorGraph);  // NOLINT
+// clang-format on

+}  // namespace hand_landmarker
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe
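The key behavioral change in this diff is that handedness, projected landmarks, and the next-frame rect are now gated on `hand_presence` via `AllowIf` from `mediapipe/tasks/cc/components/utils/gate.h`. A rough sketch of the equivalent wiring, under the assumption that the helper routes the stream through a standard `GateCalculator` (the tag names below are the gate calculator's usual ones, not necessarily the helper's internals):

```
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/formats/classification.pb.h"

namespace sketch {
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source;

// Roughly what AllowIf(stream, condition, graph) does: route `stream`
// through a GateCalculator that only lets packets pass while the boolean
// `condition` stream carries true.
Source<mediapipe::ClassificationList> AllowIfSketch(
    Source<mediapipe::ClassificationList> stream, Source<bool> condition,
    Graph& graph) {
  auto& gate = graph.AddNode("GateCalculator");
  condition >> gate.In("ALLOW");  // boolean control stream
  stream >> gate.In("");          // data stream to gate
  return gate.Out("").Cast<mediapipe::ClassificationList>();
}
}  // namespace sketch
```

The net effect is that downstream consumers see no handedness, landmark, or rect packets for frames where no hand is detected, rather than stale or empty values.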
View File
@ -39,12 +39,13 @@ limitations under the License.
 #include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
 #include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
 #include "mediapipe/tasks/cc/core/task_runner.h"
-#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h"
+#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/utils/image_utils.h"

 namespace mediapipe {
 namespace tasks {
 namespace vision {
+namespace hand_landmarker {
 namespace {

 using ::file::Defaults;
@ -57,7 +58,7 @@ using ::mediapipe::file::JoinPath;
 using ::mediapipe::tasks::core::TaskRunner;
 using ::mediapipe::tasks::vision::DecodeImageFromFile;
 using ::mediapipe::tasks::vision::hand_landmarker::proto::
-    HandLandmarkerSubgraphOptions;
+    HandLandmarksDetectorGraphOptions;
 using ::testing::ElementsAreArray;
 using ::testing::EqualsProto;
 using ::testing::Pointwise;
@ -112,13 +113,14 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateSingleHandTaskRunner(
     absl::string_view model_name) {
   Graph graph;
-  auto& hand_landmark_detection =
-      graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph");
+  auto& hand_landmark_detection = graph.AddNode(
+      "mediapipe.tasks.vision.hand_landmarker."
+      "SingleHandLandmarksDetectorGraph");

-  auto options = std::make_unique<HandLandmarkerSubgraphOptions>();
+  auto options = std::make_unique<HandLandmarksDetectorGraphOptions>();
   options->mutable_base_options()->mutable_model_asset()->set_file_name(
       JoinPath("./", kTestDataDirectory, model_name));
-  hand_landmark_detection.GetOptions<HandLandmarkerSubgraphOptions>().Swap(
+  hand_landmark_detection.GetOptions<HandLandmarksDetectorGraphOptions>().Swap(
       options.get());

   graph[Input<Image>(kImageTag)].SetName(kImageName) >>
@ -151,13 +153,14 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateMultiHandTaskRunner(
     absl::string_view model_name) {
   Graph graph;
-  auto& multi_hand_landmark_detection =
-      graph.AddNode("mediapipe.tasks.vision.HandLandmarkerSubgraph");
+  auto& multi_hand_landmark_detection = graph.AddNode(
+      "mediapipe.tasks.vision.hand_landmarker."
+      "MultipleHandLandmarksDetectorGraph");

-  auto options = std::make_unique<HandLandmarkerSubgraphOptions>();
+  auto options = std::make_unique<HandLandmarksDetectorGraphOptions>();
   options->mutable_base_options()->mutable_model_asset()->set_file_name(
       JoinPath("./", kTestDataDirectory, model_name));
-  multi_hand_landmark_detection.GetOptions<HandLandmarkerSubgraphOptions>()
+  multi_hand_landmark_detection.GetOptions<HandLandmarksDetectorGraphOptions>()
       .Swap(options.get());

   graph[Input<Image>(kImageTag)].SetName(kImageName) >>
@ -462,6 +465,7 @@ INSTANTIATE_TEST_SUITE_P(
     });

 }  // namespace
+}  // namespace hand_landmarker
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe
View File
@ -21,8 +21,8 @@ package(default_visibility = [
 licenses(["notice"])

 mediapipe_proto_library(
-    name = "hand_landmarker_subgraph_options_proto",
-    srcs = ["hand_landmarker_subgraph_options.proto"],
+    name = "hand_landmarks_detector_graph_options_proto",
+    srcs = ["hand_landmarks_detector_graph_options.proto"],
     deps = [
         "//mediapipe/framework:calculator_options_proto",
         "//mediapipe/framework:calculator_proto",
@ -31,13 +31,13 @@ mediapipe_proto_library(
 )

 mediapipe_proto_library(
-    name = "hand_landmarker_options_proto",
-    srcs = ["hand_landmarker_options.proto"],
+    name = "hand_landmarker_graph_options_proto",
+    srcs = ["hand_landmarker_graph_options.proto"],
     deps = [
-        ":hand_landmarker_subgraph_options_proto",
+        ":hand_landmarks_detector_graph_options_proto",
         "//mediapipe/framework:calculator_options_proto",
         "//mediapipe/framework:calculator_proto",
         "//mediapipe/tasks/cc/core/proto:base_options_proto",
-        "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_proto",
+        "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_proto",
     ],
 )
View File
@ -19,22 +19,26 @@ package mediapipe.tasks.vision.hand_landmarker.proto;

 import "mediapipe/framework/calculator.proto";
 import "mediapipe/tasks/cc/core/proto/base_options.proto";
-import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto";
-import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto";
+import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto";
+import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto";

-message HandLandmarkerOptions {
+message HandLandmarkerGraphOptions {
   extend mediapipe.CalculatorOptions {
-    optional HandLandmarkerOptions ext = 462713202;
+    optional HandLandmarkerGraphOptions ext = 462713202;
   }
   // Base options for configuring MediaPipe Tasks, such as specifying the TfLite
   // model file with metadata, accelerator options, etc.
   optional core.proto.BaseOptions base_options = 1;

-  // The locale to use for display names specified through the TFLite Model
-  // Metadata, if any. Defaults to English.
-  optional string display_names_locale = 2 [default = "en"];
-
-  optional hand_detector.proto.HandDetectorOptions hand_detector_options = 3;
-
-  optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 4;
+  // Options for hand detector graph.
+  optional hand_detector.proto.HandDetectorGraphOptions
+      hand_detector_graph_options = 2;
+
+  // Options for hand landmarker subgraph.
+  optional HandLandmarksDetectorGraphOptions
+      hand_landmarks_detector_graph_options = 3;
+
+  // Minimum confidence for hand landmarks tracking to be considered
+  // successful.
+  optional float min_tracking_confidence = 4 [default = 0.5];
 }
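To make the reshaped options message concrete, here is a small hypothetical C++ snippet populating the renamed `HandLandmarkerGraphOptions`. The field names match the proto above; the numeric values and the helper name are placeholders.

```
// Hypothetical sketch: filling in the renamed HandLandmarkerGraphOptions.
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"

mediapipe::tasks::vision::hand_landmarker::proto::HandLandmarkerGraphOptions
MakeExampleOptions() {
  mediapipe::tasks::vision::hand_landmarker::proto::HandLandmarkerGraphOptions
      options;
  // Nested options for the palm/hand detector subgraph.
  options.mutable_hand_detector_graph_options()->set_num_hands(2);
  // Nested options for the per-hand landmarks detector subgraph.
  options.mutable_hand_landmarks_detector_graph_options()
      ->set_min_detection_confidence(0.5f);
  // New top-level field replacing the old per-subgraph plumbing.
  options.set_min_tracking_confidence(0.5f);
  return options;
}
```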
View File
@ -20,19 +20,15 @@ package mediapipe.tasks.vision.hand_landmarker.proto;

 import "mediapipe/framework/calculator.proto";
 import "mediapipe/tasks/cc/core/proto/base_options.proto";

-message HandLandmarkerSubgraphOptions {
+message HandLandmarksDetectorGraphOptions {
   extend mediapipe.CalculatorOptions {
-    optional HandLandmarkerSubgraphOptions ext = 474472470;
+    optional HandLandmarksDetectorGraphOptions ext = 474472470;
   }
   // Base options for configuring MediaPipe Tasks, such as specifying the TfLite
   // model file with metadata, accelerator options, etc.
   optional core.proto.BaseOptions base_options = 1;

-  // The locale to use for display names specified through the TFLite Model
-  // Metadata, if any. Defaults to English.
-  optional string display_names_locale = 2 [default = "en"];
-
   // Minimum confidence value ([0.0, 1.0]) for hand presence score to be
   // considered successfully detecting a hand in the image.
-  optional float min_detection_confidence = 3 [default = 0.5];
+  optional float min_detection_confidence = 2 [default = 0.5];
 }