Merge branch 'master' into image-classification-python-impl
This commit is contained in:
commit
b0d855daad
|
@ -143,6 +143,98 @@ Below is an example of how to create a subgraph named `TwoPassThroughSubgraph`.
|
|||
}
|
||||
```
|
||||
|
||||
## Graph Options
|
||||
|
||||
It is possible to specify a "graph options" protobuf for a MediaPipe graph
|
||||
similar to the [`Calculator Options`](calculators.md#calculator-options)
|
||||
protobuf specified for a MediaPipe calculator. These "graph options" can be
|
||||
specified where a graph is invoked, and used to populate calculator options and
|
||||
subgraph options within the graph.
|
||||
|
||||
In a CalculatorGraphConfig, graph options can be specified for a subgraph
|
||||
exactly like calculator options, as shown below:
|
||||
|
||||
```
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "image"
|
||||
output_stream: "throttled_image"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
|
||||
max_in_flight: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "FaceDetectionSubgraph"
|
||||
input_stream: "IMAGE:throttled_image"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FaceDetectionOptions] {
|
||||
tensor_width: 192
|
||||
tensor_height: 192
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In a CalculatorGraphConfig, graph options can be accepted and used to populate
|
||||
calculator options, as shown below:
|
||||
|
||||
```
|
||||
graph_options: {
|
||||
[type.googleapis.com/mediapipe.FaceDetectionOptions] {}
|
||||
}
|
||||
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:multi_backend_image"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageToTensorCalculatorOptions] {
|
||||
keep_aspect_ratio: true
|
||||
border_mode: BORDER_ZERO
|
||||
}
|
||||
}
|
||||
option_value: "output_tensor_width:options/tensor_width"
|
||||
option_value: "output_tensor_height:options/tensor_height"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.InferenceCalculatorOptions] {}
|
||||
}
|
||||
option_value: "delegate:options/delegate"
|
||||
option_value: "model_path:options/model_path"
|
||||
}
|
||||
```
|
||||
|
||||
In this example, the `FaceDetectionSubgraph` accepts the graph options protobuf
|
||||
`FaceDetectionOptions`. The `FaceDetectionOptions` is used to define some field
|
||||
values in the calculator options `ImageToTensorCalculatorOptions` and some field
|
||||
values in the subgraph options `InferenceCalculatorOptions`. The field values
|
||||
are defined using the `option_value:` syntax.
|
||||
|
||||
In the `CalculatorGraphConfig::Node` protobuf, the fields `node_options:` and
|
||||
`option_value:` together define the option values for a calculator such as
|
||||
`ImageToTensorCalculator`. The `node_options:` field defines a set of literal
|
||||
constant values using the text protobuf syntax. Each `option_value:` field
|
||||
defines the value for one protobuf field using information from the enclosing
|
||||
graph, specifically from field values of the graph options of the enclosing
|
||||
graph. In the example above, the `option_value:`
|
||||
`"output_tensor_width:options/tensor_width"` defines the field
|
||||
`ImageToTensorCalculatorOptions.output_tensor_width` using the value of
|
||||
`FaceDetectionOptions.tensor_width`.
|
||||
|
||||
The syntax of `option_value:` is similar to the syntax of `input_stream:`. The
|
||||
syntax is `option_value: "LHS:RHS"`. The LHS identifies a calculator option
|
||||
field and the RHS identifies a graph option field. More specifically, the LHS
|
||||
and RHS each consist of a series of protobuf field names identifying nested
|
||||
protobuf messages and fields separated by '/'. This is known as the "ProtoPath"
|
||||
syntax. Nested messages that are referenced in the LHS or RHS must already be
|
||||
defined in the enclosing protobuf in order to be traversed using
|
||||
`option_value:`.
|
||||
|
||||
## Cycles
|
||||
|
||||
<!-- TODO: add discussion of PreviousLoopbackCalculator -->
|
||||
|
|
|
@ -507,8 +507,11 @@ class ClassificationPostprocessingGraph : public mediapipe::Subgraph {
|
|||
}
|
||||
};
|
||||
|
||||
// REGISTER_MEDIAPIPE_GRAPH argument has to fit on one line to work properly.
|
||||
// clang-format off
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::components::processors::ClassificationPostprocessingGraph); // NOLINT
|
||||
// clang-format on
|
||||
|
||||
} // namespace processors
|
||||
} // namespace components
|
||||
|
|
|
@ -41,8 +41,8 @@ cc_test(
|
|||
)
|
||||
|
||||
cc_library(
|
||||
name = "hand_gesture_recognizer_subgraph",
|
||||
srcs = ["hand_gesture_recognizer_subgraph.cc"],
|
||||
name = "hand_gesture_recognizer_graph",
|
||||
srcs = ["hand_gesture_recognizer_graph.cc"],
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
||||
"//mediapipe/calculators/tensor:tensor_converter_calculator",
|
||||
|
@ -62,11 +62,11 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/core:model_task_graph",
|
||||
"//mediapipe/tasks/cc/core:utils",
|
||||
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators:handedness_to_matrix_calculator",
|
||||
"//mediapipe/tasks/cc/vision/hand_gesture_recognizer/calculators:landmarks_to_matrix_calculator",
|
||||
"//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:hand_gesture_recognizer_subgraph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:landmarks_to_matrix_calculator_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_subgraph",
|
||||
"//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:handedness_to_matrix_calculator",
|
||||
"//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator",
|
||||
"//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarks_detector_graph",
|
||||
"//mediapipe/tasks/cc/vision/utils:image_tensor_specs",
|
||||
"//mediapipe/tasks/metadata:metadata_schema_cc",
|
||||
"@com_google_absl//absl/status",
|
|
@ -12,11 +12,23 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
|
||||
package(default_visibility = [
|
||||
"//mediapipe/app/xeno:__subpackages__",
|
||||
"//mediapipe/tasks:internal",
|
||||
])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "landmarks_to_matrix_calculator_proto",
|
||||
srcs = ["landmarks_to_matrix_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/tasks/cc/core/proto:base_options_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "handedness_to_matrix_calculator",
|
||||
srcs = ["handedness_to_matrix_calculator.cc"],
|
||||
|
@ -25,7 +37,7 @@ cc_library(
|
|||
"//mediapipe/framework/formats:classification_cc_proto",
|
||||
"//mediapipe/framework/formats:matrix",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/tasks/cc/vision/hand_gesture_recognizer:handedness_util",
|
||||
"//mediapipe/tasks/cc/vision/gesture_recognizer:handedness_util",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
|
@ -53,11 +65,11 @@ cc_library(
|
|||
name = "landmarks_to_matrix_calculator",
|
||||
srcs = ["landmarks_to_matrix_calculator.cc"],
|
||||
deps = [
|
||||
":landmarks_to_matrix_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:matrix",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto:landmarks_to_matrix_calculator_cc_proto",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
"@com_google_absl//absl/strings",
|
|
@ -26,14 +26,16 @@ limitations under the License.
|
|||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/matrix.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h"
|
||||
#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h"
|
||||
|
||||
// TODO Update to use API2
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace api2 {
|
||||
|
||||
namespace {
|
||||
|
||||
using ::mediapipe::tasks::vision::gesture_recognizer::GetLeftHandScore;
|
||||
|
||||
constexpr char kHandednessTag[] = "HANDEDNESS";
|
||||
constexpr char kHandednessMatrixTag[] = "HANDEDNESS_MATRIX";
|
||||
|
||||
|
@ -71,6 +73,8 @@ class HandednessToMatrixCalculator : public CalculatorBase {
|
|||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// TODO remove this after change to API2, because setting offset
|
||||
// to 0 is the default in API2
|
||||
absl::Status Open(CalculatorContext* cc) override {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
return absl::OkStatus();
|
||||
|
@ -95,6 +99,5 @@ absl::Status HandednessToMatrixCalculator::Process(CalculatorContext* cc) {
|
|||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace api2
|
||||
} // namespace mediapipe
|
|
@ -28,8 +28,6 @@ limitations under the License.
|
|||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -95,6 +93,4 @@ INSTANTIATE_TEST_CASE_P(
|
|||
|
||||
} // namespace
|
||||
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -27,13 +27,11 @@ limitations under the License.
|
|||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/matrix.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h"
|
||||
|
||||
// TODO Update to use API2
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
|
||||
using proto::LandmarksToMatrixCalculatorOptions;
|
||||
namespace api2 {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -175,7 +173,7 @@ absl::Status ProcessLandmarks(LandmarkListT landmarks, CalculatorContext* cc) {
|
|||
// input_stream: "IMAGE_SIZE:image_size"
|
||||
// output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions.ext] {
|
||||
// [mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
|
||||
// object_normalization: true
|
||||
// object_normalization_origin_offset: 0
|
||||
// }
|
||||
|
@ -221,6 +219,5 @@ absl::Status LandmarksToMatrixCalculator::Process(CalculatorContext* cc) {
|
|||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace api2
|
||||
} // namespace mediapipe
|
|
@ -15,7 +15,7 @@ limitations under the License.
|
|||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.tasks.vision.proto;
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
|
|
@ -28,8 +28,6 @@ limitations under the License.
|
|||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -72,8 +70,7 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
|
|||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
|
||||
options {
|
||||
[mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions
|
||||
.ext] {
|
||||
[mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
|
||||
object_normalization: $0
|
||||
object_normalization_origin_offset: $1
|
||||
}
|
||||
|
@ -145,8 +142,7 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
|
|||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
|
||||
options {
|
||||
[mediapipe.tasks.vision.proto.LandmarksToMatrixCalculatorOptions
|
||||
.ext] {
|
||||
[mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
|
||||
object_normalization: $0
|
||||
object_normalization_origin_offset: $1
|
||||
}
|
||||
|
@ -202,6 +198,4 @@ INSTANTIATE_TEST_CASE_P(
|
|||
|
||||
} // namespace
|
||||
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -34,14 +34,15 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/model_task_graph.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/hand_gesture_recognizer_subgraph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/proto/landmarks_to_matrix_calculator.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
|
||||
#include "mediapipe/tasks/metadata/metadata_schema_generated.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace gesture_recognizer {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -50,9 +51,8 @@ using ::mediapipe::api2::Output;
|
|||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::tasks::components::containers::proto::ClassificationResult;
|
||||
using ::mediapipe::tasks::vision::hand_gesture_recognizer::proto::
|
||||
HandGestureRecognizerSubgraphOptions;
|
||||
using ::mediapipe::tasks::vision::proto::LandmarksToMatrixCalculatorOptions;
|
||||
using ::mediapipe::tasks::vision::gesture_recognizer::proto::
|
||||
HandGestureRecognizerGraphOptions;
|
||||
|
||||
constexpr char kHandednessTag[] = "HANDEDNESS";
|
||||
constexpr char kLandmarksTag[] = "LANDMARKS";
|
||||
|
@ -70,18 +70,6 @@ constexpr char kIndexTag[] = "INDEX";
|
|||
constexpr char kIterableTag[] = "ITERABLE";
|
||||
constexpr char kBatchEndTag[] = "BATCH_END";
|
||||
|
||||
absl::Status SanityCheckOptions(
|
||||
const HandGestureRecognizerSubgraphOptions& options) {
|
||||
if (options.min_tracking_confidence() < 0 ||
|
||||
options.min_tracking_confidence() > 1) {
|
||||
return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument,
|
||||
"Invalid `min_tracking_confidence` option: "
|
||||
"value must be in the range [0.0, 1.0]",
|
||||
MediaPipeTasksStatus::kInvalidArgumentError);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
|
||||
Graph& graph) {
|
||||
auto& node = graph.AddNode("TensorConverterCalculator");
|
||||
|
@ -91,9 +79,10 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
|
|||
|
||||
} // namespace
|
||||
|
||||
// A "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph" performs
|
||||
// single hand gesture recognition. This graph is used as a building block for
|
||||
// mediapipe.tasks.vision.HandGestureRecognizerGraph.
|
||||
// A
|
||||
// "mediapipe.tasks.vision.gesture_recognizer.SingleHandGestureRecognizerGraph"
|
||||
// performs single hand gesture recognition. This graph is used as a building
|
||||
// block for mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph.
|
||||
//
|
||||
// Inputs:
|
||||
// HANDEDNESS - ClassificationList
|
||||
|
@ -113,14 +102,15 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
|
|||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph"
|
||||
// calculator:
|
||||
// "mediapipe.tasks.vision.gesture_recognizer.SingleHandGestureRecognizerGraph"
|
||||
// input_stream: "HANDEDNESS:handedness"
|
||||
// input_stream: "LANDMARKS:landmarks"
|
||||
// input_stream: "WORLD_LANDMARKS:world_landmarks"
|
||||
// input_stream: "IMAGE_SIZE:image_size"
|
||||
// output_stream: "HAND_GESTURES:hand_gestures"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.hand_gesture_recognizer.proto.HandGestureRecognizerSubgraphOptions.ext]
|
||||
// [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext]
|
||||
// {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
|
@ -130,19 +120,19 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
|
|||
// }
|
||||
// }
|
||||
// }
|
||||
class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph {
|
||||
class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
|
||||
public:
|
||||
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
||||
SubgraphContext* sc) override {
|
||||
ASSIGN_OR_RETURN(
|
||||
const auto* model_resources,
|
||||
CreateModelResources<HandGestureRecognizerSubgraphOptions>(sc));
|
||||
CreateModelResources<HandGestureRecognizerGraphOptions>(sc));
|
||||
Graph graph;
|
||||
ASSIGN_OR_RETURN(
|
||||
auto hand_gestures,
|
||||
BuildHandGestureRecognizerGraph(
|
||||
sc->Options<HandGestureRecognizerSubgraphOptions>(),
|
||||
*model_resources, graph[Input<ClassificationList>(kHandednessTag)],
|
||||
BuildGestureRecognizerGraph(
|
||||
sc->Options<HandGestureRecognizerGraphOptions>(), *model_resources,
|
||||
graph[Input<ClassificationList>(kHandednessTag)],
|
||||
graph[Input<NormalizedLandmarkList>(kLandmarksTag)],
|
||||
graph[Input<LandmarkList>(kWorldLandmarksTag)],
|
||||
graph[Input<std::pair<int, int>>(kImageSizeTag)], graph));
|
||||
|
@ -151,15 +141,13 @@ class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph {
|
|||
}
|
||||
|
||||
private:
|
||||
absl::StatusOr<Source<ClassificationResult>> BuildHandGestureRecognizerGraph(
|
||||
const HandGestureRecognizerSubgraphOptions& graph_options,
|
||||
absl::StatusOr<Source<ClassificationResult>> BuildGestureRecognizerGraph(
|
||||
const HandGestureRecognizerGraphOptions& graph_options,
|
||||
const core::ModelResources& model_resources,
|
||||
Source<ClassificationList> handedness,
|
||||
Source<NormalizedLandmarkList> hand_landmarks,
|
||||
Source<LandmarkList> hand_world_landmarks,
|
||||
Source<std::pair<int, int>> image_size, Graph& graph) {
|
||||
MP_RETURN_IF_ERROR(SanityCheckOptions(graph_options));
|
||||
|
||||
// Converts the ClassificationList to a matrix.
|
||||
auto& handedness_to_matrix = graph.AddNode("HandednessToMatrixCalculator");
|
||||
handedness >> handedness_to_matrix.In(kHandednessTag);
|
||||
|
@ -235,12 +223,15 @@ class SingleHandGestureRecognizerSubgraph : public core::ModelTaskGraph {
|
|||
}
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::SingleHandGestureRecognizerSubgraph);
|
||||
::mediapipe::tasks::vision::gesture_recognizer::SingleHandGestureRecognizerGraph); // NOLINT
|
||||
// clang-format on
|
||||
|
||||
// A "mediapipe.tasks.vision.HandGestureRecognizerSubgraph" performs multi
|
||||
// hand gesture recognition. This graph is used as a building block for
|
||||
// mediapipe.tasks.vision.HandGestureRecognizerGraph.
|
||||
// A
|
||||
// "mediapipe.tasks.vision.gesture_recognizer.MultipleHandGestureRecognizerGraph"
|
||||
// performs multi hand gesture recognition. This graph is used as a building
|
||||
// block for mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph.
|
||||
//
|
||||
// Inputs:
|
||||
// HANDEDNESS - std::vector<ClassificationList>
|
||||
|
@ -263,7 +254,8 @@ REGISTER_MEDIAPIPE_GRAPH(
|
|||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "mediapipe.tasks.vision.HandGestureRecognizerSubgraph"
|
||||
// calculator:
|
||||
// "mediapipe.tasks.vision.gesture_recognizer.MultipleHandGestureRecognizerGraph"
|
||||
// input_stream: "HANDEDNESS:handedness"
|
||||
// input_stream: "LANDMARKS:landmarks"
|
||||
// input_stream: "WORLD_LANDMARKS:world_landmarks"
|
||||
|
@ -271,7 +263,7 @@ REGISTER_MEDIAPIPE_GRAPH(
|
|||
// input_stream: "HAND_TRACKING_IDS:hand_tracking_ids"
|
||||
// output_stream: "HAND_GESTURES:hand_gestures"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.hand_gesture_recognizer.proto.HandGestureRecognizerSubgraph.ext]
|
||||
// [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext]
|
||||
// {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
|
@ -281,15 +273,15 @@ REGISTER_MEDIAPIPE_GRAPH(
|
|||
// }
|
||||
// }
|
||||
// }
|
||||
class HandGestureRecognizerSubgraph : public core::ModelTaskGraph {
|
||||
class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph {
|
||||
public:
|
||||
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
||||
SubgraphContext* sc) override {
|
||||
Graph graph;
|
||||
ASSIGN_OR_RETURN(
|
||||
auto multi_hand_gestures,
|
||||
BuildMultiHandGestureRecognizerSubraph(
|
||||
sc->Options<HandGestureRecognizerSubgraphOptions>(),
|
||||
BuildMultiGestureRecognizerSubraph(
|
||||
sc->Options<HandGestureRecognizerGraphOptions>(),
|
||||
graph[Input<std::vector<ClassificationList>>(kHandednessTag)],
|
||||
graph[Input<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
|
||||
graph[Input<std::vector<LandmarkList>>(kWorldLandmarksTag)],
|
||||
|
@ -302,8 +294,8 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph {
|
|||
|
||||
private:
|
||||
absl::StatusOr<Source<std::vector<ClassificationResult>>>
|
||||
BuildMultiHandGestureRecognizerSubraph(
|
||||
const HandGestureRecognizerSubgraphOptions& graph_options,
|
||||
BuildMultiGestureRecognizerSubraph(
|
||||
const HandGestureRecognizerGraphOptions& graph_options,
|
||||
Source<std::vector<ClassificationList>> multi_handedness,
|
||||
Source<std::vector<NormalizedLandmarkList>> multi_hand_landmarks,
|
||||
Source<std::vector<LandmarkList>> multi_hand_world_landmarks,
|
||||
|
@ -341,17 +333,18 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph {
|
|||
hand_tracking_id >> get_world_landmarks_at_index.In(kIndexTag);
|
||||
auto hand_world_landmarks = get_world_landmarks_at_index.Out(kItemTag);
|
||||
|
||||
auto& hand_gesture_recognizer_subgraph = graph.AddNode(
|
||||
"mediapipe.tasks.vision.SingleHandGestureRecognizerSubgraph");
|
||||
hand_gesture_recognizer_subgraph
|
||||
.GetOptions<HandGestureRecognizerSubgraphOptions>()
|
||||
auto& hand_gesture_recognizer_graph = graph.AddNode(
|
||||
"mediapipe.tasks.vision.gesture_recognizer."
|
||||
"SingleHandGestureRecognizerGraph");
|
||||
hand_gesture_recognizer_graph
|
||||
.GetOptions<HandGestureRecognizerGraphOptions>()
|
||||
.CopyFrom(graph_options);
|
||||
handedness >> hand_gesture_recognizer_subgraph.In(kHandednessTag);
|
||||
hand_landmarks >> hand_gesture_recognizer_subgraph.In(kLandmarksTag);
|
||||
handedness >> hand_gesture_recognizer_graph.In(kHandednessTag);
|
||||
hand_landmarks >> hand_gesture_recognizer_graph.In(kLandmarksTag);
|
||||
hand_world_landmarks >>
|
||||
hand_gesture_recognizer_subgraph.In(kWorldLandmarksTag);
|
||||
image_size_clone >> hand_gesture_recognizer_subgraph.In(kImageSizeTag);
|
||||
auto hand_gestures = hand_gesture_recognizer_subgraph.Out(kHandGesturesTag);
|
||||
hand_gesture_recognizer_graph.In(kWorldLandmarksTag);
|
||||
image_size_clone >> hand_gesture_recognizer_graph.In(kImageSizeTag);
|
||||
auto hand_gestures = hand_gesture_recognizer_graph.Out(kHandGesturesTag);
|
||||
|
||||
auto& end_loop_classification_results =
|
||||
graph.AddNode("mediapipe.tasks.EndLoopClassificationResultCalculator");
|
||||
|
@ -364,9 +357,12 @@ class HandGestureRecognizerSubgraph : public core::ModelTaskGraph {
|
|||
}
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::HandGestureRecognizerSubgraph);
|
||||
::mediapipe::tasks::vision::gesture_recognizer::MultipleHandGestureRecognizerGraph); // NOLINT
|
||||
// clang-format on
|
||||
|
||||
} // namespace gesture_recognizer
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h"
|
||||
#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -25,6 +25,7 @@ limitations under the License.
|
|||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace gesture_recognizer {
|
||||
|
||||
namespace {} // namespace
|
||||
|
||||
|
@ -58,6 +59,7 @@ absl::StatusOr<float> GetLeftHandScore(
|
|||
}
|
||||
}
|
||||
|
||||
} // namespace gesture_recognizer
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
|
||||
|
||||
#include "absl/status/statusor.h"
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
|||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace gesture_recognizer {
|
||||
|
||||
bool IsLeftHand(const mediapipe::Classification& c);
|
||||
|
||||
|
@ -30,8 +31,9 @@ bool IsRightHand(const mediapipe::Classification& c);
|
|||
absl::StatusOr<float> GetLeftHandScore(
|
||||
const mediapipe::ClassificationList& classification_list);
|
||||
|
||||
} // namespace gesture_recognizer
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_HAND_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_HADNDEDNESS_UTILS_H_
|
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "mediapipe/tasks/cc/vision/hand_gesture_recognizer/handedness_util.h"
|
||||
#include "mediapipe/tasks/cc/vision/gesture_recognizer/handedness_util.h"
|
||||
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
|||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace gesture_recognizer {
|
||||
namespace {
|
||||
|
||||
TEST(GetLeftHandScore, SingleLeftHandClassification) {
|
||||
|
@ -72,6 +73,7 @@ TEST(GetLeftHandScore, LeftAndRightLowerCaseHandClassification) {
|
|||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace gesture_recognizer
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -21,8 +21,8 @@ package(default_visibility = [
|
|||
licenses(["notice"])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "hand_gesture_recognizer_subgraph_options_proto",
|
||||
srcs = ["hand_gesture_recognizer_subgraph_options.proto"],
|
||||
name = "hand_gesture_recognizer_graph_options_proto",
|
||||
srcs = ["hand_gesture_recognizer_graph_options.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
|
@ -30,12 +30,3 @@ mediapipe_proto_library(
|
|||
"//mediapipe/tasks/cc/core/proto:base_options_proto",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "landmarks_to_matrix_calculator_proto",
|
||||
srcs = ["landmarks_to_matrix_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
],
|
||||
)
|
|
@ -15,15 +15,15 @@ limitations under the License.
|
|||
// TODO Refactor naming and class structure of hand related Tasks.
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.tasks.vision.hand_gesture_recognizer.proto;
|
||||
package mediapipe.tasks.vision.gesture_recognizer.proto;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/tasks/cc/components/processors/proto/classifier_options.proto";
|
||||
import "mediapipe/tasks/cc/core/proto/base_options.proto";
|
||||
|
||||
message HandGestureRecognizerSubgraphOptions {
|
||||
message HandGestureRecognizerGraphOptions {
|
||||
extend mediapipe.CalculatorOptions {
|
||||
optional HandGestureRecognizerSubgraphOptions ext = 463370452;
|
||||
optional HandGestureRecognizerGraphOptions ext = 463370452;
|
||||
}
|
||||
// Base options for configuring hand gesture recognition subgraph, such as
|
||||
// specifying the TfLite model file with metadata, accelerator options, etc.
|
|
@ -51,7 +51,7 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/core:model_task_graph",
|
||||
"//mediapipe/tasks/cc/core:utils",
|
||||
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/utils:image_tensor_specs",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
|
|
|
@ -40,12 +40,13 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/model_task_graph.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_detector {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -53,18 +54,23 @@ using ::mediapipe::api2::Input;
|
|||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::
|
||||
HandDetectorGraphOptions;
|
||||
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kDetectionsTag[] = "DETECTIONS";
|
||||
constexpr char kNormRectsTag[] = "NORM_RECTS";
|
||||
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
|
||||
constexpr char kHandRectsTag[] = "HAND_RECTS";
|
||||
constexpr char kPalmRectsTag[] = "PALM_RECTS";
|
||||
|
||||
// Bundle of output streams produced by BuildHandDetectionSubgraph. GetConfig
// connects each member to the graph output with the matching tag.
struct HandDetectionOuts {
|
||||
// Palm detections; exposed under the PALM_DETECTIONS tag.
Source<std::vector<Detection>> palm_detections;
|
||||
// Hand rects after clipping the vector size; exposed under HAND_RECTS.
Source<std::vector<NormalizedRect>> hand_rects;
|
||||
// NORM_RECTS output of the detections-to-rects node; exposed under
// PALM_RECTS.
Source<std::vector<NormalizedRect>> palm_rects;
|
||||
// IMAGE output of the image preprocessing node; exposed under IMAGE.
Source<Image> image;
|
||||
};
|
||||
|
||||
void ConfigureTensorsToDetectionsCalculator(
|
||||
const HandDetectorGraphOptions& tasks_options,
|
||||
mediapipe::TensorsToDetectionsCalculatorOptions* options) {
|
||||
// TODO use metadata to configure these fields.
|
||||
options->set_num_classes(1);
|
||||
|
@ -77,7 +83,7 @@ void ConfigureTensorsToDetectionsCalculator(
|
|||
options->set_sigmoid_score(true);
|
||||
options->set_score_clipping_thresh(100.0);
|
||||
options->set_reverse_output_order(true);
|
||||
options->set_min_score_thresh(0.5);
|
||||
options->set_min_score_thresh(tasks_options.min_detection_confidence());
|
||||
options->set_x_scale(192.0);
|
||||
options->set_y_scale(192.0);
|
||||
options->set_w_scale(192.0);
|
||||
|
@ -134,9 +140,9 @@ void ConfigureRectTransformationCalculator(
|
|||
|
||||
} // namespace
|
||||
|
||||
// A "mediapipe.tasks.vision.HandDetectorGraph" performs hand detection. The
|
||||
// Hand Detection Graph is based on palm detection model, and scale the detected
|
||||
// palm bounding box to enclose the detected whole hand.
|
||||
// A "mediapipe.tasks.vision.hand_detector.HandDetectorGraph" performs hand
|
||||
// detection. The Hand Detection Graph is based on palm detection model, and
|
||||
// scales the detected palm bounding box to enclose the whole detected hand.
|
||||
// Accepts CPU input images and outputs detections and rects on CPU.
|
||||
//
|
||||
// Inputs:
|
||||
|
@ -144,19 +150,27 @@ void ConfigureRectTransformationCalculator(
|
|||
// Image to perform detection on.
|
||||
//
|
||||
// Outputs:
|
||||
// DETECTIONS - std::vector<Detection>
|
||||
// PALM_DETECTIONS - std::vector<Detection>
|
||||
// Detected palms with maximum `num_hands` specified in options.
|
||||
// NORM_RECTS - std::vector<NormalizedRect>
|
||||
// HAND_RECTS - std::vector<NormalizedRect>
|
||||
// Detected hand bounding boxes in normalized coordinates.
|
||||
// PALM_RECTS - std::vector<NormalizedRect>
|
||||
// Detected palm bounding boxes in normalized coordinates.
|
||||
// IMAGE - Image
|
||||
// The input image that the hand detector runs on and has the pixel data
|
||||
// stored on the target storage (CPU vs GPU).
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "mediapipe.tasks.vision.HandDetectorGraph"
|
||||
// calculator: "mediapipe.tasks.vision.hand_detector.HandDetectorGraph"
|
||||
// input_stream: "IMAGE:image"
|
||||
// output_stream: "DETECTIONS:palm_detections"
|
||||
// output_stream: "NORM_RECTS:hand_rects_from_palm_detections"
|
||||
// output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
// output_stream: "HAND_RECTS:hand_rects_from_palm_detections"
|
||||
// output_stream: "PALM_RECTS:palm_rects"
|
||||
// output_stream: "IMAGE:image_out"
|
||||
// options {
|
||||
// [mediapipe.tasks.hand_detector.proto.HandDetectorOptions.ext] {
|
||||
// [mediapipe.tasks.vision.hand_detector.proto.HandDetectorGraphOptions.ext]
|
||||
// {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
// file_name: "palm_detection.tflite"
|
||||
|
@ -173,16 +187,20 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
||||
SubgraphContext* sc) override {
|
||||
ASSIGN_OR_RETURN(const auto* model_resources,
|
||||
CreateModelResources<HandDetectorOptions>(sc));
|
||||
CreateModelResources<HandDetectorGraphOptions>(sc));
|
||||
Graph graph;
|
||||
ASSIGN_OR_RETURN(auto hand_detection_outs,
|
||||
BuildHandDetectionSubgraph(
|
||||
sc->Options<HandDetectorOptions>(), *model_resources,
|
||||
ASSIGN_OR_RETURN(
|
||||
auto hand_detection_outs,
|
||||
BuildHandDetectionSubgraph(sc->Options<HandDetectorGraphOptions>(),
|
||||
*model_resources,
|
||||
graph[Input<Image>(kImageTag)], graph));
|
||||
hand_detection_outs.palm_detections >>
|
||||
graph[Output<std::vector<Detection>>(kDetectionsTag)];
|
||||
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
|
||||
hand_detection_outs.hand_rects >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kNormRectsTag)];
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandRectsTag)];
|
||||
hand_detection_outs.palm_rects >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
|
||||
hand_detection_outs.image >> graph[Output<Image>(kImageTag)];
|
||||
return graph.GetConfig();
|
||||
}
|
||||
|
||||
|
@ -196,7 +214,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
// image_in: image stream to run hand detection on.
|
||||
// graph: the mediapipe builder::Graph instance to be updated.
|
||||
absl::StatusOr<HandDetectionOuts> BuildHandDetectionSubgraph(
|
||||
const HandDetectorOptions& subgraph_options,
|
||||
const HandDetectorGraphOptions& subgraph_options,
|
||||
const core::ModelResources& model_resources, Source<Image> image_in,
|
||||
Graph& graph) {
|
||||
// Add image preprocessing subgraph. The model expects aspect ratio
|
||||
|
@ -235,6 +253,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
auto& tensors_to_detections =
|
||||
graph.AddNode("TensorsToDetectionsCalculator");
|
||||
ConfigureTensorsToDetectionsCalculator(
|
||||
subgraph_options,
|
||||
&tensors_to_detections
|
||||
.GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
|
||||
model_output_tensors >> tensors_to_detections.In("TENSORS");
|
||||
|
@ -281,7 +300,8 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
.GetOptions<mediapipe::DetectionsToRectsCalculatorOptions>());
|
||||
palm_detections >> detections_to_rects.In("DETECTIONS");
|
||||
image_size >> detections_to_rects.In("IMAGE_SIZE");
|
||||
auto palm_rects = detections_to_rects.Out("NORM_RECTS");
|
||||
auto palm_rects =
|
||||
detections_to_rects[Output<std::vector<NormalizedRect>>("NORM_RECTS")];
|
||||
|
||||
// Expands and shifts the rectangle that contains the palm so that it's
|
||||
// likely to cover the entire hand.
|
||||
|
@ -308,13 +328,18 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
clip_normalized_rect_vector_size[Output<std::vector<NormalizedRect>>(
|
||||
"")];
|
||||
|
||||
return HandDetectionOuts{.palm_detections = palm_detections,
|
||||
.hand_rects = clipped_hand_rects};
|
||||
return HandDetectionOuts{
|
||||
/* palm_detections= */ palm_detections,
|
||||
/* hand_rects= */ clipped_hand_rects,
|
||||
/* palm_rects= */ palm_rects,
|
||||
/* image= */ preprocessing[Output<Image>(kImageTag)]};
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandDetectorGraph);
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::hand_detector::HandDetectorGraph);
|
||||
|
||||
} // namespace hand_detector
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -40,13 +40,14 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_detector {
|
||||
namespace {
|
||||
|
||||
using ::file::Defaults;
|
||||
|
@ -60,7 +61,8 @@ using ::mediapipe::tasks::core::ModelResources;
|
|||
using ::mediapipe::tasks::core::TaskRunner;
|
||||
using ::mediapipe::tasks::core::proto::ExternalFile;
|
||||
using ::mediapipe::tasks::vision::DecodeImageFromFile;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorOptions;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::
|
||||
HandDetectorGraphOptions;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::HandDetectorResult;
|
||||
using ::testing::EqualsProto;
|
||||
using ::testing::TestParamInfo;
|
||||
|
@ -80,9 +82,9 @@ constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt";
|
|||
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kImageName[] = "image";
|
||||
constexpr char kPalmDetectionsTag[] = "DETECTIONS";
|
||||
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
|
||||
constexpr char kPalmDetectionsName[] = "palm_detections";
|
||||
constexpr char kHandNormRectsTag[] = "NORM_RECTS";
|
||||
constexpr char kHandRectsTag[] = "HAND_RECTS";
|
||||
constexpr char kHandNormRectsName[] = "hand_norm_rects";
|
||||
|
||||
constexpr float kPalmDetectionBboxMaxDiff = 0.01;
|
||||
|
@ -104,22 +106,22 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner(
|
|||
Graph graph;
|
||||
|
||||
auto& hand_detection =
|
||||
graph.AddNode("mediapipe.tasks.vision.HandDetectorGraph");
|
||||
graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph");
|
||||
|
||||
auto options = std::make_unique<HandDetectorOptions>();
|
||||
auto options = std::make_unique<HandDetectorGraphOptions>();
|
||||
options->mutable_base_options()->mutable_model_asset()->set_file_name(
|
||||
JoinPath("./", kTestDataDirectory, model_name));
|
||||
options->set_min_detection_confidence(0.5);
|
||||
options->set_num_hands(num_hands);
|
||||
hand_detection.GetOptions<HandDetectorOptions>().Swap(options.get());
|
||||
hand_detection.GetOptions<HandDetectorGraphOptions>().Swap(options.get());
|
||||
|
||||
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
|
||||
hand_detection.In(kImageTag);
|
||||
|
||||
hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >>
|
||||
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
|
||||
hand_detection.Out(kHandNormRectsTag).SetName(kHandNormRectsName) >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandNormRectsTag)];
|
||||
hand_detection.Out(kHandRectsTag).SetName(kHandNormRectsName) >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandRectsTag)];
|
||||
|
||||
return TaskRunner::Create(
|
||||
graph.GetConfig(), std::make_unique<core::MediaPipeBuiltinOpResolver>());
|
||||
|
@ -200,6 +202,7 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
});
|
||||
|
||||
} // namespace
|
||||
} // namespace hand_detector
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -21,8 +21,8 @@ package(default_visibility = [
|
|||
licenses(["notice"])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "hand_detector_options_proto",
|
||||
srcs = ["hand_detector_options.proto"],
|
||||
name = "hand_detector_graph_options_proto",
|
||||
srcs = ["hand_detector_graph_options.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
|
|
|
@ -21,24 +21,20 @@ import "mediapipe/framework/calculator.proto";
|
|||
import "mediapipe/tasks/cc/core/proto/base_options.proto";
|
||||
|
||||
option java_package = "com.google.mediapipe.tasks.vision.handdetector";
|
||||
option java_outer_classname = "HandDetectorOptionsProto";
|
||||
option java_outer_classname = "HandDetectorGraphOptionsProto";
|
||||
|
||||
message HandDetectorOptions {
|
||||
message HandDetectorGraphOptions {
|
||||
extend mediapipe.CalculatorOptions {
|
||||
optional HandDetectorOptions ext = 464864288;
|
||||
optional HandDetectorGraphOptions ext = 464864288;
|
||||
}
|
||||
// Base options for configuring Task library, such as specifying the TfLite
|
||||
// model file with metadata, accelerator options, etc.
|
||||
optional core.proto.BaseOptions base_options = 1;
|
||||
|
||||
// The locale to use for display names specified through the TFLite Model
|
||||
// Metadata, if any. Defaults to English.
|
||||
optional string display_names_locale = 2 [default = "en"];
|
||||
|
||||
// Minimum confidence value ([0.0, 1.0]) for confidence score to be considered
|
||||
// successfully detecting a hand in the image.
|
||||
optional float min_detection_confidence = 3 [default = 0.5];
|
||||
optional float min_detection_confidence = 2 [default = 0.5];
|
||||
|
||||
// The maximum number of hands output by the detector.
|
||||
optional int32 num_hands = 4;
|
||||
optional int32 num_hands = 3;
|
||||
}
|
|
@ -19,10 +19,10 @@ package(default_visibility = [
|
|||
licenses(["notice"])
|
||||
|
||||
cc_library(
|
||||
name = "hand_landmarker_subgraph",
|
||||
srcs = ["hand_landmarker_subgraph.cc"],
|
||||
name = "hand_landmarks_detector_graph",
|
||||
srcs = ["hand_landmarks_detector_graph.cc"],
|
||||
deps = [
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_subgraph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
|
@ -51,6 +51,7 @@ cc_library(
|
|||
# TODO: move calculators in modules/hand_landmark/calculators to tasks dir.
|
||||
"//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator",
|
||||
"//mediapipe/tasks/cc:common",
|
||||
"//mediapipe/tasks/cc/components/utils:gate",
|
||||
"//mediapipe/tasks/cc/components:image_preprocessing",
|
||||
"//mediapipe/tasks/cc/core:model_resources",
|
||||
"//mediapipe/tasks/cc/core:model_task_graph",
|
||||
|
@ -66,3 +67,41 @@ cc_library(
|
|||
)
|
||||
|
||||
# TODO: Enable this test
|
||||
|
||||
# Library for hand_landmarker_graph.cc. Its deps include the hand detector
# graph, the hand landmarks detector graph, and the calculator targets and
# option protos listed below.
cc_library(
|
||||
name = "hand_landmarker_graph",
|
||||
srcs = ["hand_landmarker_graph.cc"],
|
||||
deps = [
|
||||
":hand_landmarks_detector_graph",
|
||||
"//mediapipe/calculators/core:begin_loop_calculator",
|
||||
"//mediapipe/calculators/core:clip_vector_size_calculator_cc_proto",
|
||||
"//mediapipe/calculators/core:end_loop_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator_cc_proto",
|
||||
"//mediapipe/calculators/core:pass_through_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto",
|
||||
"//mediapipe/framework/api2:builder",
|
||||
"//mediapipe/framework/api2:port",
|
||||
"//mediapipe/framework/formats:classification_cc_proto",
|
||||
"//mediapipe/framework/formats:detection_cc_proto",
|
||||
"//mediapipe/framework/formats:image",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/framework/formats:tensor",
|
||||
"//mediapipe/tasks/cc:common",
|
||||
"//mediapipe/tasks/cc/components/utils:gate",
|
||||
"//mediapipe/tasks/cc/core:model_task_graph",
|
||||
"//mediapipe/tasks/cc/core:utils",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector:hand_detector_graph",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
|
||||
],
|
||||
# NOTE(review): presumably required so the static registration done by
# REGISTER_MEDIAPIPE_GRAPH in hand_landmarker_graph.cc is not dropped by the
# linker when nothing references the library's symbols directly - confirm.
alwayslink = 1,
|
||||
)
|
||||
|
||||
# TODO: Enable this test
|
||||
|
|
|
@ -0,0 +1,286 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/calculators/core/clip_vector_size_calculator.pb.h"
|
||||
#include "mediapipe/calculators/core/gate_calculator.pb.h"
|
||||
#include "mediapipe/calculators/util/collection_has_min_size_calculator.pb.h"
|
||||
#include "mediapipe/framework/api2/builder.h"
|
||||
#include "mediapipe/framework/api2/port.h"
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/detection.pb.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/tasks/cc/common.h"
|
||||
#include "mediapipe/tasks/cc/components/utils/gate.h"
|
||||
#include "mediapipe/tasks/cc/core/model_task_graph.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_landmarker {
|
||||
|
||||
namespace {
|
||||
|
||||
using ::mediapipe::api2::Input;
|
||||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::tasks::components::utils::DisallowIf;
|
||||
using ::mediapipe::tasks::vision::hand_detector::proto::
|
||||
HandDetectorGraphOptions;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarkerGraphOptions;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarksDetectorGraphOptions;
|
||||
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kLandmarksTag[] = "LANDMARKS";
|
||||
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
|
||||
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
|
||||
constexpr char kHandednessTag[] = "HANDEDNESS";
|
||||
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
|
||||
constexpr char kPalmRectsTag[] = "PALM_RECTS";
|
||||
constexpr char kPreviousLoopbackCalculatorName[] = "PreviousLoopbackCalculator";
|
||||
|
||||
// Bundle of output streams returned by
// HandLandmarkerGraph::BuildHandLandmarkerGraph. The member order matters:
// the builder returns this struct via positional aggregate initialization.
struct HandLandmarkerOutputs {
|
||||
// LANDMARKS output of the multiple-hand landmarks detector subgraph.
Source<std::vector<NormalizedLandmarkList>> landmark_lists;
|
||||
// WORLD_LANDMARKS output of the multiple-hand landmarks detector subgraph.
Source<std::vector<LandmarkList>> world_landmark_lists;
|
||||
// HAND_RECT_NEXT_FRAME output of the landmarks detector subgraph; the same
// stream is also fed back into the graph through the loopback node.
Source<std::vector<NormalizedRect>> hand_rects_next_frame;
|
||||
// HANDEDNESS output of the multiple-hand landmarks detector subgraph.
Source<std::vector<ClassificationList>> handednesses;
|
||||
// PALM_RECTS output of the hand detector subgraph.
Source<std::vector<NormalizedRect>> palm_rects;
|
||||
// PALM_DETECTIONS output of the hand detector subgraph.
Source<std::vector<Detection>> palm_detections;
|
||||
// Input image forwarded through a PassThroughCalculator.
Source<Image> image;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand
|
||||
// landmarks detection. The HandLandmarkerGraph consists of two subgraphs:
|
||||
// HandDetectorGraph and MultipleHandLandmarksDetectorGraph.
|
||||
// MultipleHandLandmarksDetectorGraph detects landmarks from bounding boxes
|
||||
// produced by HandDetectorGraph. HandLandmarkerGraph tracks the landmarks over
|
||||
// time, and skips the HandDetectorGraph. If the tracking is lost or the detected
|
||||
// hands are less than configured max number hands, HandDetectorGraph would be
|
||||
// triggered to detect hands.
|
||||
//
|
||||
// Accepts CPU input images and outputs Landmarks on CPU.
|
||||
//
|
||||
// Inputs:
|
||||
// IMAGE - Image
|
||||
// Image to perform hand landmarks detection on.
|
||||
//
|
||||
// Outputs:
|
||||
// LANDMARKS - std::vector<NormalizedLandmarkList>
|
||||
// Vector of detected hand landmarks.
|
||||
// WORLD_LANDMARKS - std::vector<LandmarkList>
|
||||
// Vector of detected hand landmarks in world coordinates.
|
||||
// HAND_RECT_NEXT_FRAME - std::vector<NormalizedRect>
|
||||
// Vector of the predicted rects enclosing the same hand RoI for landmark
|
||||
// detection on the next frame.
|
||||
// HANDEDNESS - std::vector<ClassificationList>
|
||||
// Vector of classification of handedness.
|
||||
// PALM_RECTS - std::vector<NormalizedRect>
|
||||
// Detected palm bounding boxes in normalized coordinates.
|
||||
// PALM_DETECTIONS - std::vector<Detection>
|
||||
// Detected palms with maximum `num_hands` specified in options.
|
||||
// IMAGE - Image
|
||||
// The input image that the hand landmarker runs on and has the pixel data
|
||||
// stored on the target storage (CPU vs GPU).
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"
|
||||
// input_stream: "IMAGE:image_in"
|
||||
// output_stream: "LANDMARKS:hand_landmarks"
|
||||
// output_stream: "WORLD_LANDMARKS:world_hand_landmarks"
|
||||
// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
|
||||
// output_stream: "HANDEDNESS:handedness"
|
||||
// output_stream: "PALM_RECTS:palm_rects"
|
||||
// output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
// output_stream: "IMAGE:image_out"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerGraphOptions.ext] {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
// file_name: "hand_landmarker.task"
|
||||
// }
|
||||
// }
|
||||
// hand_detector_graph_options {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
// file_name: "palm_detection.tflite"
|
||||
// }
|
||||
// }
|
||||
// min_detection_confidence: 0.5
|
||||
// num_hands: 2
|
||||
// }
|
||||
// hand_landmarks_detector_graph_options {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
// file_name: "hand_landmark_lite.tflite"
|
||||
// }
|
||||
// }
|
||||
// min_detection_confidence: 0.5
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// Task graph that wires together the hand detector subgraph, the hand
// association calculator and the multiple-hand landmarks detector subgraph,
// with the landmark-derived hand rects looped back as input for later frames.
class HandLandmarkerGraph : public core::ModelTaskGraph {
|
||||
public:
|
||||
// Builds the CalculatorGraphConfig for this subgraph.
//
// Reads HandLandmarkerGraphOptions from `sc`, builds the landmarker graph on
// the IMAGE input, connects every member of HandLandmarkerOutputs to the
// graph output with the matching tag, and finally patches the generated
// config so that the LOOP input of the loopback node is marked as a back
// edge. Errors from BuildHandLandmarkerGraph are propagated by
// ASSIGN_OR_RETURN.
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
||||
SubgraphContext* sc) override {
|
||||
Graph graph;
|
||||
ASSIGN_OR_RETURN(
|
||||
auto hand_landmarker_outputs,
|
||||
BuildHandLandmarkerGraph(sc->Options<HandLandmarkerGraphOptions>(),
|
||||
graph[Input<Image>(kImageTag)], graph));
|
||||
hand_landmarker_outputs.landmark_lists >>
|
||||
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
|
||||
hand_landmarker_outputs.world_landmark_lists >>
|
||||
graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
|
||||
hand_landmarker_outputs.hand_rects_next_frame >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];
|
||||
hand_landmarker_outputs.handednesses >>
|
||||
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
|
||||
hand_landmarker_outputs.palm_rects >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kPalmRectsTag)];
|
||||
hand_landmarker_outputs.palm_detections >>
|
||||
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
|
||||
hand_landmarker_outputs.image >> graph[Output<Image>(kImageTag)];
|
||||
|
||||
// TODO remove when support is fixed.
|
||||
// As mediapipe GraphBuilder currently doesn't support configuring
|
||||
// InputStreamInfo, modifying the CalculatorGraphConfig proto directly.
|
||||
CalculatorGraphConfig config = graph.GetConfig();
|
||||
// Only the first node whose calculator name matches is patched; the loop
// stops at the `break` below.
for (int i = 0; i < config.node_size(); ++i) {
|
||||
if (config.node(i).calculator() == kPreviousLoopbackCalculatorName) {
|
||||
auto* info = config.mutable_node(i)->add_input_stream_info();
|
||||
info->set_tag_index("LOOP");
|
||||
info->set_back_edge(true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return config;
|
||||
}
|
||||
|
||||
private:
|
||||
// Adds a mediapipe hand landmark detection graph into the provided
|
||||
// builder::Graph instance.
|
||||
//
|
||||
// tasks_options: the mediapipe tasks module HandLandmarkerGraphOptions.
|
||||
// image_in: (mediapipe::Image) stream to run hand landmark detection on.
|
||||
// graph: the mediapipe graph instance to be updated.
|
||||
//
// Returns the output streams bundled in a HandLandmarkerOutputs.
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
|
||||
const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
|
||||
Graph& graph) {
|
||||
// The detector's `num_hands` option is reused below both as the min-size
// threshold and as the maximum size of the clipped rect vector.
const int max_num_hands =
|
||||
tasks_options.hand_detector_graph_options().num_hands();
|
||||
|
||||
// Loopback node: MAIN is driven by the input image, LOOP receives the
// next-frame hand rects (connected further down) and PREV_LOOP is read
// here. NOTE(review): presumably PREV_LOOP carries the rects computed for
// the previous frame - confirm against PreviousLoopbackCalculator.
auto& previous_loopback = graph.AddNode(kPreviousLoopbackCalculatorName);
|
||||
image_in >> previous_loopback.In("MAIN");
|
||||
auto prev_hand_rects_from_landmarks =
|
||||
previous_loopback[Output<std::vector<NormalizedRect>>("PREV_LOOP")];
|
||||
|
||||
// Boolean stream derived from the looped-back rects with min_size set to
// max_num_hands.
auto& min_size_node =
|
||||
graph.AddNode("NormalizedRectVectorHasMinSizeCalculator");
|
||||
prev_hand_rects_from_landmarks >> min_size_node.In("ITERABLE");
|
||||
min_size_node.GetOptions<CollectionHasMinSizeCalculatorOptions>()
|
||||
.set_min_size(max_num_hands);
|
||||
auto has_enough_hands = min_size_node.Out("").Cast<bool>();
|
||||
|
||||
// NOTE(review): presumably DisallowIf drops the image whenever
// has_enough_hands is true, so the detector only runs when fewer than
// max_num_hands hands are tracked - confirm against components/utils/gate.h.
auto image_for_hand_detector =
|
||||
DisallowIf(image_in, has_enough_hands, graph);
|
||||
|
||||
// Hand detector subgraph, configured with a copy of the nested detector
// options and fed with the gated image.
auto& hand_detector =
|
||||
graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph");
|
||||
hand_detector.GetOptions<HandDetectorGraphOptions>().CopyFrom(
|
||||
tasks_options.hand_detector_graph_options());
|
||||
image_for_hand_detector >> hand_detector.In("IMAGE");
|
||||
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
|
||||
|
||||
// Hand association takes the looped-back rects as input 0 and the detector
// rects as input 1; its similarity threshold comes from
// min_tracking_confidence.
auto& hand_association = graph.AddNode("HandAssociationCalculator");
|
||||
hand_association.GetOptions<HandAssociationCalculatorOptions>()
|
||||
.set_min_similarity_threshold(tasks_options.min_tracking_confidence());
|
||||
prev_hand_rects_from_landmarks >>
|
||||
hand_association[Input<std::vector<NormalizedRect>>::Multiple("")][0];
|
||||
hand_rects_from_hand_detector >>
|
||||
hand_association[Input<std::vector<NormalizedRect>>::Multiple("")][1];
|
||||
auto hand_rects = hand_association.Out("");
|
||||
|
||||
// Limits the associated rect vector to at most max_num_hands entries.
auto& clip_hand_rects =
|
||||
graph.AddNode("ClipNormalizedRectVectorSizeCalculator");
|
||||
clip_hand_rects.GetOptions<ClipVectorSizeCalculatorOptions>()
|
||||
.set_max_vec_size(max_num_hands);
|
||||
hand_rects >> clip_hand_rects.In("");
|
||||
auto clipped_hand_rects = clip_hand_rects.Out("");
|
||||
|
||||
// Landmarks detector subgraph; it receives the ungated input image plus the
// clipped hand rects.
auto& hand_landmarks_detector_graph = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker."
|
||||
"MultipleHandLandmarksDetectorGraph");
|
||||
hand_landmarks_detector_graph
|
||||
.GetOptions<HandLandmarksDetectorGraphOptions>()
|
||||
.CopyFrom(tasks_options.hand_landmarks_detector_graph_options());
|
||||
image_in >> hand_landmarks_detector_graph.In("IMAGE");
|
||||
clipped_hand_rects >> hand_landmarks_detector_graph.In("HAND_RECT");
|
||||
|
||||
auto hand_rects_for_next_frame =
|
||||
hand_landmarks_detector_graph[Output<std::vector<NormalizedRect>>(
|
||||
kHandRectNextFrameTag)];
|
||||
// Back edge.
|
||||
hand_rects_for_next_frame >> previous_loopback.In("LOOP");
|
||||
|
||||
// TODO: Replace PassThroughCalculator with a calculator that
|
||||
// converts the pixel data to be stored on the target storage (CPU vs GPU).
|
||||
auto& pass_through = graph.AddNode("PassThroughCalculator");
|
||||
image_in >> pass_through.In("");
|
||||
|
||||
// Positional aggregate initialization: the entries below must stay in the
// declaration order of HandLandmarkerOutputs.
return {{
|
||||
/* landmark_lists= */ hand_landmarks_detector_graph
|
||||
[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
|
||||
/* world_landmark_lists= */
|
||||
hand_landmarks_detector_graph[Output<std::vector<LandmarkList>>(
|
||||
kWorldLandmarksTag)],
|
||||
/* hand_rects_next_frame= */ hand_rects_for_next_frame,
|
||||
/* handednesses= */
hand_landmarks_detector_graph[Output<std::vector<ClassificationList>>(
|
||||
kHandednessTag)],
|
||||
/* palm_rects= */
|
||||
hand_detector[Output<std::vector<NormalizedRect>>(kPalmRectsTag)],
|
||||
/* palm_detections= */
|
||||
hand_detector[Output<std::vector<Detection>>(kPalmDetectionsTag)],
|
||||
/* image= */
|
||||
pass_through[Output<Image>("")],
|
||||
}};
|
||||
}
|
||||
};
|
||||
|
||||
// Registers HandLandmarkerGraph with MediaPipe.
// NOTE(review): presumably this makes the graph addressable as
// "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph", the name used
// in the example config in the class comment - confirm.
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerGraph);
|
||||
|
||||
} // namespace hand_landmarker
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,167 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/flags/flag.h"
|
||||
#include "absl/status/statusor.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "mediapipe/framework/api2/builder.h"
|
||||
#include "mediapipe/framework/api2/port.h"
|
||||
#include "mediapipe/framework/deps/file_path.h"
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/port/file_helpers.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/shims/cc/shims_test_util.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_landmarker {
|
||||
|
||||
namespace {
|
||||
|
||||
using ::file::Defaults;
|
||||
using ::file::GetTextProto;
|
||||
using ::mediapipe::api2::Input;
|
||||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::file::JoinPath;
|
||||
using ::mediapipe::tasks::core::TaskRunner;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarkerGraphOptions;
|
||||
using ::testing::EqualsProto;
|
||||
using ::testing::proto::Approximately;
|
||||
using ::testing::proto::Partially;
|
||||
|
||||
// Test asset locations. File names below are joined onto kTestDataDirectory
// (itself joined onto "./") wherever they are used in this file.
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
|
||||
// Model file configured for the hand detector graph options.
constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite";
|
||||
// Model file configured for the hand landmarks detector graph options.
constexpr char kHandLandmarkerFullModel[] = "hand_landmark_full.tflite";
|
||||
// Input image decoded and fed to the graph by the test.
constexpr char kLeftHandsImage[] = "left_hands.jpg";
|
||||
|
||||
// Stream tags (k*Tag) and the stream names (k*Name) assigned to them when the
// test graph is wired up in CreateTaskRunner().
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kImageName[] = "image_in";
|
||||
constexpr char kLandmarksTag[] = "LANDMARKS";
|
||||
constexpr char kLandmarksName[] = "landmarks";
|
||||
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
|
||||
constexpr char kWorldLandmarksName[] = "world_landmarks";
|
||||
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
|
||||
constexpr char kHandRectNextFrameName[] = "hand_rect_next_frame";
|
||||
constexpr char kHandednessTag[] = "HANDEDNESS";
|
||||
constexpr char kHandednessName[] = "handedness";
|
||||
|
||||
// Expected hand landmarks positions, in text proto format.
|
||||
constexpr char kExpectedLeftUpHandLandmarksFilename[] =
|
||||
"expected_left_up_hand_landmarks.prototxt";
|
||||
constexpr char kExpectedLeftDownHandLandmarksFilename[] =
|
||||
"expected_left_down_hand_landmarks.prototxt";
|
||||
|
||||
// Passed as the `fraction` argument of Approximately(): a relative tolerance
// expressed as a fraction (0.03 == 3%), not a percentage value.
constexpr float kFullModelFractionDiff = 0.03;
|
||||
// Passed as the `margin` argument of Approximately().
constexpr float kAbsMargin = 0.03;
|
||||
// Configured as num_hands on the hand detector options; also the number of
// landmark lists the test expects in the output.
constexpr int kMaxNumHands = 2;
|
||||
// Value set as min_tracking_confidence on the HandLandmarkerGraphOptions.
constexpr float kMinTrackingConfidence = 0.5;
|
||||
|
||||
// Reads the text-format NormalizedLandmarkList stored in `filename` under the
// test data directory and returns it.
//
// A read/parse failure is reported through MP_EXPECT_OK (a non-fatal test
// failure); in that case whatever GetTextProto left in the message is
// returned.
NormalizedLandmarkList GetExpectedLandmarkList(absl::string_view filename) {
  const auto proto_path = file::JoinPath("./", kTestDataDirectory, filename);
  NormalizedLandmarkList landmarks;
  MP_EXPECT_OK(GetTextProto(proto_path, &landmarks, Defaults()));
  return landmarks;
}
|
||||
|
||||
// Helper function to create a Hand Landmarker TaskRunner.
|
||||
absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner() {
|
||||
Graph graph;
|
||||
auto& hand_landmarker_graph = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph");
|
||||
auto& options =
|
||||
hand_landmarker_graph.GetOptions<HandLandmarkerGraphOptions>();
|
||||
options.mutable_hand_detector_graph_options()
|
||||
->mutable_base_options()
|
||||
->mutable_model_asset()
|
||||
->set_file_name(JoinPath("./", kTestDataDirectory, kPalmDetectionModel));
|
||||
options.mutable_hand_detector_graph_options()->mutable_base_options();
|
||||
options.mutable_hand_detector_graph_options()->set_num_hands(kMaxNumHands);
|
||||
options.mutable_hand_landmarks_detector_graph_options()
|
||||
->mutable_base_options()
|
||||
->mutable_model_asset()
|
||||
->set_file_name(
|
||||
JoinPath("./", kTestDataDirectory, kHandLandmarkerFullModel));
|
||||
options.set_min_tracking_confidence(kMinTrackingConfidence);
|
||||
|
||||
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
|
||||
hand_landmarker_graph.In(kImageTag);
|
||||
hand_landmarker_graph.Out(kLandmarksTag).SetName(kLandmarksName) >>
|
||||
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
|
||||
hand_landmarker_graph.Out(kWorldLandmarksTag).SetName(kWorldLandmarksName) >>
|
||||
graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
|
||||
hand_landmarker_graph.Out(kHandednessTag).SetName(kHandednessName) >>
|
||||
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
|
||||
hand_landmarker_graph.Out(kHandRectNextFrameTag)
|
||||
.SetName(kHandRectNextFrameName) >>
|
||||
graph[Output<std::vector<NormalizedRect>>(kHandRectNextFrameTag)];
|
||||
return TaskRunner::Create(
|
||||
graph.GetConfig(), absl::make_unique<core::MediaPipeBuiltinOpResolver>());
|
||||
}
|
||||
|
||||
class HandLandmarkerTest : public tflite_shims::testing::Test {};
|
||||
|
||||
TEST_F(HandLandmarkerTest, Succeeds) {
|
||||
MP_ASSERT_OK_AND_ASSIGN(
|
||||
Image image,
|
||||
DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kLeftHandsImage)));
|
||||
MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner());
|
||||
auto output_packets =
|
||||
task_runner->Process({{kImageName, MakePacket<Image>(std::move(image))}});
|
||||
const auto& landmarks = (*output_packets)[kLandmarksName]
|
||||
.Get<std::vector<NormalizedLandmarkList>>();
|
||||
ASSERT_EQ(landmarks.size(), kMaxNumHands);
|
||||
std::vector<NormalizedLandmarkList> expected_landmarks = {
|
||||
GetExpectedLandmarkList(kExpectedLeftUpHandLandmarksFilename),
|
||||
GetExpectedLandmarkList(kExpectedLeftDownHandLandmarksFilename)};
|
||||
|
||||
EXPECT_THAT(landmarks[0],
|
||||
Approximately(Partially(EqualsProto(expected_landmarks[0])),
|
||||
/*margin=*/kAbsMargin,
|
||||
/*fraction=*/kFullModelFractionDiff));
|
||||
EXPECT_THAT(landmarks[1],
|
||||
Approximately(Partially(EqualsProto(expected_landmarks[1])),
|
||||
/*margin=*/kAbsMargin,
|
||||
/*fraction=*/kFullModelFractionDiff));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace hand_landmarker
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -34,12 +34,13 @@ limitations under the License.
|
|||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/tasks/cc/common.h"
|
||||
#include "mediapipe/tasks/cc/components/image_preprocessing.h"
|
||||
#include "mediapipe/tasks/cc/components/utils/gate.h"
|
||||
#include "mediapipe/tasks/cc/core/model_resources.h"
|
||||
#include "mediapipe/tasks/cc/core/model_task_graph.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/metadata/metadata_extractor.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
|
||||
#include "mediapipe/tasks/metadata/metadata_schema_generated.h"
|
||||
#include "mediapipe/util/label_map.pb.h"
|
||||
|
@ -48,6 +49,7 @@ limitations under the License.
|
|||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_landmarker {
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -55,9 +57,10 @@ using ::mediapipe::api2::Input;
|
|||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::tasks::components::utils::AllowIf;
|
||||
using ::mediapipe::tasks::core::ModelResources;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarkerSubgraphOptions;
|
||||
HandLandmarksDetectorGraphOptions;
|
||||
using LabelItems = mediapipe::proto_ns::Map<int64, ::mediapipe::LabelMapItem>;
|
||||
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
|
@ -82,7 +85,6 @@ struct SingleHandLandmarkerOutputs {
|
|||
Source<bool> hand_presence;
|
||||
Source<float> hand_presence_score;
|
||||
Source<ClassificationList> handedness;
|
||||
Source<std::pair<int, int>> image_size;
|
||||
};
|
||||
|
||||
struct HandLandmarkerOutputs {
|
||||
|
@ -92,10 +94,10 @@ struct HandLandmarkerOutputs {
|
|||
Source<std::vector<bool>> presences;
|
||||
Source<std::vector<float>> presence_scores;
|
||||
Source<std::vector<ClassificationList>> handednesses;
|
||||
Source<std::pair<int, int>> image_size;
|
||||
};
|
||||
|
||||
absl::Status SanityCheckOptions(const HandLandmarkerSubgraphOptions& options) {
|
||||
absl::Status SanityCheckOptions(
|
||||
const HandLandmarksDetectorGraphOptions& options) {
|
||||
if (options.min_detection_confidence() < 0 ||
|
||||
options.min_detection_confidence() > 1) {
|
||||
return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument,
|
||||
|
@ -182,8 +184,8 @@ void ConfigureHandRectTransformationCalculator(
|
|||
|
||||
} // namespace
|
||||
|
||||
// A "mediapipe.tasks.vision.SingleHandLandmarkerSubgraph" performs hand
|
||||
// landmark detection.
|
||||
// A "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarksDetectorGraph"
|
||||
// performs hand landmarks detection.
|
||||
// - Accepts CPU input images and outputs Landmark on CPU.
|
||||
//
|
||||
// Inputs:
|
||||
|
@ -208,12 +210,11 @@ void ConfigureHandRectTransformationCalculator(
|
|||
// Float value indicates the probability that the hand is present.
|
||||
// HANDEDNESS - ClassificationList
|
||||
// Classification of handedness.
|
||||
// IMAGE_SIZE - std::vector<int, int>
|
||||
// The size of input image.
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "mediapipe.tasks.vision.SingleHandLandmarkerSubgraph"
|
||||
// calculator:
|
||||
// "mediapipe.tasks.vision.hand_landmarker.SingleHandLandmarksDetectorGraph"
|
||||
// input_stream: "IMAGE:input_image"
|
||||
// input_stream: "HAND_RECT:hand_rect"
|
||||
// output_stream: "LANDMARKS:hand_landmarks"
|
||||
|
@ -221,10 +222,8 @@ void ConfigureHandRectTransformationCalculator(
|
|||
// output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
|
||||
// output_stream: "PRESENCE:hand_presence"
|
||||
// output_stream: "PRESENCE_SCORE:hand_presence_score"
|
||||
// output_stream: "HANDEDNESS:handedness"
|
||||
// output_stream: "IMAGE_SIZE:image_size"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext]
|
||||
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarksDetectorGraphOptions.ext]
|
||||
// {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
|
@ -235,16 +234,17 @@ void ConfigureHandRectTransformationCalculator(
|
|||
// }
|
||||
// }
|
||||
// }
|
||||
class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
||||
class SingleHandLandmarksDetectorGraph : public core::ModelTaskGraph {
|
||||
public:
|
||||
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
||||
SubgraphContext* sc) override {
|
||||
ASSIGN_OR_RETURN(const auto* model_resources,
|
||||
CreateModelResources<HandLandmarkerSubgraphOptions>(sc));
|
||||
ASSIGN_OR_RETURN(
|
||||
const auto* model_resources,
|
||||
CreateModelResources<HandLandmarksDetectorGraphOptions>(sc));
|
||||
Graph graph;
|
||||
ASSIGN_OR_RETURN(auto hand_landmark_detection_outs,
|
||||
BuildSingleHandLandmarkerSubgraph(
|
||||
sc->Options<HandLandmarkerSubgraphOptions>(),
|
||||
BuildSingleHandLandmarksDetectorGraph(
|
||||
sc->Options<HandLandmarksDetectorGraphOptions>(),
|
||||
*model_resources, graph[Input<Image>(kImageTag)],
|
||||
graph[Input<NormalizedRect>(kHandRectTag)], graph));
|
||||
hand_landmark_detection_outs.hand_landmarks >>
|
||||
|
@ -259,8 +259,6 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
graph[Output<float>(kPresenceScoreTag)];
|
||||
hand_landmark_detection_outs.handedness >>
|
||||
graph[Output<ClassificationList>(kHandednessTag)];
|
||||
hand_landmark_detection_outs.image_size >>
|
||||
graph[Output<std::pair<int, int>>(kImageSizeTag)];
|
||||
|
||||
return graph.GetConfig();
|
||||
}
|
||||
|
@ -269,14 +267,16 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
// Adds a mediapipe hand landmark detection graph into the provided
|
||||
// builder::Graph instance.
|
||||
//
|
||||
// subgraph_options: the mediapipe tasks module HandLandmarkerSubgraphOptions.
|
||||
// model_resources: the ModelSources object initialized from a hand landmark
|
||||
// subgraph_options: the mediapipe tasks module
|
||||
// HandLandmarksDetectorGraphOptions.
|
||||
// model_resources: the ModelResources object initialized from a hand landmark
|
||||
// detection model file with model metadata.
|
||||
// image_in: (mediapipe::Image) stream to run hand landmark detection on.
|
||||
// rect: (NormalizedRect) stream to run on the RoI of image.
|
||||
// graph: the mediapipe graph instance to be updated.
|
||||
absl::StatusOr<SingleHandLandmarkerOutputs> BuildSingleHandLandmarkerSubgraph(
|
||||
const HandLandmarkerSubgraphOptions& subgraph_options,
|
||||
absl::StatusOr<SingleHandLandmarkerOutputs>
|
||||
BuildSingleHandLandmarksDetectorGraph(
|
||||
const HandLandmarksDetectorGraphOptions& subgraph_options,
|
||||
const core::ModelResources& model_resources, Source<Image> image_in,
|
||||
Source<NormalizedRect> hand_rect, Graph& graph) {
|
||||
MP_RETURN_IF_ERROR(SanityCheckOptions(subgraph_options));
|
||||
|
@ -332,18 +332,7 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
// score of hand presence.
|
||||
auto& tensors_to_hand_presence = graph.AddNode("TensorsToFloatsCalculator");
|
||||
hand_flag_tensors >> tensors_to_hand_presence.In("TENSORS");
|
||||
|
||||
// Converts the handedness tensor into a float that represents the
|
||||
// classification score of handedness.
|
||||
auto& tensors_to_handedness =
|
||||
graph.AddNode("TensorsToClassificationCalculator");
|
||||
ConfigureTensorsToHandednessCalculator(
|
||||
&tensors_to_handedness.GetOptions<
|
||||
mediapipe::TensorsToClassificationCalculatorOptions>());
|
||||
handedness_tensors >> tensors_to_handedness.In("TENSORS");
|
||||
auto hand_presence_score = tensors_to_hand_presence[Output<float>("FLOAT")];
|
||||
auto handedness =
|
||||
tensors_to_handedness[Output<ClassificationList>("CLASSIFICATIONS")];
|
||||
|
||||
// Applies a threshold to the confidence score to determine whether a
|
||||
// hand is present.
|
||||
|
@ -354,6 +343,18 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
hand_presence_score >> hand_presence_thresholding.In("FLOAT");
|
||||
auto hand_presence = hand_presence_thresholding[Output<bool>("FLAG")];
|
||||
|
||||
// Converts the handedness tensor into a float that represents the
|
||||
// classification score of handedness.
|
||||
auto& tensors_to_handedness =
|
||||
graph.AddNode("TensorsToClassificationCalculator");
|
||||
ConfigureTensorsToHandednessCalculator(
|
||||
&tensors_to_handedness.GetOptions<
|
||||
mediapipe::TensorsToClassificationCalculatorOptions>());
|
||||
handedness_tensors >> tensors_to_handedness.In("TENSORS");
|
||||
auto handedness = AllowIf(
|
||||
tensors_to_handedness[Output<ClassificationList>("CLASSIFICATIONS")],
|
||||
hand_presence, graph);
|
||||
|
||||
// Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed
|
||||
// hand image (after image transformation with the FIT scale mode) to the
|
||||
// corresponding locations on the same image with the letterbox removed
|
||||
|
@ -371,8 +372,9 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
landmark_letterbox_removal.Out("LANDMARKS") >>
|
||||
landmark_projection.In("NORM_LANDMARKS");
|
||||
hand_rect >> landmark_projection.In("NORM_RECT");
|
||||
auto projected_landmarks =
|
||||
landmark_projection[Output<NormalizedLandmarkList>("NORM_LANDMARKS")];
|
||||
auto projected_landmarks = AllowIf(
|
||||
landmark_projection[Output<NormalizedLandmarkList>("NORM_LANDMARKS")],
|
||||
hand_presence, graph);
|
||||
|
||||
// Projects the world landmarks from the cropped hand image to the
|
||||
// corresponding locations on the full image before cropping (input to the
|
||||
|
@ -383,7 +385,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
world_landmark_projection.In("LANDMARKS");
|
||||
hand_rect >> world_landmark_projection.In("NORM_RECT");
|
||||
auto projected_world_landmarks =
|
||||
world_landmark_projection[Output<LandmarkList>("LANDMARKS")];
|
||||
AllowIf(world_landmark_projection[Output<LandmarkList>("LANDMARKS")],
|
||||
hand_presence, graph);
|
||||
|
||||
// Converts the hand landmarks into a rectangle (normalized by image size)
|
||||
// that encloses the hand.
|
||||
|
@ -403,7 +406,8 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
hand_landmarks_to_rect.Out("NORM_RECT") >>
|
||||
hand_rect_transformation.In("NORM_RECT");
|
||||
auto hand_rect_next_frame =
|
||||
hand_rect_transformation[Output<NormalizedRect>("")];
|
||||
AllowIf(hand_rect_transformation[Output<NormalizedRect>("")],
|
||||
hand_presence, graph);
|
||||
|
||||
return {{
|
||||
/* hand_landmarks= */ projected_landmarks,
|
||||
|
@ -412,16 +416,17 @@ class SingleHandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
/* hand_presence= */ hand_presence,
|
||||
/* hand_presence_score= */ hand_presence_score,
|
||||
/* handedness= */ handedness,
|
||||
/* image_size= */ image_size,
|
||||
}};
|
||||
}
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::SingleHandLandmarkerSubgraph);
|
||||
::mediapipe::tasks::vision::hand_landmarker::SingleHandLandmarksDetectorGraph); // NOLINT
|
||||
// clang-format on
|
||||
|
||||
// A "mediapipe.tasks.vision.HandLandmarkerSubgraph" performs multi
|
||||
// hand landmark detection.
|
||||
// A "mediapipe.tasks.vision.hand_landmarker.MultipleHandLandmarksDetectorGraph"
|
||||
// performs multi hand landmark detection.
|
||||
// - Accepts CPU input image and a vector of hand rect RoIs to detect the
|
||||
// multiple hands landmarks enclosed by the RoIs. Output vectors of
|
||||
// hand landmarks related results, where each element in the vectors
|
||||
|
@ -449,12 +454,11 @@ REGISTER_MEDIAPIPE_GRAPH(
|
|||
// Vector of float value indicates the probability that the hand is present.
|
||||
// HANDEDNESS - std::vector<ClassificationList>
|
||||
// Vector of classification of handedness.
|
||||
// IMAGE_SIZE - std::vector<int, int>
|
||||
// The size of input image.
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "mediapipe.tasks.vision.HandLandmarkerSubgraph"
|
||||
// calculator:
|
||||
// "mediapipe.tasks.vision.hand_landmarker.MultipleHandLandmarksDetectorGraph"
|
||||
// input_stream: "IMAGE:input_image"
|
||||
// input_stream: "HAND_RECT:hand_rect"
|
||||
// output_stream: "LANDMARKS:hand_landmarks"
|
||||
|
@ -463,9 +467,8 @@ REGISTER_MEDIAPIPE_GRAPH(
|
|||
// output_stream: "PRESENCE:hand_presence"
|
||||
// output_stream: "PRESENCE_SCORE:hand_presence_score"
|
||||
// output_stream: "HANDEDNESS:handedness"
|
||||
// output_stream: "IMAGE_SIZE:image_size"
|
||||
// options {
|
||||
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarkerSubgraphOptions.ext]
|
||||
// [mediapipe.tasks.vision.hand_landmarker.proto.HandLandmarksDetectorGraphOptions.ext]
|
||||
// {
|
||||
// base_options {
|
||||
// model_asset {
|
||||
|
@ -476,15 +479,15 @@ REGISTER_MEDIAPIPE_GRAPH(
|
|||
// }
|
||||
// }
|
||||
// }
|
||||
class HandLandmarkerSubgraph : public core::ModelTaskGraph {
|
||||
class MultipleHandLandmarksDetectorGraph : public core::ModelTaskGraph {
|
||||
public:
|
||||
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
||||
SubgraphContext* sc) override {
|
||||
Graph graph;
|
||||
ASSIGN_OR_RETURN(
|
||||
auto hand_landmark_detection_outputs,
|
||||
BuildHandLandmarkerSubgraph(
|
||||
sc->Options<HandLandmarkerSubgraphOptions>(),
|
||||
BuildHandLandmarksDetectorGraph(
|
||||
sc->Options<HandLandmarksDetectorGraphOptions>(),
|
||||
graph[Input<Image>(kImageTag)],
|
||||
graph[Input<std::vector<NormalizedRect>>(kHandRectTag)], graph));
|
||||
hand_landmark_detection_outputs.landmark_lists >>
|
||||
|
@ -499,21 +502,20 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
graph[Output<std::vector<float>>(kPresenceScoreTag)];
|
||||
hand_landmark_detection_outputs.handednesses >>
|
||||
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
|
||||
hand_landmark_detection_outputs.image_size >>
|
||||
graph[Output<std::pair<int, int>>(kImageSizeTag)];
|
||||
|
||||
return graph.GetConfig();
|
||||
}
|
||||
|
||||
private:
|
||||
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerSubgraph(
|
||||
const HandLandmarkerSubgraphOptions& subgraph_options,
|
||||
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarksDetectorGraph(
|
||||
const HandLandmarksDetectorGraphOptions& subgraph_options,
|
||||
Source<Image> image_in,
|
||||
Source<std::vector<NormalizedRect>> multi_hand_rects, Graph& graph) {
|
||||
auto& hand_landmark_subgraph =
|
||||
graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph");
|
||||
hand_landmark_subgraph.GetOptions<HandLandmarkerSubgraphOptions>().CopyFrom(
|
||||
subgraph_options);
|
||||
auto& hand_landmark_subgraph = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker."
|
||||
"SingleHandLandmarksDetectorGraph");
|
||||
hand_landmark_subgraph.GetOptions<HandLandmarksDetectorGraphOptions>()
|
||||
.CopyFrom(subgraph_options);
|
||||
|
||||
auto& begin_loop_multi_hand_rects =
|
||||
graph.AddNode("BeginLoopNormalizedRectCalculator");
|
||||
|
@ -533,8 +535,6 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
hand_landmark_subgraph.Out("HAND_RECT_NEXT_FRAME");
|
||||
auto landmarks = hand_landmark_subgraph.Out("LANDMARKS");
|
||||
auto world_landmarks = hand_landmark_subgraph.Out("WORLD_LANDMARKS");
|
||||
auto image_size =
|
||||
hand_landmark_subgraph[Output<std::pair<int, int>>("IMAGE_SIZE")];
|
||||
|
||||
auto& end_loop_handedness =
|
||||
graph.AddNode("EndLoopClassificationListCalculator");
|
||||
|
@ -585,13 +585,16 @@ class HandLandmarkerSubgraph : public core::ModelTaskGraph {
|
|||
/* presences= */ presences,
|
||||
/* presence_scores= */ presence_scores,
|
||||
/* handednesses= */ handednesses,
|
||||
/* image_size= */ image_size,
|
||||
}};
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_MEDIAPIPE_GRAPH(::mediapipe::tasks::vision::HandLandmarkerSubgraph);
|
||||
// clang-format off
|
||||
REGISTER_MEDIAPIPE_GRAPH(
|
||||
::mediapipe::tasks::vision::hand_landmarker::MultipleHandLandmarksDetectorGraph); // NOLINT
|
||||
// clang-format on
|
||||
|
||||
} // namespace hand_landmarker
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -39,12 +39,13 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace hand_landmarker {
|
||||
namespace {
|
||||
|
||||
using ::file::Defaults;
|
||||
|
@ -57,7 +58,7 @@ using ::mediapipe::file::JoinPath;
|
|||
using ::mediapipe::tasks::core::TaskRunner;
|
||||
using ::mediapipe::tasks::vision::DecodeImageFromFile;
|
||||
using ::mediapipe::tasks::vision::hand_landmarker::proto::
|
||||
HandLandmarkerSubgraphOptions;
|
||||
HandLandmarksDetectorGraphOptions;
|
||||
using ::testing::ElementsAreArray;
|
||||
using ::testing::EqualsProto;
|
||||
using ::testing::Pointwise;
|
||||
|
@ -112,13 +113,14 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateSingleHandTaskRunner(
|
|||
absl::string_view model_name) {
|
||||
Graph graph;
|
||||
|
||||
auto& hand_landmark_detection =
|
||||
graph.AddNode("mediapipe.tasks.vision.SingleHandLandmarkerSubgraph");
|
||||
auto& hand_landmark_detection = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker."
|
||||
"SingleHandLandmarksDetectorGraph");
|
||||
|
||||
auto options = std::make_unique<HandLandmarkerSubgraphOptions>();
|
||||
auto options = std::make_unique<HandLandmarksDetectorGraphOptions>();
|
||||
options->mutable_base_options()->mutable_model_asset()->set_file_name(
|
||||
JoinPath("./", kTestDataDirectory, model_name));
|
||||
hand_landmark_detection.GetOptions<HandLandmarkerSubgraphOptions>().Swap(
|
||||
hand_landmark_detection.GetOptions<HandLandmarksDetectorGraphOptions>().Swap(
|
||||
options.get());
|
||||
|
||||
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
|
||||
|
@ -151,13 +153,14 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateMultiHandTaskRunner(
|
|||
absl::string_view model_name) {
|
||||
Graph graph;
|
||||
|
||||
auto& multi_hand_landmark_detection =
|
||||
graph.AddNode("mediapipe.tasks.vision.HandLandmarkerSubgraph");
|
||||
auto& multi_hand_landmark_detection = graph.AddNode(
|
||||
"mediapipe.tasks.vision.hand_landmarker."
|
||||
"MultipleHandLandmarksDetectorGraph");
|
||||
|
||||
auto options = std::make_unique<HandLandmarkerSubgraphOptions>();
|
||||
auto options = std::make_unique<HandLandmarksDetectorGraphOptions>();
|
||||
options->mutable_base_options()->mutable_model_asset()->set_file_name(
|
||||
JoinPath("./", kTestDataDirectory, model_name));
|
||||
multi_hand_landmark_detection.GetOptions<HandLandmarkerSubgraphOptions>()
|
||||
multi_hand_landmark_detection.GetOptions<HandLandmarksDetectorGraphOptions>()
|
||||
.Swap(options.get());
|
||||
|
||||
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
|
||||
|
@ -462,6 +465,7 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
});
|
||||
|
||||
} // namespace
|
||||
} // namespace hand_landmarker
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -21,8 +21,8 @@ package(default_visibility = [
|
|||
licenses(["notice"])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "hand_landmarker_subgraph_options_proto",
|
||||
srcs = ["hand_landmarker_subgraph_options.proto"],
|
||||
name = "hand_landmarks_detector_graph_options_proto",
|
||||
srcs = ["hand_landmarks_detector_graph_options.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
|
@ -31,13 +31,13 @@ mediapipe_proto_library(
|
|||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "hand_landmarker_options_proto",
|
||||
srcs = ["hand_landmarker_options.proto"],
|
||||
name = "hand_landmarker_graph_options_proto",
|
||||
srcs = ["hand_landmarker_graph_options.proto"],
|
||||
deps = [
|
||||
":hand_landmarker_subgraph_options_proto",
|
||||
":hand_landmarks_detector_graph_options_proto",
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/tasks/cc/core/proto:base_options_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_options_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_proto",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -19,22 +19,26 @@ package mediapipe.tasks.vision.hand_landmarker.proto;
|
|||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/tasks/cc/core/proto/base_options.proto";
|
||||
import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_options.proto";
|
||||
import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_subgraph_options.proto";
|
||||
import "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.proto";
|
||||
import "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.proto";
|
||||
|
||||
message HandLandmarkerOptions {
|
||||
message HandLandmarkerGraphOptions {
|
||||
extend mediapipe.CalculatorOptions {
|
||||
optional HandLandmarkerOptions ext = 462713202;
|
||||
optional HandLandmarkerGraphOptions ext = 462713202;
|
||||
}
|
||||
// Base options for configuring MediaPipe Tasks, such as specifying the TfLite
|
||||
// model file with metadata, accelerator options, etc.
|
||||
optional core.proto.BaseOptions base_options = 1;
|
||||
|
||||
// The locale to use for display names specified through the TFLite Model
|
||||
// Metadata, if any. Defaults to English.
|
||||
optional string display_names_locale = 2 [default = "en"];
|
||||
// Options for hand detector graph.
|
||||
optional hand_detector.proto.HandDetectorGraphOptions
|
||||
hand_detector_graph_options = 2;
|
||||
|
||||
optional hand_detector.proto.HandDetectorOptions hand_detector_options = 3;
|
||||
// Options for hand landmarker subgraph.
|
||||
optional HandLandmarksDetectorGraphOptions
|
||||
hand_landmarks_detector_graph_options = 3;
|
||||
|
||||
optional HandLandmarkerSubgraphOptions hand_landmarker_subgraph_options = 4;
|
||||
// Minimum confidence for hand landmarks tracking to be considered
|
||||
// successfully.
|
||||
optional float min_tracking_confidence = 4 [default = 0.5];
|
||||
}
|
|
@ -20,19 +20,15 @@ package mediapipe.tasks.vision.hand_landmarker.proto;
|
|||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/tasks/cc/core/proto/base_options.proto";
|
||||
|
||||
message HandLandmarkerSubgraphOptions {
|
||||
message HandLandmarksDetectorGraphOptions {
|
||||
extend mediapipe.CalculatorOptions {
|
||||
optional HandLandmarkerSubgraphOptions ext = 474472470;
|
||||
optional HandLandmarksDetectorGraphOptions ext = 474472470;
|
||||
}
|
||||
// Base options for configuring MediaPipe Tasks, such as specifying the TfLite
|
||||
// model file with metadata, accelerator options, etc.
|
||||
optional core.proto.BaseOptions base_options = 1;
|
||||
|
||||
// The locale to use for display names specified through the TFLite Model
|
||||
// Metadata, if any. Defaults to English.
|
||||
optional string display_names_locale = 2 [default = "en"];
|
||||
|
||||
// Minimum confidence value ([0.0, 1.0]) for hand presence score to be
|
||||
// considered successfully detecting a hand in the image.
|
||||
optional float min_detection_confidence = 3 [default = 0.5];
|
||||
optional float min_detection_confidence = 2 [default = 0.5];
|
||||
}
|
Loading…
Reference in New Issue
Block a user