Internal change

PiperOrigin-RevId: 479724318
MediaPipe Team 2022-10-07 20:42:51 -07:00 committed by Copybara-Service
parent 65e1d722eb
commit 635dc0a24e
10 changed files with 821 additions and 272 deletions

View File

@@ -20,3 +20,12 @@ cc_library(
name = "landmarks_detection",
hdrs = ["landmarks_detection.h"],
)
cc_library(
name = "gesture_recognition_result",
hdrs = ["gesture_recognition_result.h"],
deps = [
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
],
)

View File

@@ -0,0 +1,46 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_GESTURE_RECOGNITION_RESULT_H_
#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_GESTURE_RECOGNITION_RESULT_H_
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
namespace mediapipe {
namespace tasks {
namespace components {
namespace containers {
// The gesture recognition result from GestureRecognizer, where each vector
// element represents a single hand detected in the image.
struct GestureRecognitionResult {
// Recognized hand gestures in sorted order, such that the winning label is
// the first item in the list.
std::vector<mediapipe::ClassificationList> gestures;
// Classification of handedness.
std::vector<mediapipe::ClassificationList> handedness;
// Detected hand landmarks in normalized image coordinates.
std::vector<mediapipe::NormalizedLandmarkList> hand_landmarks;
// Detected hand landmarks in world coordinates.
std::vector<mediapipe::LandmarkList> hand_world_landmarks;
};
} // namespace containers
} // namespace components
} // namespace tasks
} // namespace mediapipe
#endif // MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_GESTURE_RECOGNITION_RESULT_H_
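A minimal consumption sketch for the struct above, assuming a populated GestureRecognitionResult named `result` (the variable name and the printing are illustrative, not part of the API):

#include <iostream>

// For each detected hand, print the top-scoring gesture. Within each
// ClassificationList the entries are sorted, so the winning label is the
// first classification.
for (size_t i = 0; i < result.gestures.size(); ++i) {
  const mediapipe::ClassificationList& gestures = result.gestures[i];
  if (gestures.classification_size() == 0) continue;
  const mediapipe::Classification& top = gestures.classification(0);
  std::cout << "Hand " << i << ": " << top.label()
            << " (score " << top.score() << ")\n";
}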

View File

@@ -47,6 +47,7 @@ cc_library(
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:concatenate_vector_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:get_vector_item_calculator",
"//mediapipe/calculators/tensor:tensor_converter_calculator",
"//mediapipe/calculators/tensor:tensors_to_classification_calculator",
"//mediapipe/calculators/tensor:tensors_to_classification_calculator_cc_proto",
@@ -69,7 +70,8 @@ cc_library(
"//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator_cc_proto",
"//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarks_detector_graph",
"//mediapipe/tasks/cc/vision/utils:image_tensor_specs",
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
"//mediapipe/tasks/metadata:metadata_schema_cc",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
@@ -90,7 +92,6 @@ cc_library(
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/tasks/cc:common",
"//mediapipe/tasks/cc/components/containers/proto:classifications_cc_proto",
"//mediapipe/tasks/cc/components/processors/proto:classifier_options_cc_proto",
"//mediapipe/tasks/cc/core:model_task_graph",
"//mediapipe/tasks/cc/core:utils",
@@ -108,3 +109,42 @@ cc_library(
],
alwayslink = 1,
)
cc_library(
name = "gesture_recognizer",
srcs = ["gesture_recognizer.cc"],
hdrs = ["gesture_recognizer.h"],
deps = [
":gesture_recognizer_graph",
":hand_gesture_recognizer_graph",
"//mediapipe/framework:packet",
"//mediapipe/framework/api2:builder",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/tasks/cc:common",
"//mediapipe/tasks/cc/components:image_preprocessing",
"//mediapipe/tasks/cc/components/containers:gesture_recognition_result",
"//mediapipe/tasks/cc/components/processors/proto:classifier_options_cc_proto",
"//mediapipe/tasks/cc/core:base_options",
"//mediapipe/tasks/cc/core:base_task_api",
"//mediapipe/tasks/cc/core:model_resources",
"//mediapipe/tasks/cc/core:task_runner",
"//mediapipe/tasks/cc/core:utils",
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
"//mediapipe/tasks/cc/vision/core:running_mode",
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
"//mediapipe/tasks/cc/vision/gesture_recognizer/proto:gesture_recognizer_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@org_tensorflow//tensorflow/lite/core/api:op_resolver",
"@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
],
)

View File

@@ -0,0 +1,282 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h"
#include <memory>
#include <type_traits>
#include <vector>
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/components/image_preprocessing.h"
#include "mediapipe/tasks/cc/components/processors/proto/classifier_options.pb.h"
#include "mediapipe/tasks/cc/core/base_task_api.h"
#include "mediapipe/tasks/cc/core/model_resources.h"
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
#include "mediapipe/tasks/cc/core/task_runner.h"
#include "mediapipe/tasks/cc/core/utils.h"
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
namespace mediapipe {
namespace tasks {
namespace vision {
namespace gesture_recognizer {
namespace {
using GestureRecognizerGraphOptionsProto = ::mediapipe::tasks::vision::
gesture_recognizer::proto::GestureRecognizerGraphOptions;
using ::mediapipe::tasks::components::containers::GestureRecognitionResult;
constexpr char kHandGestureSubgraphTypeName[] =
"mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph";
constexpr char kImageTag[] = "IMAGE";
constexpr char kImageInStreamName[] = "image_in";
constexpr char kImageOutStreamName[] = "image_out";
constexpr char kHandGesturesTag[] = "HAND_GESTURES";
constexpr char kHandGesturesStreamName[] = "hand_gestures";
constexpr char kHandednessTag[] = "HANDEDNESS";
constexpr char kHandednessStreamName[] = "handedness";
constexpr char kHandLandmarksTag[] = "LANDMARKS";
constexpr char kHandLandmarksStreamName[] = "landmarks";
constexpr char kHandWorldLandmarksTag[] = "WORLD_LANDMARKS";
constexpr char kHandWorldLandmarksStreamName[] = "world_landmarks";
constexpr int kMicroSecondsPerMilliSecond = 1000;
// Creates a MediaPipe graph config that contains a subgraph node of
// "mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph". If the
// task is running in the live stream mode, a "FlowLimiterCalculator" will be
// added to limit the number of frames in flight.
CalculatorGraphConfig CreateGraphConfig(
std::unique_ptr<GestureRecognizerGraphOptionsProto> options,
bool enable_flow_limiting) {
api2::builder::Graph graph;
auto& subgraph = graph.AddNode(kHandGestureSubgraphTypeName);
subgraph.GetOptions<GestureRecognizerGraphOptionsProto>().Swap(options.get());
graph.In(kImageTag).SetName(kImageInStreamName);
subgraph.Out(kHandGesturesTag).SetName(kHandGesturesStreamName) >>
graph.Out(kHandGesturesTag);
subgraph.Out(kHandednessTag).SetName(kHandednessStreamName) >>
graph.Out(kHandednessTag);
subgraph.Out(kHandLandmarksTag).SetName(kHandLandmarksStreamName) >>
graph.Out(kHandLandmarksTag);
subgraph.Out(kHandWorldLandmarksTag).SetName(kHandWorldLandmarksStreamName) >>
graph.Out(kHandWorldLandmarksTag);
subgraph.Out(kImageTag).SetName(kImageOutStreamName) >> graph.Out(kImageTag);
if (enable_flow_limiting) {
return tasks::core::AddFlowLimiterCalculator(graph, subgraph, {kImageTag},
kHandGesturesTag);
}
graph.In(kImageTag) >> subgraph.In(kImageTag);
return graph.GetConfig();
}
// Converts the user-facing GestureRecognizerOptions struct to the internal
// GestureRecognizerGraphOptions proto.
std::unique_ptr<GestureRecognizerGraphOptionsProto>
ConvertGestureRecognizerGraphOptionsProto(GestureRecognizerOptions* options) {
auto options_proto = std::make_unique<GestureRecognizerGraphOptionsProto>();
bool use_stream_mode = options->running_mode != core::RunningMode::IMAGE;
// TODO remove these workarounds for base options of subgraphs.
// Configure hand detector options.
auto base_options_proto_for_hand_detector =
std::make_unique<tasks::core::proto::BaseOptions>(
tasks::core::ConvertBaseOptionsToProto(
&(options->base_options_for_hand_detector)));
base_options_proto_for_hand_detector->set_use_stream_mode(use_stream_mode);
auto* hand_detector_graph_options =
options_proto->mutable_hand_landmarker_graph_options()
->mutable_hand_detector_graph_options();
hand_detector_graph_options->mutable_base_options()->Swap(
base_options_proto_for_hand_detector.get());
hand_detector_graph_options->set_num_hands(options->num_hands);
hand_detector_graph_options->set_min_detection_confidence(
options->min_hand_detection_confidence);
// Configure hand landmark detector options.
auto base_options_proto_for_hand_landmarker =
std::make_unique<tasks::core::proto::BaseOptions>(
tasks::core::ConvertBaseOptionsToProto(
&(options->base_options_for_hand_landmarker)));
base_options_proto_for_hand_landmarker->set_use_stream_mode(use_stream_mode);
auto* hand_landmarks_detector_graph_options =
options_proto->mutable_hand_landmarker_graph_options()
->mutable_hand_landmarks_detector_graph_options();
hand_landmarks_detector_graph_options->mutable_base_options()->Swap(
base_options_proto_for_hand_landmarker.get());
hand_landmarks_detector_graph_options->set_min_detection_confidence(
options->min_hand_presence_confidence);
auto* hand_landmarker_graph_options =
options_proto->mutable_hand_landmarker_graph_options();
hand_landmarker_graph_options->set_min_tracking_confidence(
options->min_tracking_confidence);
// Configure hand gesture recognizer options.
auto base_options_proto_for_gesture_recognizer =
std::make_unique<tasks::core::proto::BaseOptions>(
tasks::core::ConvertBaseOptionsToProto(
&(options->base_options_for_gesture_recognizer)));
base_options_proto_for_gesture_recognizer->set_use_stream_mode(
use_stream_mode);
auto* hand_gesture_recognizer_graph_options =
options_proto->mutable_hand_gesture_recognizer_graph_options();
hand_gesture_recognizer_graph_options->mutable_base_options()->Swap(
base_options_proto_for_gesture_recognizer.get());
if (options->min_gesture_confidence >= 0) {
hand_gesture_recognizer_graph_options->mutable_classifier_options()
->set_score_threshold(options->min_gesture_confidence);
}
return options_proto;
}
} // namespace
absl::StatusOr<std::unique_ptr<GestureRecognizer>> GestureRecognizer::Create(
std::unique_ptr<GestureRecognizerOptions> options) {
auto options_proto = ConvertGestureRecognizerGraphOptionsProto(options.get());
tasks::core::PacketsCallback packets_callback = nullptr;
if (options->result_callback) {
auto result_callback = options->result_callback;
packets_callback = [=](absl::StatusOr<tasks::core::PacketMap>
status_or_packets) {
if (!status_or_packets.ok()) {
Image image;
result_callback(status_or_packets.status(), image,
Timestamp::Unset().Value());
return;
}
if (status_or_packets.value()[kImageOutStreamName].IsEmpty()) {
return;
}
Packet gesture_packet =
status_or_packets.value()[kHandGesturesStreamName];
Packet handedness_packet =
status_or_packets.value()[kHandednessStreamName];
Packet hand_landmarks_packet =
status_or_packets.value()[kHandLandmarksStreamName];
Packet hand_world_landmarks_packet =
status_or_packets.value()[kHandWorldLandmarksStreamName];
Packet image_packet = status_or_packets.value()[kImageOutStreamName];
result_callback(
{{gesture_packet.Get<std::vector<ClassificationList>>(),
handedness_packet.Get<std::vector<ClassificationList>>(),
hand_landmarks_packet.Get<std::vector<NormalizedLandmarkList>>(),
hand_world_landmarks_packet.Get<std::vector<LandmarkList>>()}},
image_packet.Get<Image>(),
gesture_packet.Timestamp().Value() / kMicroSecondsPerMilliSecond);
};
}
return core::VisionTaskApiFactory::Create<GestureRecognizer,
GestureRecognizerGraphOptionsProto>(
CreateGraphConfig(
std::move(options_proto),
options->running_mode == core::RunningMode::LIVE_STREAM),
std::move(options->base_options.op_resolver), options->running_mode,
std::move(packets_callback));
}
absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
mediapipe::Image image) {
if (image.UsesGpu()) {
return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument,
"GPU input images are currently not supported.",
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
}
ASSIGN_OR_RETURN(auto output_packets,
ProcessImageData({{kImageInStreamName,
MakePacket<Image>(std::move(image))}}));
return {
{/* gestures= */ {output_packets[kHandGesturesStreamName]
.Get<std::vector<ClassificationList>>()},
/* handedness= */
{output_packets[kHandednessStreamName]
.Get<std::vector<mediapipe::ClassificationList>>()},
/* hand_landmarks= */
{output_packets[kHandLandmarksStreamName]
.Get<std::vector<mediapipe::NormalizedLandmarkList>>()},
/* hand_world_landmarks= */
{output_packets[kHandWorldLandmarksStreamName]
.Get<std::vector<mediapipe::LandmarkList>>()}},
};
}
absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
mediapipe::Image image, int64 timestamp_ms) {
if (image.UsesGpu()) {
return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument,
absl::StrCat("GPU input images are currently not supported."),
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
}
ASSIGN_OR_RETURN(
auto output_packets,
ProcessVideoData(
{{kImageInStreamName,
MakePacket<Image>(std::move(image))
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}));
return {
{/* gestures= */ {output_packets[kHandGesturesStreamName]
.Get<std::vector<ClassificationList>>()},
/* handedness= */
{output_packets[kHandednessStreamName]
.Get<std::vector<mediapipe::ClassificationList>>()},
/* hand_landmarks= */
{output_packets[kHandLandmarksStreamName]
.Get<std::vector<mediapipe::NormalizedLandmarkList>>()},
/* hand_world_landmarks= */
{output_packets[kHandWorldLandmarksStreamName]
.Get<std::vector<mediapipe::LandmarkList>>()}},
};
}
absl::Status GestureRecognizer::RecognizeAsync(mediapipe::Image image,
int64 timestamp_ms) {
if (image.UsesGpu()) {
return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument,
absl::StrCat("GPU input images are currently not supported."),
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
}
return SendLiveStreamData(
{{kImageInStreamName,
MakePacket<Image>(std::move(image))
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
}
} // namespace gesture_recognizer
} // namespace vision
} // namespace tasks
} // namespace mediapipe
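A usage sketch of the video-mode timestamp contract implemented above, assuming a recognizer created with RunningMode::VIDEO; DecodeFrame and num_frames are hypothetical stand-ins for a real video decoder:

// Timestamps are passed in milliseconds and must be monotonically
// increasing; RecognizeForVideo converts them to microseconds internally
// (see kMicroSecondsPerMilliSecond above).
for (int i = 0; i < num_frames; ++i) {
  mediapipe::Image frame = DecodeFrame(i);  // hypothetical decoder
  int64 timestamp_ms = i * 33;              // ~30 fps spacing
  absl::StatusOr<GestureRecognitionResult> result =
      recognizer->RecognizeForVideo(frame, timestamp_ms);
  if (!result.ok()) {
    // Handle or log result.status() and stop feeding frames.
    break;
  }
  // result->gestures, result->hand_landmarks, etc. hold this frame's output.
}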

View File

@@ -0,0 +1,172 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_
#define MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_
#include <functional>
#include <memory>
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/tasks/cc/components/containers/gesture_recognition_result.h"
#include "mediapipe/tasks/cc/core/base_options.h"
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
namespace mediapipe {
namespace tasks {
namespace vision {
namespace gesture_recognizer {
struct GestureRecognizerOptions {
// Base options for configuring Task library, such as specifying the TfLite
// model file with metadata, accelerator options, op resolver, etc.
tasks::core::BaseOptions base_options;
// TODO: remove these. Temporary solutions before bundle asset is
// ready.
tasks::core::BaseOptions base_options_for_hand_landmarker;
tasks::core::BaseOptions base_options_for_hand_detector;
tasks::core::BaseOptions base_options_for_gesture_recognizer;
// The running mode of the task. Defaults to the image mode.
// GestureRecognizer has three running modes:
// 1) The image mode for recognizing hand gestures on single image inputs.
// 2) The video mode for recognizing hand gestures on the decoded frames of a
// video.
// 3) The live stream mode for recognizing hand gestures on a live stream of
// input data, such as from a camera. In this mode, the "result_callback"
// below must be specified to receive the recognition results asynchronously.
core::RunningMode running_mode = core::RunningMode::IMAGE;
// The maximum number of hands that can be detected by the GestureRecognizer.
int num_hands = 1;
// The minimum confidence score for the hand detection to be considered
// successful.
float min_hand_detection_confidence = 0.5;
// The minimum confidence score of hand presence in the hand landmark
// detection.
float min_hand_presence_confidence = 0.5;
// The minimum confidence score for the hand tracking to be considered
// successful.
float min_tracking_confidence = 0.5;
// The minimum confidence score for gestures to be considered successfully
// recognized. If < 0, the gesture confidence thresholds in the model
// metadata are used.
// TODO Note this option is subject to change after the scoring merging
// calculator is implemented.
float min_gesture_confidence = -1;
// The user-defined result callback for processing live stream data.
// The result callback should only be specified when the running mode is set
// to RunningMode::LIVE_STREAM.
std::function<void(
absl::StatusOr<components::containers::GestureRecognitionResult>,
const Image&, int64)>
result_callback = nullptr;
};
// Performs hand gesture recognition on the given image.
//
// TODO add the link to DevSite.
// This API expects a pre-trained hand gesture model asset bundle, or a
// custom one created using Model Maker. See <link to the DevSite
// documentation page>.
//
// Inputs:
// Image
// - The image that gesture recognition runs on.
// Outputs:
// GestureRecognitionResult
// - The hand gesture recognition results.
class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
public:
using BaseVisionTaskApi::BaseVisionTaskApi;
// Creates a GestureRecognizer from a GestureRecognizerOptions to process
// image data or streaming data. A gesture recognizer can be created in one
// of the following three running modes:
// 1) Image mode for recognizing gestures on single image inputs.
// Users provide mediapipe::Image to the `Recognize` method, and will
// receive the recognized hand gesture results as the return value.
// 2) Video mode for recognizing gestures on the decoded frames of a video.
// 3) Live stream mode for recognizing gestures on a live stream of input
// data, such as from a camera. Users call `RecognizeAsync` to push image
// data into the GestureRecognizer; the recognized results, along with the
// input timestamp and the image that gesture recognition runs on, are
// delivered via the result callback when the gesture recognizer finishes
// the work.
static absl::StatusOr<std::unique_ptr<GestureRecognizer>> Create(
std::unique_ptr<GestureRecognizerOptions> options);
// Performs hand gesture recognition on the given image.
// Only use this method when the GestureRecognizer is created with the image
// running mode.
//
// image - mediapipe::Image
// Image to perform hand gesture recognition on.
//
// The image can be of any size with format RGB or RGBA.
// TODO: Describe how the input image will be preprocessed once
// YUV support is implemented.
absl::StatusOr<components::containers::GestureRecognitionResult> Recognize(
Image image);
// Performs gesture recognition on the provided video frame.
// Only use this method when the GestureRecognizer is created with the video
// running mode.
//
// The image can be of any size with format RGB or RGBA. It's required to
// provide the video frame's timestamp (in milliseconds). The input timestamps
// must be monotonically increasing.
absl::StatusOr<components::containers::GestureRecognitionResult>
RecognizeForVideo(Image image, int64 timestamp_ms);
// Sends live image data to perform gesture recognition, and the results will
// be available via the "result_callback" provided in the
// GestureRecognizerOptions. Only use this method when the GestureRecognizer
// is created with the live stream running mode.
//
// The image can be of any size with format RGB or RGBA. It's required to
// provide a timestamp (in milliseconds) to indicate when the input image is
// sent to the gesture recognizer. The input timestamps must be monotonically
// increasing.
//
// The "result_callback" provides
// - A GestureRecognitionResult holding the recognized results for the
// input frame.
// - The const reference to the corresponding input image that the gesture
// recognizer runs on. Note that the const reference to the image will no
// longer be valid when the callback returns. To access the image data
// outside of the callback, callers need to make a copy of the image.
// - The input timestamp in milliseconds.
absl::Status RecognizeAsync(Image image, int64 timestamp_ms);
// Shuts down the GestureRecognizer when all the work is done.
absl::Status Close() { return runner_->Close(); }
};
} // namespace gesture_recognizer
} // namespace vision
} // namespace tasks
} // namespace mediapipe
#endif  // MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZER_GESTURE_RECOGNIZER_H_
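A hedged end-to-end sketch of the live stream API declared above; the model asset paths and the camera loop are placeholders, and the model_asset_path field is assumed from tasks::core::BaseOptions:

using ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizer;
using ::mediapipe::tasks::vision::gesture_recognizer::GestureRecognizerOptions;

auto options = std::make_unique<GestureRecognizerOptions>();
// Placeholder asset paths for the three temporary per-subgraph base options.
options->base_options_for_hand_detector.model_asset_path =
    "/path/to/hand_detector.tflite";
options->base_options_for_hand_landmarker.model_asset_path =
    "/path/to/hand_landmarker.tflite";
options->base_options_for_gesture_recognizer.model_asset_path =
    "/path/to/gesture_recognizer.tflite";
options->running_mode =
    mediapipe::tasks::vision::core::RunningMode::LIVE_STREAM;
options->result_callback =
    [](absl::StatusOr<
           mediapipe::tasks::components::containers::GestureRecognitionResult>
           result,
       const mediapipe::Image& image, int64 timestamp_ms) {
      // `image` is only valid for the duration of the callback; copy it if
      // it is needed afterwards.
      if (result.ok()) {
        // Consume result->gestures, result->handedness, ...
      }
    };
absl::StatusOr<std::unique_ptr<GestureRecognizer>> recognizer =
    GestureRecognizer::Create(std::move(options));
if (recognizer.ok()) {
  // For each camera frame:
  //   (*recognizer)->RecognizeAsync(frame, frame_timestamp_ms);
}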

View File

@@ -25,7 +25,6 @@ limitations under the License.
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h"
#include "mediapipe/tasks/cc/core/model_task_graph.h"
#include "mediapipe/tasks/cc/core/utils.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options.pb.h"
@@ -46,7 +45,6 @@ using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source;
using ::mediapipe::tasks::components::containers::proto::ClassificationResult;
using ::mediapipe::tasks::vision::gesture_recognizer::proto::
GestureRecognizerGraphOptions;
using ::mediapipe::tasks::vision::gesture_recognizer::proto::
@@ -63,10 +61,10 @@ constexpr char kHandGesturesTag[] = "HAND_GESTURES";
constexpr char kHandTrackingIdsTag[] = "HAND_TRACKING_IDS";
struct GestureRecognizerOutputs {
Source<std::vector<ClassificationResult>> gesture;
Source<std::vector<mediapipe::ClassificationList>> handedness;
Source<std::vector<mediapipe::NormalizedLandmarkList>> hand_landmarks;
Source<std::vector<mediapipe::LandmarkList>> hand_world_landmarks;
Source<std::vector<ClassificationList>> gesture;
Source<std::vector<ClassificationList>> handedness;
Source<std::vector<NormalizedLandmarkList>> hand_landmarks;
Source<std::vector<LandmarkList>> hand_world_landmarks;
Source<Image> image;
};
@@ -80,7 +78,7 @@ struct GestureRecognizerOutputs {
// Image to perform hand gesture recognition on.
//
// Outputs:
// HAND_GESTURES - std::vector<ClassificationResult>
// HAND_GESTURES - std::vector<ClassificationList>
// Recognized hand gestures in sorted order, such that the winning label is
// the first item in the list.
// LANDMARKS: - std::vector<NormalizedLandmarkList>
@@ -136,15 +134,13 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
*sc->MutableOptions<GestureRecognizerGraphOptions>(),
graph[Input<Image>(kImageTag)], graph));
hand_gesture_recognition_output.gesture >>
graph[Output<std::vector<ClassificationResult>>(kHandGesturesTag)];
graph[Output<std::vector<ClassificationList>>(kHandGesturesTag)];
hand_gesture_recognition_output.handedness >>
graph[Output<std::vector<mediapipe::ClassificationList>>(
kHandednessTag)];
graph[Output<std::vector<ClassificationList>>(kHandednessTag)];
hand_gesture_recognition_output.hand_landmarks >>
graph[Output<std::vector<mediapipe::NormalizedLandmarkList>>(
kLandmarksTag)];
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
hand_gesture_recognition_output.hand_world_landmarks >>
graph[Output<std::vector<mediapipe::LandmarkList>>(kWorldLandmarksTag)];
graph[Output<std::vector<LandmarkList>>(kWorldLandmarksTag)];
hand_gesture_recognition_output.image >> graph[Output<Image>(kImageTag)];
return graph.GetConfig();
}
@@ -193,7 +189,7 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
image_size >> hand_gesture_subgraph.In(kImageSizeTag);
hand_landmarks_id >> hand_gesture_subgraph.In(kHandTrackingIdsTag);
auto hand_gestures =
hand_gesture_subgraph[Output<std::vector<ClassificationResult>>(
hand_gesture_subgraph[Output<std::vector<ClassificationList>>(
kHandGesturesTag)];
return {{.gesture = hand_gestures,

View File

@@ -47,6 +47,7 @@ mediapipe_files(srcs = [
"mozart_square.jpg",
"multi_objects.jpg",
"palm_detection_full.tflite",
"pointing_up.jpg",
"right_hands.jpg",
"segmentation_golden_rotation0.png",
"segmentation_input_rotation0.jpg",
@@ -54,6 +55,7 @@ mediapipe_files(srcs = [
"selfie_segm_128_128_3_expected_mask.jpg",
"selfie_segm_144_256_3.tflite",
"selfie_segm_144_256_3_expected_mask.jpg",
"thumb_up.jpg",
])
exports_files(
@@ -79,11 +81,13 @@ filegroup(
"left_hands.jpg",
"mozart_square.jpg",
"multi_objects.jpg",
"pointing_up.jpg",
"right_hands.jpg",
"segmentation_golden_rotation0.png",
"segmentation_input_rotation0.jpg",
"selfie_segm_128_128_3_expected_mask.jpg",
"selfie_segm_144_256_3_expected_mask.jpg",
"thumb_up.jpg",
],
visibility = [
"//mediapipe/python:__subpackages__",

View File

@@ -8,216 +8,216 @@ classifications {
landmarks {
landmark {
x: 0.4749803
y: 0.76872
z: 9.286178e-08
x: 0.47923622
y: 0.7426044
z: 2.3221878e-07
}
landmark {
x: 0.5466898
y: 0.6706463
z: -0.03454024
x: 0.5403745
y: 0.66178805
z: -0.044572093
}
landmark {
x: 0.5890165
y: 0.5604909
z: -0.055142127
x: 0.5774534
y: 0.5608346
z: -0.07581605
}
landmark {
x: 0.52780133
y: 0.49855334
z: -0.07846409
x: 0.52648556
y: 0.50247055
z: -0.105467044
}
landmark {
x: 0.44487286
y: 0.49801928
z: -0.10188004
x: 0.44289914
y: 0.49489295
z: -0.13422011
}
landmark {
x: 0.47572923
y: 0.44477755
z: -0.028345175
x: 0.4728853
y: 0.43925008
z: -0.058122505
}
landmark {
x: 0.48013464
y: 0.32467923
z: -0.06513901
x: 0.4803168
y: 0.32889345
z: -0.101187326
}
landmark {
x: 0.48351905
y: 0.25804192
z: -0.086756624
x: 0.48436823
y: 0.25876504
z: -0.12840955
}
landmark {
x: 0.47760454
y: 0.19289327
z: -0.10468461
x: 0.47388697
y: 0.19592366
z: -0.15085006
}
landmark {
x: 0.3993108
y: 0.47566867
z: -0.040357687
x: 0.39129356
y: 0.47211456
z: -0.06835801
}
landmark {
x: 0.42361537
y: 0.42491958
z: -0.103545874
x: 0.41798547
y: 0.42218646
z: -0.12954563
}
landmark {
x: 0.46059948
y: 0.51723665
z: -0.1214961
x: 0.45758423
y: 0.5232461
z: -0.14131334
}
landmark {
x: 0.4580545
y: 0.55640894
z: -0.12272568
x: 0.45100626
y: 0.5554065
z: -0.13883406
}
landmark {
x: 0.34109607
y: 0.5184511
z: -0.056422118
x: 0.33133638
y: 0.51777464
z: -0.08227023
}
landmark {
x: 0.36177525
y: 0.48427337
z: -0.12584248
x: 0.35698116
y: 0.48688585
z: -0.14713185
}
landmark {
x: 0.40706652
y: 0.5700621
z: -0.11658718
x: 0.40754414
y: 0.57370347
z: -0.12981415
}
landmark {
x: 0.40535083
y: 0.6000496
z: -0.09520916
x: 0.40011865
y: 0.5930706
z: -0.10554546
}
landmark {
x: 0.2872031
y: 0.57303333
z: -0.074813806
x: 0.2783401
y: 0.5735568
z: -0.09971398
}
landmark {
x: 0.30961618
y: 0.533245
z: -0.114366606
x: 0.30884498
y: 0.5394487
z: -0.14033116
}
landmark {
x: 0.35510173
y: 0.5838698
z: -0.096521005
x: 0.35470563
y: 0.5917965
z: -0.11820527
}
landmark {
x: 0.36053744
y: 0.608682
z: -0.07574715
x: 0.34865493
y: 0.61057556
z: -0.09509217
}
}
world_landmarks {
landmark {
x: 0.018890835
y: 0.09005852
z: 0.031907097
x: 0.016918864
y: 0.08634466
z: 0.035783045
}
landmark {
x: 0.04198891
y: 0.061256267
z: 0.017695501
x: 0.04193685
y: 0.056667875
z: 0.019453367
}
landmark {
x: 0.05044507
y: 0.033841074
z: 0.0015051212
x: 0.050382353
y: 0.031786427
z: 0.0023380776
}
landmark {
x: 0.039822325
y: 0.0073827556
z: -0.02168335
x: 0.043284662
y: 0.008976387
z: -0.02496663
}
landmark {
x: 0.012921701
y: 0.0025111444
z: -0.033813436
x: 0.016010094
y: 0.004991216
z: -0.036876947
}
landmark {
x: 0.023851154
y: -0.011495698
z: 0.0066048754
x: 0.02450771
y: -0.013496464
z: 0.0041254223
}
landmark {
x: 0.023206754
y: -0.042496294
z: -0.0026847485
x: 0.024783865
y: -0.041331705
z: -0.0028748964
}
landmark {
x: 0.02298078
y: -0.062678955
z: -0.013068148
x: 0.025917178
y: -0.06191107
z: -0.010242647
}
landmark {
x: 0.021972645
y: -0.08151748
z: -0.03677687
x: 0.023101516
y: -0.07967696
z: -0.03152665
}
landmark {
x: -0.00016964211
y: -0.005549716
z: 0.0058569373
x: 0.0006629339
y: -0.0060150283
z: 0.004906766
}
landmark {
x: 0.0075052455
y: -0.020031122
z: -0.027775772
x: 0.0077093104
y: -0.017035034
z: -0.029702934
}
landmark {
x: 0.017835317
y: 0.004899453
z: -0.037390795
x: 0.017517095
y: 0.008997183
z: -0.03692814
}
landmark {
x: 0.016913192
y: 0.018281722
z: -0.019302163
x: 0.0145079205
y: 0.017461296
z: -0.011290487
}
landmark {
x: -0.018799124
y: 0.0053577404
z: -0.0040608873
x: -0.018095909
y: 0.006112392
z: -0.0027157406
}
landmark {
x: -0.00747582
y: 0.0019600953
z: -0.034023333
x: -0.010212201
y: 0.0052777785
z: -0.034659054
}
landmark {
x: 0.0035368819
y: 0.025736088
z: -0.03452471
x: 0.0043836404
y: 0.028383566
z: -0.03296758
}
landmark {
x: 0.0080153765
y: 0.039885145
z: -0.013341276
x: 0.003886811
y: 0.036054
z: -0.0074628904
}
landmark {
x: -0.029628165
y: 0.028607829
z: -0.011377414
x: -0.03178849
y: 0.029854178
z: -0.008874044
}
landmark {
x: -0.023356002
y: 0.017514031
z: -0.029408533
x: -0.02403016
y: 0.021497255
z: -0.027618393
}
landmark {
x: -0.008503268
y: 0.027560957
z: -0.035641473
x: -0.008522437
y: 0.031886857
z: -0.032367583
}
landmark {
x: -0.0070180474
y: 0.039056484
z: -0.023629948
x: -0.012865841
y: 0.038687646
z: -0.017172804
}
}

View File

@@ -8,216 +8,216 @@ classifications {
landmarks {
landmark {
x: 0.6065784
y: 0.7356081
z: -5.2289305e-08
x: 0.6387502
y: 0.67134184
z: -3.4044612e-07
}
landmark {
x: 0.6349347
y: 0.5735343
z: -0.047243003
x: 0.634891
y: 0.53670025
z: -0.06968865
}
landmark {
x: 0.5788341
y: 0.42688707
z: -0.036071796
x: 0.5746676
y: 0.41283816
z: -0.09383486
}
landmark {
x: 0.51322824
y: 0.3153786
z: -0.021018881
x: 0.49967948
y: 0.32550922
z: -0.10799447
}
landmark {
x: 0.49179295
y: 0.25291175
z: 0.0061425082
x: 0.47362617
y: 0.25102285
z: -0.10590933
}
landmark {
x: 0.49944243
y: 0.45409226
z: 0.06513325
x: 0.40749234
y: 0.47130388
z: -0.04694611
}
landmark {
x: 0.3822241
y: 0.45645967
z: 0.045028925
x: 0.3372087
y: 0.46742308
z: -0.0997342
}
landmark {
x: 0.4427338
y: 0.49150866
z: 0.024395633
x: 0.4418445
y: 0.50960016
z: -0.111206524
}
landmark {
x: 0.5015556
y: 0.4798539
z: 0.014423937
x: 0.48056933
y: 0.5187666
z: -0.11022365
}
landmark {
x: 0.46654877
y: 0.5420721
z: 0.08380699
x: 0.39218128
y: 0.5495232
z: -0.028925514
}
landmark {
x: 0.3540949
y: 0.545657
z: 0.056201216
x: 0.34047198
y: 0.55610204
z: -0.08213869
}
landmark {
x: 0.43828446
y: 0.5723222
z: 0.03073385
x: 0.46152583
y: 0.58310646
z: -0.08393028
}
landmark {
x: 0.4894746
y: 0.54662794
z: 0.016284892
x: 0.47058716
y: 0.56413835
z: -0.078857616
}
landmark {
x: 0.44287524
y: 0.6153337
z: 0.0878331
x: 0.39237642
y: 0.61864823
z: -0.022026168
}
landmark {
x: 0.3531985
y: 0.6305228
z: 0.048528627
x: 0.34304678
y: 0.62800515
z: -0.08132204
}
landmark {
x: 0.42727134
y: 0.64344436
z: 0.027383275
x: 0.45004016
y: 0.64300805
z: -0.06211204
}
landmark {
x: 0.46999624
y: 0.61115295
z: 0.021795912
x: 0.4640005
y: 0.6221539
z: -0.038953774
}
landmark {
x: 0.43323213
y: 0.6734935
z: 0.087731235
x: 0.39231628
y: 0.68187976
z: -0.020164328
}
landmark {
x: 0.3772134
y: 0.69590896
z: 0.07259013
x: 0.35785866
y: 0.6985842
z: -0.052247807
}
landmark {
x: 0.42301077
y: 0.70083475
z: 0.06279105
x: 0.42698768
y: 0.69892275
z: -0.037642766
}
landmark {
x: 0.45672464
y: 0.6844607
z: 0.059202813
x: 0.44422707
y: 0.6876204
z: -0.02034688
}
}
world_landmarks {
landmark {
x: 0.047059614
y: 0.04719348
z: 0.03951376
x: 0.06753889
y: 0.031051591
z: 0.05541924
}
landmark {
x: 0.050449535
y: 0.012183173
z: 0.016567508
x: 0.06327636
y: -0.003913434
z: 0.02125023
}
landmark {
x: 0.04375921
y: -0.020305036
z: 0.012189768
x: 0.05469646
y: -0.038668767
z: 0.01118496
}
landmark {
x: 0.022525383
y: -0.04830697
z: 0.008714083
x: 0.03557241
y: -0.06865983
z: 0.0029562893
}
landmark {
x: 0.011789754
y: -0.06952699
z: 0.0029319536
x: 0.019069858
y: -0.08740239
z: 0.007222481
}
landmark {
x: 0.009532374
y: -0.019510617
z: 0.0015609035
x: 0.0044852756
y: -0.02772763
z: -0.004234833
}
landmark {
x: -0.007894232
y: -0.022080563
z: -0.014592148
x: -0.0031203926
y: -0.024173645
z: -0.033932913
}
landmark {
x: -0.002826123
y: -0.019949362
z: -0.009392118
x: 0.0080217365
y: -0.018939625
z: -0.032623816
}
landmark {
x: 0.009066351
y: -0.016403511
z: 0.005516675
x: 0.025537387
y: -0.014517117
z: -0.004398854
}
landmark {
x: -0.0031000748
y: -0.003971943
z: 0.004851345
x: -0.004470923
y: -0.0040212176
z: 0.0025033879
}
landmark {
x: -0.016852753
y: -0.009905987
z: -0.016275175
x: -0.010845158
y: -0.0031857258
z: -0.036282137
}
landmark {
x: -0.006703893
y: -0.0026965735
z: -0.015606856
x: 0.016729971
y: 0.0028876318
z: -0.036264844
}
landmark {
x: 0.007890566
y: -0.010418876
z: 0.0050479355
x: 0.019928008
y: -0.0032422952
z: 0.004380459
}
landmark {
x: -0.007842411
y: 0.011552694
z: -0.0005755241
x: -0.005686749
y: 0.017101247
z: 0.0036791638
}
landmark {
x: -0.021125216
y: 0.009268615
z: -0.017993882
x: -0.010514952
y: 0.017355483
z: -0.02882688
}
landmark {
x: -0.006585305
y: 0.013378072
z: -0.01709412
x: 0.014503509
y: 0.019414417
z: -0.026207235
}
landmark {
x: 0.008140431
y: 0.008364402
z: -0.0051898304
x: 0.0211232
y: 0.014327417
z: 0.0011467658
}
landmark {
x: -0.01082343
y: 0.03213215
z: -0.00069864903
x: 0.0011399705
y: 0.043651186
z: 0.0068390737
}
landmark {
x: -0.0199164
y: 0.028296603
z: -0.01447433
x: -0.010388309
y: 0.03904784
z: -0.015677728
}
landmark {
x: -0.00960456
y: 0.026734762
z: -0.019243335
x: 0.006957108
y: 0.03613425
z: -0.028704688
}
landmark {
x: 0.0040425956
y: 0.025051914
z: -0.014775545
x: 0.012793289
y: 0.03930679
z: -0.012465539
}
}

View File

@@ -432,8 +432,8 @@ def external_files():
http_file(
name = "com_google_mediapipe_pointing_up_landmarks_pbtxt",
sha256 = "1255b6ba17b4ef7a9b3ce92c0a139e74fbcec272dc251b049b2f06732f9fed83",
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_landmarks.pbtxt?generation=1662650664573638"],
sha256 = "a3cd7f088a9e997dbb8f00d91dbf3faaacbdb262c8f2fde3c07a9d0656488065",
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_landmarks.pbtxt?generation=1665174976408451"],
)
http_file(
@@ -588,8 +588,8 @@ def external_files():
http_file(
name = "com_google_mediapipe_thumb_up_landmarks_pbtxt",
sha256 = "bf1913df6ac7cc14b492c10411c827832839985c057b112789e04ce7c1fdd0fa",
urls = ["https://storage.googleapis.com/mediapipe-assets/thumb_up_landmarks.pbtxt?generation=1662650669387278"],
sha256 = "b129ae0536be4e25d6cdee74aabe9dedf1bcfe87430a40b68be4079db3a4d926",
urls = ["https://storage.googleapis.com/mediapipe-assets/thumb_up_landmarks.pbtxt?generation=1665174979747784"],
)
http_file(