Merge branch 'master' into gesture-recognizer-python
commit 5ec87c8bd2
mediapipe/model_maker/python/core/data/classification_dataset.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 """Common classification dataset library."""
 
-from typing import Any, Tuple
+from typing import List, Tuple
 
 import tensorflow as tf
 
@@ -21,19 +21,20 @@ from mediapipe.model_maker.python.core.data import dataset as ds
 
 
 class ClassificationDataset(ds.Dataset):
-  """DataLoader for classification models."""
+  """Dataset Loader for classification models."""
 
-  def __init__(self, dataset: tf.data.Dataset, size: int, index_by_label: Any):
+  def __init__(self, dataset: tf.data.Dataset, size: int,
+               label_names: List[str]):
     super().__init__(dataset, size)
-    self._index_by_label = index_by_label
+    self._label_names = label_names
 
   @property
   def num_classes(self: ds._DatasetT) -> int:
-    return len(self._index_by_label)
+    return len(self._label_names)
 
   @property
-  def index_by_label(self: ds._DatasetT) -> Any:
-    return self._index_by_label
+  def label_names(self: ds._DatasetT) -> List[str]:
+    return self._label_names
 
   def split(self: ds._DatasetT,
             fraction: float) -> Tuple[ds._DatasetT, ds._DatasetT]:
@@ -48,4 +49,4 @@
     Returns:
       The splitted two sub datasets.
     """
-    return self._split(fraction, self._index_by_label)
+    return self._split(fraction, self._label_names)

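Note: the diff above renames index_by_label to label_names throughout the dataset API. A minimal usage sketch of the renamed surface (the sample tensors and label values below are illustrative, not part of this commit):

    # Sketch only: exercises the renamed ClassificationDataset API.
    import tensorflow as tf
    from mediapipe.model_maker.python.core.data import classification_dataset

    ds = tf.data.Dataset.from_tensor_slices([[0, 1], [1, 1], [0, 0], [1, 0]])
    data = classification_dataset.ClassificationDataset(
        dataset=ds, size=4, label_names=['pos', 'neg'])
    assert data.num_classes == 2                # derived from label_names
    assert data.label_names == ['pos', 'neg']   # replaces index_by_label
    train_data, test_data = data.split(fraction=0.5)
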
mediapipe/model_maker/python/core/data/classification_dataset_test.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Tuple, TypeVar
+from typing import Any, List, Tuple, TypeVar
 
 # Dependency imports
 
@@ -37,26 +37,22 @@ class ClassificationDatasetTest(tf.test.TestCase):
      """
 
      def __init__(self, dataset: tf.data.Dataset, size: int,
-                   index_by_label: Any, value: Any):
-        super().__init__(
-            dataset=dataset, size=size, index_by_label=index_by_label)
+                   label_names: List[str], value: Any):
+        super().__init__(dataset=dataset, size=size, label_names=label_names)
        self.value = value
 
      def split(self, fraction: float) -> Tuple[_DatasetT, _DatasetT]:
-        return self._split(fraction, self.index_by_label, self.value)
+        return self._split(fraction, self.label_names, self.value)
 
    # Some dummy inputs.
    magic_value = 42
    num_classes = 2
-    index_by_label = (False, True)
+    label_names = ['foo', 'bar']
 
    # Create data loader from sample data.
    ds = tf.data.Dataset.from_tensor_slices([[0, 1], [1, 1], [0, 0], [1, 0]])
    data = MagicClassificationDataset(
-        dataset=ds,
-        size=len(ds),
-        index_by_label=index_by_label,
-        value=magic_value)
+        dataset=ds, size=len(ds), label_names=label_names, value=magic_value)
 
    # Train/Test data split.
    fraction = .25
@@ -73,7 +69,7 @@
 
    # Make sure attributes propagated correctly.
    self.assertEqual(train_data.num_classes, num_classes)
-    self.assertEqual(test_data.index_by_label, index_by_label)
+    self.assertEqual(test_data.label_names, label_names)
    self.assertEqual(train_data.value, magic_value)
    self.assertEqual(test_data.value, magic_value)

mediapipe/model_maker/python/core/tasks/classifier.py

@@ -29,22 +29,22 @@ from mediapipe.model_maker.python.core.tasks import custom_model
 class Classifier(custom_model.CustomModel):
   """An abstract base class that represents a TensorFlow classifier."""
 
-  def __init__(self, model_spec: Any, index_by_label: List[str], shuffle: bool,
+  def __init__(self, model_spec: Any, label_names: List[str], shuffle: bool,
               full_train: bool):
    """Initilizes a classifier with its specifications.
 
    Args:
      model_spec: Specification for the model.
-      index_by_label: A list that map from index to label class name.
+      label_names: A list of label names for the classes.
      shuffle: Whether the dataset should be shuffled.
      full_train: If true, train the model end-to-end including the backbone
        and the classification layers on top. Otherwise, only train the top
        classification layers.
    """
    super(Classifier, self).__init__(model_spec, shuffle)
-    self._index_by_label = index_by_label
+    self._label_names = label_names
    self._full_train = full_train
-    self._num_classes = len(index_by_label)
+    self._num_classes = len(label_names)
 
  def evaluate(self, data: dataset.Dataset, batch_size: int = 32) -> Any:
    """Evaluates the classifier with the provided evaluation dataset.
@@ -74,4 +74,4 @@ class Classifier(custom_model.CustomModel):
    label_filepath = os.path.join(export_dir, label_filename)
    tf.compat.v1.logging.info('Saving labels in %s', label_filepath)
    with tf.io.gfile.GFile(label_filepath, 'w') as f:
-      f.write('\n'.join(self._index_by_label))
+      f.write('\n'.join(self._label_names))

mediapipe/model_maker/python/core/tasks/classifier_test.py

@@ -36,10 +36,10 @@ class ClassifierTest(tf.test.TestCase):
 
   def setUp(self):
     super(ClassifierTest, self).setUp()
-    index_by_label = ['cat', 'dog']
+    label_names = ['cat', 'dog']
     self.model = MockClassifier(
         model_spec=None,
-        index_by_label=index_by_label,
+        label_names=label_names,
         shuffle=False,
         full_train=False)
     self.model.model = test_util.build_model(input_shape=[4], num_classes=2)

mediapipe/model_maker/python/vision/image_classifier/dataset.py

@@ -106,4 +106,4 @@ class Dataset(classification_dataset.ClassificationDataset):
       'Load image with size: %d, num_label: %d, labels: %s.', all_image_size,
       all_label_size, ', '.join(label_names))
   return Dataset(
-      dataset=image_label_ds, size=all_image_size, index_by_label=label_names)
+      dataset=image_label_ds, size=all_image_size, label_names=label_names)

mediapipe/model_maker/python/vision/image_classifier/dataset_test.py

@@ -49,27 +49,27 @@ class DatasetTest(tf.test.TestCase):
 
   def test_split(self):
     ds = tf.data.Dataset.from_tensor_slices([[0, 1], [1, 1], [0, 0], [1, 0]])
-    data = dataset.Dataset(dataset=ds, size=4, index_by_label=['pos', 'neg'])
+    data = dataset.Dataset(dataset=ds, size=4, label_names=['pos', 'neg'])
     train_data, test_data = data.split(fraction=0.5)
 
     self.assertLen(train_data, 2)
     for i, elem in enumerate(train_data._dataset):
       self.assertTrue((elem.numpy() == np.array([i, 1])).all())
     self.assertEqual(train_data.num_classes, 2)
-    self.assertEqual(train_data.index_by_label, ['pos', 'neg'])
+    self.assertEqual(train_data.label_names, ['pos', 'neg'])
 
     self.assertLen(test_data, 2)
     for i, elem in enumerate(test_data._dataset):
       self.assertTrue((elem.numpy() == np.array([i, 0])).all())
     self.assertEqual(test_data.num_classes, 2)
-    self.assertEqual(test_data.index_by_label, ['pos', 'neg'])
+    self.assertEqual(test_data.label_names, ['pos', 'neg'])
 
   def test_from_folder(self):
     data = dataset.Dataset.from_folder(dirname=self.image_path)
 
     self.assertLen(data, 2)
     self.assertEqual(data.num_classes, 2)
-    self.assertEqual(data.index_by_label, ['daisy', 'tulips'])
+    self.assertEqual(data.label_names, ['daisy', 'tulips'])
     for image, label in data.gen_tf_dataset():
       self.assertTrue(label.numpy() == 1 or label.numpy() == 0)
       if label.numpy() == 0:
@@ -88,19 +88,19 @@ class DatasetTest(tf.test.TestCase):
     self.assertIsInstance(train_data.gen_tf_dataset(), tf.data.Dataset)
     self.assertLen(train_data, 1034)
     self.assertEqual(train_data.num_classes, 3)
-    self.assertEqual(train_data.index_by_label,
+    self.assertEqual(train_data.label_names,
                      ['angular_leaf_spot', 'bean_rust', 'healthy'])
 
     self.assertIsInstance(validation_data.gen_tf_dataset(), tf.data.Dataset)
     self.assertLen(validation_data, 133)
     self.assertEqual(validation_data.num_classes, 3)
-    self.assertEqual(validation_data.index_by_label,
+    self.assertEqual(validation_data.label_names,
                      ['angular_leaf_spot', 'bean_rust', 'healthy'])
 
     self.assertIsInstance(test_data.gen_tf_dataset(), tf.data.Dataset)
     self.assertLen(test_data, 128)
     self.assertEqual(test_data.num_classes, 3)
-    self.assertEqual(test_data.index_by_label,
+    self.assertEqual(test_data.label_names,
                      ['angular_leaf_spot', 'bean_rust', 'healthy'])

mediapipe/model_maker/python/vision/image_classifier/image_classifier.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 """APIs to train image classifier model."""
 
-from typing import Any, List, Optional
+from typing import List, Optional
 
 import tensorflow as tf
 import tensorflow_hub as hub
@@ -31,18 +31,18 @@ from mediapipe.model_maker.python.vision.image_classifier import train_image_cla
 class ImageClassifier(classifier.Classifier):
   """ImageClassifier for building image classification model."""
 
-  def __init__(self, model_spec: ms.ModelSpec, index_by_label: List[Any],
+  def __init__(self, model_spec: ms.ModelSpec, label_names: List[str],
               hparams: hp.HParams):
    """Initializes ImageClassifier class.
 
    Args:
      model_spec: Specification for the model.
-      index_by_label: A list that maps from index to label class name.
+      label_names: A list of label names for the classes.
      hparams: The hyperparameters for training image classifier.
    """
    super().__init__(
        model_spec=model_spec,
-        index_by_label=index_by_label,
+        label_names=label_names,
        shuffle=hparams.shuffle,
        full_train=hparams.do_fine_tuning)
    self._hparams = hparams
@@ -80,9 +80,7 @@ class ImageClassifier(classifier.Classifier):
 
    spec = ms.SupportedModels.get(model_spec)
    image_classifier = cls(
-        model_spec=spec,
-        index_by_label=train_data.index_by_label,
-        hparams=hparams)
+        model_spec=spec, label_names=train_data.label_names, hparams=hparams)
 
    image_classifier._create_model()

BUILD file (builtin_task_graphs cc_library)

@@ -88,6 +88,7 @@ cc_library(
     name = "builtin_task_graphs",
     deps = [
         "//mediapipe/tasks/cc/vision/image_classifier:image_classifier_graph",
+        "//mediapipe/tasks/cc/vision/image_segmenter:image_segmenter_graph",
         "//mediapipe/tasks/cc/vision/object_detector:object_detector_graph",
         "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_graph",
     ],

mediapipe/tasks/cc/vision/gesture_recognizer/BUILD

@@ -62,14 +62,18 @@ cc_library(
         "//mediapipe/tasks/cc/components:image_preprocessing",
         "//mediapipe/tasks/cc/components/processors:classification_postprocessing_graph",
         "//mediapipe/tasks/cc/components/processors/proto:classification_postprocessing_graph_options_cc_proto",
+        "//mediapipe/tasks/cc/components/processors/proto:classifier_options_cc_proto",
         "//mediapipe/tasks/cc/core:model_asset_bundle_resources",
         "//mediapipe/tasks/cc/core:model_resources",
         "//mediapipe/tasks/cc/core:model_resources_cache",
         "//mediapipe/tasks/cc/core:model_task_graph",
         "//mediapipe/tasks/cc/core:utils",
+        "//mediapipe/tasks/cc/core/proto:base_options_cc_proto",
         "//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
         "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
         "//mediapipe/tasks/cc/metadata/utils:zip_utils",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:combined_prediction_calculator",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:combined_prediction_calculator_cc_proto",
         "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:handedness_to_matrix_calculator",
         "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator",
         "//mediapipe/tasks/cc/vision/gesture_recognizer/calculators:landmarks_to_matrix_calculator_cc_proto",
@@ -77,8 +81,6 @@ cc_library(
         "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:gesture_embedder_graph_options_cc_proto",
         "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:hand_gesture_recognizer_graph_options_cc_proto",
         "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarks_detector_graph",
-        "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
-        "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
         "//mediapipe/tasks/metadata:metadata_schema_cc",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",

mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator.cc

@@ -153,13 +153,12 @@ class CombinedPredictionCalculator : public Node {
     // After loop, if have winning prediction return. Otherwise empty packet.
     std::unique_ptr<ClassificationList> first_winning_prediction = nullptr;
     auto collection = kClassificationListIn(cc);
-    for (int idx = 0; idx < collection.Count(); ++idx) {
-      const auto& packet = collection[idx];
-      if (packet.IsEmpty()) {
+    for (const auto& input : collection) {
+      if (input.IsEmpty() || input.Get().classification_size() == 0) {
         continue;
       }
       auto prediction = GetWinningPrediction(
-          packet.Get(), classwise_thresholds_, options_.background_label(),
+          input.Get(), classwise_thresholds_, options_.background_label(),
           options_.default_global_threshold());
       if (prediction->classification(0).label() !=
           options_.background_label()) {

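The loop above implements a simple precedence rule. A hedged sketch of that rule in Python (illustrative names, not the calculator's actual API; the per-class thresholds applied by GetWinningPrediction are omitted): skip empty classifier outputs and emit the first winning prediction whose top label is not the background label, otherwise emit nothing.

    # Sketch of the CombinedPredictionCalculator selection rule. Assumed input
    # shape: each entry is a list of {'label', 'score'} dicts sorted by
    # descending score, one list per classifier, in priority order.
    def combine_predictions(classification_lists, background_label='None'):
      for classifications in classification_lists:
        if not classifications:        # empty packet / no classifications: skip
          continue
        winning = classifications[0]
        if winning['label'] != background_label:
          return classifications       # first non-background winner is the output
      return None                      # no winner: empty output packet
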
mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc

@@ -146,6 +146,10 @@ ConvertGestureRecognizerGraphOptionsProto(GestureRecognizerOptions* options) {
         ->mutable_canned_gesture_classifier_graph_options()
         ->mutable_classifier_options()
         ->set_score_threshold(options->min_gesture_confidence);
+    hand_gesture_recognizer_graph_options
+        ->mutable_custom_gesture_classifier_graph_options()
+        ->mutable_classifier_options()
+        ->set_score_threshold(options->min_gesture_confidence);
   }
   return options_proto;
 }

mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc

@@ -30,14 +30,17 @@ limitations under the License.
 #include "mediapipe/tasks/cc/common.h"
 #include "mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.h"
 #include "mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options.pb.h"
+#include "mediapipe/tasks/cc/components/processors/proto/classifier_options.pb.h"
 #include "mediapipe/tasks/cc/core/model_asset_bundle_resources.h"
 #include "mediapipe/tasks/cc/core/model_resources.h"
 #include "mediapipe/tasks/cc/core/model_resources_cache.h"
 #include "mediapipe/tasks/cc/core/model_task_graph.h"
+#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
 #include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
 #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
 #include "mediapipe/tasks/cc/core/utils.h"
 #include "mediapipe/tasks/cc/metadata/utils/zip_utils.h"
+#include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator.pb.h"
 #include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h"
 #include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options.pb.h"
@@ -58,6 +61,7 @@ using ::mediapipe::api2::builder::Source;
 using ::mediapipe::tasks::components::processors::
     ConfigureTensorsToClassificationCalculator;
 using ::mediapipe::tasks::core::ModelAssetBundleResources;
+using ::mediapipe::tasks::core::proto::BaseOptions;
 using ::mediapipe::tasks::metadata::SetExternalFile;
 using ::mediapipe::tasks::vision::gesture_recognizer::proto::
     HandGestureRecognizerGraphOptions;
@@ -78,13 +82,20 @@ constexpr char kVectorTag[] = "VECTOR";
 constexpr char kIndexTag[] = "INDEX";
 constexpr char kIterableTag[] = "ITERABLE";
 constexpr char kBatchEndTag[] = "BATCH_END";
+constexpr char kPredictionTag[] = "PREDICTION";
+constexpr char kBackgroundLabel[] = "None";
 constexpr char kGestureEmbedderTFLiteName[] = "gesture_embedder.tflite";
 constexpr char kCannedGestureClassifierTFLiteName[] =
     "canned_gesture_classifier.tflite";
+constexpr char kCustomGestureClassifierTFLiteName[] =
+    "custom_gesture_classifier.tflite";
 
 struct SubTaskModelResources {
-  const core::ModelResources* gesture_embedder_model_resource;
-  const core::ModelResources* canned_gesture_classifier_model_resource;
+  const core::ModelResources* gesture_embedder_model_resource = nullptr;
+  const core::ModelResources* canned_gesture_classifier_model_resource =
+      nullptr;
+  const core::ModelResources* custom_gesture_classifier_model_resource =
+      nullptr;
 };
 
 Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
@@ -94,41 +105,21 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
   return node[Output<std::vector<Tensor>>{"TENSORS"}];
 }
 
-// Sets the base options in the sub tasks.
-absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
-                                   HandGestureRecognizerGraphOptions* options,
-                                   bool is_copy) {
-  ASSIGN_OR_RETURN(const auto gesture_embedder_file,
-                   resources.GetModelFile(kGestureEmbedderTFLiteName));
-  auto* gesture_embedder_graph_options =
-      options->mutable_gesture_embedder_graph_options();
-  SetExternalFile(gesture_embedder_file,
-                  gesture_embedder_graph_options->mutable_base_options()
-                      ->mutable_model_asset(),
-                  is_copy);
-  gesture_embedder_graph_options->mutable_base_options()
-      ->mutable_acceleration()
-      ->CopyFrom(options->base_options().acceleration());
-  gesture_embedder_graph_options->mutable_base_options()->set_use_stream_mode(
-      options->base_options().use_stream_mode());
-
-  ASSIGN_OR_RETURN(const auto canned_gesture_classifier_file,
-                   resources.GetModelFile(kCannedGestureClassifierTFLiteName));
-  auto* canned_gesture_classifier_graph_options =
-      options->mutable_canned_gesture_classifier_graph_options();
-  SetExternalFile(
-      canned_gesture_classifier_file,
-      canned_gesture_classifier_graph_options->mutable_base_options()
-          ->mutable_model_asset(),
-      is_copy);
-  canned_gesture_classifier_graph_options->mutable_base_options()
-      ->mutable_acceleration()
-      ->CopyFrom(options->base_options().acceleration());
-  canned_gesture_classifier_graph_options->mutable_base_options()
-      ->set_use_stream_mode(options->base_options().use_stream_mode());
+absl::Status ConfigureCombinedPredictionCalculator(
+    CombinedPredictionCalculatorOptions* options) {
+  options->set_background_label(kBackgroundLabel);
   return absl::OkStatus();
 }
 
+void PopulateAccelerationAndUseStreamMode(
+    const BaseOptions& parent_base_options,
+    BaseOptions* sub_task_base_options) {
+  sub_task_base_options->mutable_acceleration()->CopyFrom(
+      parent_base_options.acceleration());
+  sub_task_base_options->set_use_stream_mode(
+      parent_base_options.use_stream_mode());
+}
+
 }  // namespace
 
 // A
@@ -212,6 +203,56 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
   }
 
  private:
+  // Sets the base options in the sub tasks.
+  absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
+                                     HandGestureRecognizerGraphOptions* options,
+                                     bool is_copy) {
+    ASSIGN_OR_RETURN(const auto gesture_embedder_file,
+                     resources.GetModelFile(kGestureEmbedderTFLiteName));
+    auto* gesture_embedder_graph_options =
+        options->mutable_gesture_embedder_graph_options();
+    SetExternalFile(gesture_embedder_file,
+                    gesture_embedder_graph_options->mutable_base_options()
+                        ->mutable_model_asset(),
+                    is_copy);
+    PopulateAccelerationAndUseStreamMode(
+        options->base_options(),
+        gesture_embedder_graph_options->mutable_base_options());
+
+    ASSIGN_OR_RETURN(
+        const auto canned_gesture_classifier_file,
+        resources.GetModelFile(kCannedGestureClassifierTFLiteName));
+    auto* canned_gesture_classifier_graph_options =
+        options->mutable_canned_gesture_classifier_graph_options();
+    SetExternalFile(
+        canned_gesture_classifier_file,
+        canned_gesture_classifier_graph_options->mutable_base_options()
+            ->mutable_model_asset(),
+        is_copy);
+    PopulateAccelerationAndUseStreamMode(
+        options->base_options(),
+        canned_gesture_classifier_graph_options->mutable_base_options());
+
+    const auto custom_gesture_classifier_file =
+        resources.GetModelFile(kCustomGestureClassifierTFLiteName);
+    if (custom_gesture_classifier_file.ok()) {
+      has_custom_gesture_classifier = true;
+      auto* custom_gesture_classifier_graph_options =
+          options->mutable_custom_gesture_classifier_graph_options();
+      SetExternalFile(
+          custom_gesture_classifier_file.value(),
+          custom_gesture_classifier_graph_options->mutable_base_options()
+              ->mutable_model_asset(),
+          is_copy);
+      PopulateAccelerationAndUseStreamMode(
+          options->base_options(),
+          custom_gesture_classifier_graph_options->mutable_base_options());
+    } else {
+      LOG(INFO) << "Custom gesture classifier is not defined.";
+    }
+    return absl::OkStatus();
+  }
+
   absl::StatusOr<SubTaskModelResources> CreateSubTaskModelResources(
       SubgraphContext* sc) {
     auto* options = sc->MutableOptions<HandGestureRecognizerGraphOptions>();
@@ -237,6 +278,19 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
             std::make_unique<core::proto::ExternalFile>(
                 std::move(canned_gesture_classifier_model_asset)),
             "_canned_gesture_classifier"));
+    if (has_custom_gesture_classifier) {
+      auto& custom_gesture_classifier_model_asset =
+          *options->mutable_custom_gesture_classifier_graph_options()
+               ->mutable_base_options()
+               ->mutable_model_asset();
+      ASSIGN_OR_RETURN(
+          sub_task_model_resources.custom_gesture_classifier_model_resource,
+          CreateModelResources(
+              sc,
+              std::make_unique<core::proto::ExternalFile>(
+                  std::move(custom_gesture_classifier_model_asset)),
+              "_custom_gesture_classifier"));
+    }
     return sub_task_model_resources;
   }
 
@@ -302,7 +356,7 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
     hand_world_landmarks_tensor >> concatenate_tensor_vector.In(2);
     auto concatenated_tensors = concatenate_tensor_vector.Out("");
 
-    // Inference for static hand gesture recognition.
+    // Inference for gesture embedder.
     auto& gesture_embedder_inference =
         AddInference(*sub_task_model_resources.gesture_embedder_model_resource,
                      graph_options.gesture_embedder_graph_options()
@@ -310,34 +364,64 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
                          .acceleration(),
                      graph);
     concatenated_tensors >> gesture_embedder_inference.In(kTensorsTag);
-    auto embedding_tensors = gesture_embedder_inference.Out(kTensorsTag);
+    auto embedding_tensors =
+        gesture_embedder_inference.Out(kTensorsTag).Cast<Tensor>();
 
-    auto& canned_gesture_classifier_inference = AddInference(
-        *sub_task_model_resources.canned_gesture_classifier_model_resource,
-        graph_options.canned_gesture_classifier_graph_options()
-            .base_options()
-            .acceleration(),
-        graph);
-    embedding_tensors >> canned_gesture_classifier_inference.In(kTensorsTag);
-    auto inference_output_tensors =
-        canned_gesture_classifier_inference.Out(kTensorsTag);
+    auto& combine_predictions = graph.AddNode("CombinedPredictionCalculator");
+    MP_RETURN_IF_ERROR(ConfigureCombinedPredictionCalculator(
+        &combine_predictions
+             .GetOptions<CombinedPredictionCalculatorOptions>()));
 
+    int classifier_nums = 0;
+    // Inference for custom gesture classifier if it exists.
+    if (has_custom_gesture_classifier) {
+      ASSIGN_OR_RETURN(
+          auto gesture_clasification_list,
+          GetGestureClassificationList(
+              sub_task_model_resources.custom_gesture_classifier_model_resource,
+              graph_options.custom_gesture_classifier_graph_options(),
+              embedding_tensors, graph));
+      gesture_clasification_list >> combine_predictions.In(classifier_nums++);
+    }
+
+    // Inference for canned gesture classifier.
+    ASSIGN_OR_RETURN(
+        auto gesture_clasification_list,
+        GetGestureClassificationList(
+            sub_task_model_resources.canned_gesture_classifier_model_resource,
+            graph_options.canned_gesture_classifier_graph_options(),
+            embedding_tensors, graph));
+    gesture_clasification_list >> combine_predictions.In(classifier_nums++);
+
+    auto combined_classification_list =
+        combine_predictions.Out(kPredictionTag).Cast<ClassificationList>();
+
+    return combined_classification_list;
+  }
+
+  absl::StatusOr<Source<ClassificationList>> GetGestureClassificationList(
+      const core::ModelResources* model_resources,
+      const proto::GestureClassifierGraphOptions& options,
+      Source<Tensor>& embedding_tensors, Graph& graph) {
+    auto& custom_gesture_classifier_inference = AddInference(
+        *model_resources, options.base_options().acceleration(), graph);
+    embedding_tensors >> custom_gesture_classifier_inference.In(kTensorsTag);
+    auto custom_gesture_inference_out_tensors =
+        custom_gesture_classifier_inference.Out(kTensorsTag);
     auto& tensors_to_classification =
         graph.AddNode("TensorsToClassificationCalculator");
     MP_RETURN_IF_ERROR(ConfigureTensorsToClassificationCalculator(
-        graph_options.canned_gesture_classifier_graph_options()
-            .classifier_options(),
-        *sub_task_model_resources.canned_gesture_classifier_model_resource
-             ->GetMetadataExtractor(),
+        options.classifier_options(), *model_resources->GetMetadataExtractor(),
         0,
         &tensors_to_classification.GetOptions<
            mediapipe::TensorsToClassificationCalculatorOptions>()));
-    inference_output_tensors >> tensors_to_classification.In(kTensorsTag);
-    auto classification_list =
-        tensors_to_classification[Output<ClassificationList>(
-            "CLASSIFICATIONS")];
-    return classification_list;
+    custom_gesture_inference_out_tensors >>
+        tensors_to_classification.In(kTensorsTag);
+    return tensors_to_classification.Out("CLASSIFICATIONS")
+        .Cast<ClassificationList>();
  }
 
+  bool has_custom_gesture_classifier = false;
 };
 
 // clang-format off

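Taken together, this rewiring changes the tail of the graph: the gesture embedder output now feeds one classifier subgraph per available model (custom first, when bundled, then canned), and their ClassificationLists become the ordered inputs of CombinedPredictionCalculator. A hedged dataflow sketch (illustrative callables, not the MediaPipe graph-builder API):

    # Sketch of the updated graph tail. Each classifier callable maps an
    # embedding to a ClassificationList-like list of {'label', 'score'} dicts.
    def gesture_classifier_outputs(embedding, canned_classifier,
                                   custom_classifier=None):
      outputs = []
      if custom_classifier is not None:   # custom model bundled in the task file
        outputs.append(custom_classifier(embedding))
      outputs.append(canned_classifier(embedding))
      # These outputs are the ordered inputs to CombinedPredictionCalculator,
      # whose selection rule is sketched after the calculator diff above.
      return outputs
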
GestureRecognitionResult.java

@@ -31,6 +31,8 @@ import java.util.List;
 @AutoValue
 public abstract class GestureRecognitionResult implements TaskResult {
 
+  private static final int kGestureDefaultIndex = -1;
+
   /**
    * Creates a {@link GestureRecognitionResult} instance from the lists of landmarks, handedness,
    * and gestures protobuf messages.
@@ -97,7 +99,9 @@ public abstract class GestureRecognitionResult implements TaskResult {
           gestures.add(
               Category.create(
                   classification.getScore(),
-                  classification.getIndex(),
+                  // Gesture index is not used, because the final gesture result comes from multiple
+                  // classifiers.
+                  kGestureDefaultIndex,
                   classification.getLabel(),
                   classification.getDisplayName()));
         }
@@ -123,6 +127,10 @@ public abstract class GestureRecognitionResult implements TaskResult {
   /** Handedness of detected hands. */
   public abstract List<List<Category>> handednesses();
 
-  /** Recognized hand gestures of detected hands */
+  /**
+   * Recognized hand gestures of detected hands. Note that the index of the gesture is always -1,
+   * because the raw indices from multiple gesture classifiers cannot consolidate to a meaningful
+   * index.
+   */
   public abstract List<List<Category>> gestures();
 }

GestureRecognizerTest.java

@@ -46,19 +46,24 @@ import org.junit.runners.Suite.SuiteClasses;
 @SuiteClasses({GestureRecognizerTest.General.class, GestureRecognizerTest.RunningModeTest.class})
 public class GestureRecognizerTest {
   private static final String GESTURE_RECOGNIZER_BUNDLE_ASSET_FILE = "gesture_recognizer.task";
+  private static final String GESTURE_RECOGNIZER_WITH_CUSTOM_CLASSIFIER_BUNDLE_ASSET_FILE =
+      "gesture_recognizer_with_custom_classifier.task";
   private static final String TWO_HANDS_IMAGE = "right_hands.jpg";
   private static final String THUMB_UP_IMAGE = "thumb_up.jpg";
   private static final String POINTING_UP_ROTATED_IMAGE = "pointing_up_rotated.jpg";
   private static final String NO_HANDS_IMAGE = "cats_and_dogs.jpg";
+  private static final String FIST_IMAGE = "fist.jpg";
   private static final String THUMB_UP_LANDMARKS = "thumb_up_landmarks.pb";
+  private static final String FIST_LANDMARKS = "fist_landmarks.pb";
   private static final String TAG = "Gesture Recognizer Test";
   private static final String THUMB_UP_LABEL = "Thumb_Up";
-  private static final int THUMB_UP_INDEX = 5;
   private static final String POINTING_UP_LABEL = "Pointing_Up";
-  private static final int POINTING_UP_INDEX = 3;
+  private static final String FIST_LABEL = "Closed_Fist";
+  private static final String ROCK_LABEL = "Rock";
   private static final float LANDMARKS_ERROR_TOLERANCE = 0.03f;
   private static final int IMAGE_WIDTH = 382;
   private static final int IMAGE_HEIGHT = 406;
+  private static final int GESTURE_EXPECTED_INDEX = -1;
 
   @RunWith(AndroidJUnit4.class)
   public static final class General extends GestureRecognizerTest {
@@ -77,7 +82,7 @@
     GestureRecognitionResult actualResult =
         gestureRecognizer.recognize(getImageFromAsset(THUMB_UP_IMAGE));
     GestureRecognitionResult expectedResult =
-        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL, THUMB_UP_INDEX);
+        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL);
     assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult);
   }
 
@@ -108,16 +113,14 @@
             BaseOptions.builder()
                 .setModelAssetPath(GESTURE_RECOGNIZER_BUNDLE_ASSET_FILE)
                 .build())
-            // TODO update the confidence to be in range [0,1] after embedding model
-            // and scoring calculator is integrated.
-            .setMinGestureConfidence(2.0f)
+            .setMinGestureConfidence(0.5f)
             .build();
     GestureRecognizer gestureRecognizer =
         GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options);
     GestureRecognitionResult actualResult =
         gestureRecognizer.recognize(getImageFromAsset(THUMB_UP_IMAGE));
     GestureRecognitionResult expectedResult =
-        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL, THUMB_UP_INDEX);
+        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL);
     // Only contains one top scoring gesture.
     assertThat(actualResult.gestures().get(0)).hasSize(1);
     assertActualGestureEqualExpectedGesture(
@@ -159,10 +162,48 @@
         gestureRecognizer.recognize(
             getImageFromAsset(POINTING_UP_ROTATED_IMAGE), imageProcessingOptions);
     assertThat(actualResult.gestures()).hasSize(1);
-    assertThat(actualResult.gestures().get(0).get(0).index()).isEqualTo(POINTING_UP_INDEX);
     assertThat(actualResult.gestures().get(0).get(0).categoryName()).isEqualTo(POINTING_UP_LABEL);
   }
 
+  @Test
+  public void recognize_successWithCannedGestureFist() throws Exception {
+    GestureRecognizerOptions options =
+        GestureRecognizerOptions.builder()
+            .setBaseOptions(
+                BaseOptions.builder()
+                    .setModelAssetPath(GESTURE_RECOGNIZER_BUNDLE_ASSET_FILE)
+                    .build())
+            .setNumHands(1)
+            .build();
+    GestureRecognizer gestureRecognizer =
+        GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options);
+    GestureRecognitionResult actualResult =
+        gestureRecognizer.recognize(getImageFromAsset(FIST_IMAGE));
+    GestureRecognitionResult expectedResult =
+        getExpectedGestureRecognitionResult(FIST_LANDMARKS, FIST_LABEL);
+    assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult);
+  }
+
+  @Test
+  public void recognize_successWithCustomGestureRock() throws Exception {
+    GestureRecognizerOptions options =
+        GestureRecognizerOptions.builder()
+            .setBaseOptions(
+                BaseOptions.builder()
+                    .setModelAssetPath(
+                        GESTURE_RECOGNIZER_WITH_CUSTOM_CLASSIFIER_BUNDLE_ASSET_FILE)
+                    .build())
+            .setNumHands(1)
+            .build();
+    GestureRecognizer gestureRecognizer =
+        GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options);
+    GestureRecognitionResult actualResult =
+        gestureRecognizer.recognize(getImageFromAsset(FIST_IMAGE));
+    GestureRecognitionResult expectedResult =
+        getExpectedGestureRecognitionResult(FIST_LANDMARKS, ROCK_LABEL);
+    assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult);
+  }
+
   @Test
   public void recognize_failsWithRegionOfInterest() throws Exception {
     GestureRecognizerOptions options =
@@ -331,7 +372,7 @@
     GestureRecognitionResult actualResult =
         gestureRecognizer.recognize(getImageFromAsset(THUMB_UP_IMAGE));
     GestureRecognitionResult expectedResult =
-        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL, THUMB_UP_INDEX);
+        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL);
     assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult);
   }
 
@@ -348,7 +389,7 @@
     GestureRecognizer gestureRecognizer =
         GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options);
     GestureRecognitionResult expectedResult =
-        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL, THUMB_UP_INDEX);
+        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL);
     for (int i = 0; i < 3; i++) {
       GestureRecognitionResult actualResult =
           gestureRecognizer.recognizeForVideo(
@@ -361,7 +402,7 @@
   public void recognize_failsWithOutOfOrderInputTimestamps() throws Exception {
     MPImage image = getImageFromAsset(THUMB_UP_IMAGE);
     GestureRecognitionResult expectedResult =
-        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL, THUMB_UP_INDEX);
+        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL);
     GestureRecognizerOptions options =
         GestureRecognizerOptions.builder()
             .setBaseOptions(
@@ -393,7 +434,7 @@
   public void recognize_successWithLiveSteamMode() throws Exception {
     MPImage image = getImageFromAsset(THUMB_UP_IMAGE);
     GestureRecognitionResult expectedResult =
-        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL, THUMB_UP_INDEX);
+        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL);
     GestureRecognizerOptions options =
         GestureRecognizerOptions.builder()
             .setBaseOptions(
@@ -423,7 +464,7 @@
   }
 
   private static GestureRecognitionResult getExpectedGestureRecognitionResult(
-      String filePath, String gestureLabel, int gestureIndex) throws Exception {
+      String filePath, String gestureLabel) throws Exception {
     AssetManager assetManager = ApplicationProvider.getApplicationContext().getAssets();
     InputStream istr = assetManager.open(filePath);
     LandmarksDetectionResult landmarksDetectionResultProto =
@@ -431,9 +472,7 @@
     ClassificationProto.ClassificationList gesturesProto =
         ClassificationProto.ClassificationList.newBuilder()
             .addClassification(
-                ClassificationProto.Classification.newBuilder()
-                    .setLabel(gestureLabel)
-                    .setIndex(gestureIndex))
+                ClassificationProto.Classification.newBuilder().setLabel(gestureLabel))
             .build();
     return GestureRecognitionResult.create(
         Arrays.asList(landmarksDetectionResultProto.getLandmarks()),
@@ -479,8 +518,8 @@
 
   private static void assertActualGestureEqualExpectedGesture(
       Category actualGesture, Category expectedGesture) {
-    assertThat(actualGesture.index()).isEqualTo(actualGesture.index());
-    assertThat(expectedGesture.categoryName()).isEqualTo(expectedGesture.categoryName());
+    assertThat(actualGesture.categoryName()).isEqualTo(expectedGesture.categoryName());
+    assertThat(actualGesture.index()).isEqualTo(GESTURE_EXPECTED_INDEX);
   }
 
   private static void assertImageSizeIsExpected(MPImage inputImage) {

mediapipe/tasks/python/test/vision/BUILD

@@ -57,6 +57,22 @@ py_test(
     ],
 )
 
+py_test(
+    name = "image_segmenter_test",
+    srcs = ["image_segmenter_test.py"],
+    data = [
+        "//mediapipe/tasks/testdata/vision:test_images",
+        "//mediapipe/tasks/testdata/vision:test_models",
+    ],
+    deps = [
+        "//mediapipe/python:_framework_bindings",
+        "//mediapipe/tasks/python/core:base_options",
+        "//mediapipe/tasks/python/test:test_utils",
+        "//mediapipe/tasks/python/vision:image_segmenter",
+        "//mediapipe/tasks/python/vision/core:vision_task_running_mode",
+    ],
+)
+
 py_test(
     name = "gesture_recognizer_test",
     srcs = ["gesture_recognizer_test.py"],

353  mediapipe/tasks/python/test/vision/image_segmenter_test.py  (new file)

@@ -0,0 +1,353 @@
# Copyright 2022 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for image segmenter."""

import enum
from typing import List
from unittest import mock

from absl.testing import absltest
from absl.testing import parameterized
import cv2
import numpy as np

from mediapipe.python._framework_bindings import image as image_module
from mediapipe.python._framework_bindings import image_frame
from mediapipe.tasks.python.core import base_options as base_options_module
from mediapipe.tasks.python.test import test_utils
from mediapipe.tasks.python.vision import image_segmenter
from mediapipe.tasks.python.vision.core import vision_task_running_mode

_BaseOptions = base_options_module.BaseOptions
_Image = image_module.Image
_ImageFormat = image_frame.ImageFormat
_OutputType = image_segmenter.OutputType
_Activation = image_segmenter.Activation
_ImageSegmenter = image_segmenter.ImageSegmenter
_ImageSegmenterOptions = image_segmenter.ImageSegmenterOptions
_RUNNING_MODE = vision_task_running_mode.VisionTaskRunningMode

_MODEL_FILE = 'deeplabv3.tflite'
_IMAGE_FILE = 'segmentation_input_rotation0.jpg'
_SEGMENTATION_FILE = 'segmentation_golden_rotation0.png'
_MASK_MAGNIFICATION_FACTOR = 10
_MASK_SIMILARITY_THRESHOLD = 0.98


def _similar_to_uint8_mask(actual_mask, expected_mask):
  actual_mask_pixels = actual_mask.numpy_view().flatten()
  expected_mask_pixels = expected_mask.numpy_view().flatten()

  consistent_pixels = 0
  num_pixels = len(expected_mask_pixels)

  for index in range(num_pixels):
    consistent_pixels += (
        actual_mask_pixels[index] *
        _MASK_MAGNIFICATION_FACTOR == expected_mask_pixels[index])

  return consistent_pixels / num_pixels >= _MASK_SIMILARITY_THRESHOLD


class ModelFileType(enum.Enum):
  FILE_CONTENT = 1
  FILE_NAME = 2


class ImageSegmenterTest(parameterized.TestCase):

  def setUp(self):
    super().setUp()
    # Load the test input image.
    self.test_image = _Image.create_from_file(
        test_utils.get_test_data_path(_IMAGE_FILE))
    # Loads ground truth segmentation file.
    gt_segmentation_data = cv2.imread(
        test_utils.get_test_data_path(_SEGMENTATION_FILE), cv2.IMREAD_GRAYSCALE)
    self.test_seg_image = _Image(_ImageFormat.GRAY8, gt_segmentation_data)
    self.model_path = test_utils.get_test_data_path(_MODEL_FILE)

  def test_create_from_file_succeeds_with_valid_model_path(self):
    # Creates with default option and valid model file successfully.
    with _ImageSegmenter.create_from_model_path(self.model_path) as segmenter:
      self.assertIsInstance(segmenter, _ImageSegmenter)

  def test_create_from_options_succeeds_with_valid_model_path(self):
    # Creates with options containing model file successfully.
    base_options = _BaseOptions(model_asset_path=self.model_path)
    options = _ImageSegmenterOptions(base_options=base_options)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      self.assertIsInstance(segmenter, _ImageSegmenter)

  def test_create_from_options_fails_with_invalid_model_path(self):
    # Invalid empty model path.
    with self.assertRaisesRegex(
        ValueError,
        r"ExternalFile must specify at least one of 'file_content', "
        r"'file_name', 'file_pointer_meta' or 'file_descriptor_meta'."):
      base_options = _BaseOptions(model_asset_path='')
      options = _ImageSegmenterOptions(base_options=base_options)
      _ImageSegmenter.create_from_options(options)

  def test_create_from_options_succeeds_with_valid_model_content(self):
    # Creates with options containing model content successfully.
    with open(self.model_path, 'rb') as f:
      base_options = _BaseOptions(model_asset_buffer=f.read())
      options = _ImageSegmenterOptions(base_options=base_options)
      segmenter = _ImageSegmenter.create_from_options(options)
      self.assertIsInstance(segmenter, _ImageSegmenter)

  @parameterized.parameters((ModelFileType.FILE_NAME,),
                            (ModelFileType.FILE_CONTENT,))
  def test_segment_succeeds_with_category_mask(self, model_file_type):
    # Creates segmenter.
    if model_file_type is ModelFileType.FILE_NAME:
      base_options = _BaseOptions(model_asset_path=self.model_path)
    elif model_file_type is ModelFileType.FILE_CONTENT:
      with open(self.model_path, 'rb') as f:
        model_content = f.read()
      base_options = _BaseOptions(model_asset_buffer=model_content)
    else:
      # Should never happen
      raise ValueError('model_file_type is invalid.')

    options = _ImageSegmenterOptions(
        base_options=base_options, output_type=_OutputType.CATEGORY_MASK)
    segmenter = _ImageSegmenter.create_from_options(options)

    # Performs image segmentation on the input.
    category_masks = segmenter.segment(self.test_image)
    self.assertLen(category_masks, 1)
    category_mask = category_masks[0]
    result_pixels = category_mask.numpy_view().flatten()

    # Check if data type of `category_mask` is correct.
    self.assertEqual(result_pixels.dtype, np.uint8)

    self.assertTrue(
        _similar_to_uint8_mask(category_masks[0], self.test_seg_image),
        f'Number of pixels in the candidate mask differing from that of the '
        f'ground truth mask exceeds {_MASK_SIMILARITY_THRESHOLD}.')

    # Closes the segmenter explicitly when the segmenter is not used in
    # a context.
    segmenter.close()

  def test_segment_succeeds_with_confidence_mask(self):
    # Creates segmenter.
    base_options = _BaseOptions(model_asset_path=self.model_path)

    # Run segmentation on the model in CATEGORY_MASK mode.
    options = _ImageSegmenterOptions(
        base_options=base_options, output_type=_OutputType.CATEGORY_MASK)
    segmenter = _ImageSegmenter.create_from_options(options)
    category_masks = segmenter.segment(self.test_image)
    category_mask = category_masks[0].numpy_view()

    # Run segmentation on the model in CONFIDENCE_MASK mode.
    options = _ImageSegmenterOptions(
        base_options=base_options,
        output_type=_OutputType.CONFIDENCE_MASK,
        activation=_Activation.SOFTMAX)
    segmenter = _ImageSegmenter.create_from_options(options)
    confidence_masks = segmenter.segment(self.test_image)

    # Check if confidence mask shape is correct.
    self.assertLen(
        confidence_masks, 21,
        'Number of confidence masks must match with number of categories.')

    # Gather the confidence masks in a single array `confidence_mask_array`.
    confidence_mask_array = np.array(
        [confidence_mask.numpy_view() for confidence_mask in confidence_masks])

    # Check if data type of `confidence_masks` are correct.
    self.assertEqual(confidence_mask_array.dtype, np.float32)

    # Compute the category mask from the created confidence mask.
    calculated_category_mask = np.argmax(confidence_mask_array, axis=0)
    self.assertListEqual(
        calculated_category_mask.tolist(), category_mask.tolist(),
        'Confidence mask does not match with the category mask.')

    # Closes the segmenter explicitly when the segmenter is not used in
    # a context.
    segmenter.close()

  @parameterized.parameters((ModelFileType.FILE_NAME),
                            (ModelFileType.FILE_CONTENT))
  def test_segment_in_context(self, model_file_type):
    if model_file_type is ModelFileType.FILE_NAME:
      base_options = _BaseOptions(model_asset_path=self.model_path)
    elif model_file_type is ModelFileType.FILE_CONTENT:
      with open(self.model_path, 'rb') as f:
        model_contents = f.read()
      base_options = _BaseOptions(model_asset_buffer=model_contents)
    else:
      # Should never happen
      raise ValueError('model_file_type is invalid.')

    options = _ImageSegmenterOptions(
        base_options=base_options, output_type=_OutputType.CATEGORY_MASK)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      # Performs image segmentation on the input.
      category_masks = segmenter.segment(self.test_image)
      self.assertLen(category_masks, 1)

      self.assertTrue(
          _similar_to_uint8_mask(category_masks[0], self.test_seg_image),
          f'Number of pixels in the candidate mask differing from that of the '
          f'ground truth mask exceeds {_MASK_SIMILARITY_THRESHOLD}.')

  def test_missing_result_callback(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.LIVE_STREAM)
    with self.assertRaisesRegex(ValueError,
                                r'result callback must be provided'):
      with _ImageSegmenter.create_from_options(options) as unused_segmenter:
        pass

  @parameterized.parameters((_RUNNING_MODE.IMAGE), (_RUNNING_MODE.VIDEO))
  def test_illegal_result_callback(self, running_mode):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=running_mode,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'result callback should not be provided'):
      with _ImageSegmenter.create_from_options(options) as unused_segmenter:
        pass

  def test_calling_segment_for_video_in_image_mode(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.IMAGE)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the video mode'):
        segmenter.segment_for_video(self.test_image, 0)

  def test_calling_segment_async_in_image_mode(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.IMAGE)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the live stream mode'):
        segmenter.segment_async(self.test_image, 0)

  def test_calling_segment_in_video_mode(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.VIDEO)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the image mode'):
        segmenter.segment(self.test_image)

  def test_calling_segment_async_in_video_mode(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.VIDEO)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the live stream mode'):
        segmenter.segment_async(self.test_image, 0)

  def test_segment_for_video_with_out_of_order_timestamp(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.VIDEO)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      unused_result = segmenter.segment_for_video(self.test_image, 1)
      with self.assertRaisesRegex(
          ValueError, r'Input timestamp must be monotonically increasing'):
        segmenter.segment_for_video(self.test_image, 0)

  def test_segment_for_video(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        output_type=_OutputType.CATEGORY_MASK,
        running_mode=_RUNNING_MODE.VIDEO)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      for timestamp in range(0, 300, 30):
        category_masks = segmenter.segment_for_video(self.test_image, timestamp)
        self.assertLen(category_masks, 1)
        self.assertTrue(
            _similar_to_uint8_mask(category_masks[0], self.test_seg_image),
            f'Number of pixels in the candidate mask differing from that of the '
            f'ground truth mask exceeds {_MASK_SIMILARITY_THRESHOLD}.')

  def test_calling_segment_in_live_stream_mode(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.LIVE_STREAM,
        result_callback=mock.MagicMock())
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the image mode'):
        segmenter.segment(self.test_image)

  def test_calling_segment_for_video_in_live_stream_mode(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.LIVE_STREAM,
        result_callback=mock.MagicMock())
    with _ImageSegmenter.create_from_options(options) as segmenter:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the video mode'):
        segmenter.segment_for_video(self.test_image, 0)

  def test_segment_async_calls_with_illegal_timestamp(self):
    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        running_mode=_RUNNING_MODE.LIVE_STREAM,
        result_callback=mock.MagicMock())
    with _ImageSegmenter.create_from_options(options) as segmenter:
      segmenter.segment_async(self.test_image, 100)
      with self.assertRaisesRegex(
          ValueError, r'Input timestamp must be monotonically increasing'):
        segmenter.segment_async(self.test_image, 0)

  def test_segment_async_calls(self):
    # Tracks the last timestamp seen by the callback so the callbacks can be
    # checked for monotonically increasing timestamps.
    self.observed_timestamp_ms = -1

    def check_result(result: List[image_module.Image], output_image: _Image,
                     timestamp_ms: int):
      # Get the output category mask.
      category_mask = result[0]
      self.assertEqual(output_image.width, self.test_image.width)
      self.assertEqual(output_image.height, self.test_image.height)
      self.assertEqual(output_image.width, self.test_seg_image.width)
      self.assertEqual(output_image.height, self.test_seg_image.height)
      self.assertTrue(
          _similar_to_uint8_mask(category_mask, self.test_seg_image),
          f'Number of pixels in the candidate mask differing from that of the '
          f'ground truth mask exceeds {_MASK_SIMILARITY_THRESHOLD}.')
      self.assertLess(self.observed_timestamp_ms, timestamp_ms)
      self.observed_timestamp_ms = timestamp_ms

    options = _ImageSegmenterOptions(
        base_options=_BaseOptions(model_asset_path=self.model_path),
        output_type=_OutputType.CATEGORY_MASK,
        running_mode=_RUNNING_MODE.LIVE_STREAM,
        result_callback=check_result)
    with _ImageSegmenter.create_from_options(options) as segmenter:
      for timestamp in range(0, 300, 30):
        segmenter.segment_async(self.test_image, timestamp)


if __name__ == '__main__':
  absltest.main()
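The per-pixel loop in _similar_to_uint8_mask above can also be expressed as a vectorized NumPy check. A minimal sketch under the same assumptions (two flattened uint8 arrays of equal length, with the golden mask storing 10x the category index); the helper name and defaults here are illustrative, not part of the test file:

import numpy as np

def similar_to_uint8_mask(actual_pixels: np.ndarray,
                          expected_pixels: np.ndarray,
                          magnification_factor: int = 10,
                          threshold: float = 0.98) -> bool:
  # Cast before multiplying so uint8 values cannot wrap around at 255.
  matches = (actual_pixels.astype(np.int32) * magnification_factor ==
             expected_pixels.astype(np.int32))
  # Fraction of pixels where the magnified category id equals the golden value.
  return float(np.mean(matches)) >= threshold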
@@ -59,6 +59,25 @@ py_library(
     ],
 )

+py_library(
+    name = "image_segmenter",
+    srcs = [
+        "image_segmenter.py",
+    ],
+    deps = [
+        "//mediapipe/python:_framework_bindings",
+        "//mediapipe/python:packet_creator",
+        "//mediapipe/python:packet_getter",
+        "//mediapipe/tasks/cc/components/proto:segmenter_options_py_pb2",
+        "//mediapipe/tasks/cc/vision/image_segmenter/proto:image_segmenter_options_py_pb2",
+        "//mediapipe/tasks/python/core:base_options",
+        "//mediapipe/tasks/python/core:optional_dependencies",
+        "//mediapipe/tasks/python/core:task_info",
+        "//mediapipe/tasks/python/vision/core:base_vision_task_api",
+        "//mediapipe/tasks/python/vision/core:vision_task_running_mode",
+    ],
+)
+
 py_library(
     name = "gesture_recognizer",
     srcs = [
253  mediapipe/tasks/python/vision/image_segmenter.py  (new file)

@@ -0,0 +1,253 @@
# Copyright 2022 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe image segmenter task."""

import dataclasses
import enum
from typing import Callable, List, Mapping, Optional

from mediapipe.python import packet_creator
from mediapipe.python import packet_getter
from mediapipe.python._framework_bindings import image as image_module
from mediapipe.python._framework_bindings import packet
from mediapipe.python._framework_bindings import task_runner
from mediapipe.tasks.cc.components.proto import segmenter_options_pb2
from mediapipe.tasks.cc.vision.image_segmenter.proto import image_segmenter_options_pb2
from mediapipe.tasks.python.core import base_options as base_options_module
from mediapipe.tasks.python.core import task_info as task_info_module
from mediapipe.tasks.python.core.optional_dependencies import doc_controls
from mediapipe.tasks.python.vision.core import base_vision_task_api
from mediapipe.tasks.python.vision.core import vision_task_running_mode

_BaseOptions = base_options_module.BaseOptions
_SegmenterOptionsProto = segmenter_options_pb2.SegmenterOptions
_ImageSegmenterOptionsProto = image_segmenter_options_pb2.ImageSegmenterOptions
_RunningMode = vision_task_running_mode.VisionTaskRunningMode
_TaskInfo = task_info_module.TaskInfo
_TaskRunner = task_runner.TaskRunner

_SEGMENTATION_OUT_STREAM_NAME = 'segmented_mask_out'
_SEGMENTATION_TAG = 'GROUPED_SEGMENTATION'
_IMAGE_IN_STREAM_NAME = 'image_in'
_IMAGE_OUT_STREAM_NAME = 'image_out'
_IMAGE_TAG = 'IMAGE'
_TASK_GRAPH_NAME = 'mediapipe.tasks.vision.ImageSegmenterGraph'
_MICRO_SECONDS_PER_MILLISECOND = 1000


class OutputType(enum.Enum):
  UNSPECIFIED = 0
  CATEGORY_MASK = 1
  CONFIDENCE_MASK = 2


class Activation(enum.Enum):
  NONE = 0
  SIGMOID = 1
  SOFTMAX = 2


@dataclasses.dataclass
class ImageSegmenterOptions:
  """Options for the image segmenter task.

  Attributes:
    base_options: Base options for the image segmenter task.
    running_mode: The running mode of the task. Defaults to the image mode.
      The image segmenter task has three running modes: 1) The image mode for
      segmenting objects on single image inputs. 2) The video mode for
      segmenting objects on the decoded frames of a video. 3) The live stream
      mode for segmenting objects on a live stream of input data, such as from
      a camera.
    output_type: The output mask type, which specifies the type of
      post-processing to perform on the raw model results.
    activation: Activation function to apply to the raw model output tensor.
    result_callback: The user-defined result callback for processing live
      stream data. The result callback should only be specified when the
      running mode is set to the live stream mode.
  """
  base_options: _BaseOptions
  running_mode: _RunningMode = _RunningMode.IMAGE
  output_type: Optional[OutputType] = OutputType.CATEGORY_MASK
  activation: Optional[Activation] = Activation.NONE
  result_callback: Optional[Callable[
      [List[image_module.Image], image_module.Image, int], None]] = None

  @doc_controls.do_not_generate_docs
  def to_pb2(self) -> _ImageSegmenterOptionsProto:
    """Generates an ImageSegmenterOptions protobuf object."""
    base_options_proto = self.base_options.to_pb2()
    base_options_proto.use_stream_mode = False if self.running_mode == _RunningMode.IMAGE else True
    segmenter_options_proto = _SegmenterOptionsProto(
        output_type=self.output_type.value, activation=self.activation.value)
    return _ImageSegmenterOptionsProto(
        base_options=base_options_proto,
        segmenter_options=segmenter_options_proto)


class ImageSegmenter(base_vision_task_api.BaseVisionTaskApi):
  """Class that performs image segmentation on images."""

  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'ImageSegmenter':
    """Creates an `ImageSegmenter` object from a TensorFlow Lite model and the default `ImageSegmenterOptions`.

    Note that the created `ImageSegmenter` instance is in image mode, for
    performing image segmentation on single image inputs.

    Args:
      model_path: Path to the model.

    Returns:
      `ImageSegmenter` object that's created from the model file and the
      default `ImageSegmenterOptions`.

    Raises:
      ValueError: If failed to create `ImageSegmenter` object from the provided
        file, such as an invalid file path.
      RuntimeError: If other types of error occurred.
    """
    base_options = _BaseOptions(model_asset_path=model_path)
    options = ImageSegmenterOptions(
        base_options=base_options, running_mode=_RunningMode.IMAGE)
    return cls.create_from_options(options)

  @classmethod
  def create_from_options(cls,
                          options: ImageSegmenterOptions) -> 'ImageSegmenter':
    """Creates the `ImageSegmenter` object from image segmenter options.

    Args:
      options: Options for the image segmenter task.

    Returns:
      `ImageSegmenter` object that's created from `options`.

    Raises:
      ValueError: If failed to create `ImageSegmenter` object from
        `ImageSegmenterOptions`, such as a missing model.
      RuntimeError: If other types of error occurred.
    """

    def packets_callback(output_packets: Mapping[str, packet.Packet]):
      if output_packets[_IMAGE_OUT_STREAM_NAME].is_empty():
        return
      segmentation_result = packet_getter.get_image_list(
          output_packets[_SEGMENTATION_OUT_STREAM_NAME])
      image = packet_getter.get_image(output_packets[_IMAGE_OUT_STREAM_NAME])
      timestamp = output_packets[_SEGMENTATION_OUT_STREAM_NAME].timestamp
      options.result_callback(segmentation_result, image,
                              timestamp.value // _MICRO_SECONDS_PER_MILLISECOND)

    task_info = _TaskInfo(
        task_graph=_TASK_GRAPH_NAME,
        input_streams=[':'.join([_IMAGE_TAG, _IMAGE_IN_STREAM_NAME])],
        output_streams=[
            ':'.join([_SEGMENTATION_TAG, _SEGMENTATION_OUT_STREAM_NAME]),
            ':'.join([_IMAGE_TAG, _IMAGE_OUT_STREAM_NAME])
        ],
        task_options=options)
    return cls(
        task_info.generate_graph_config(
            enable_flow_limiting=options.running_mode ==
            _RunningMode.LIVE_STREAM), options.running_mode,
        packets_callback if options.result_callback else None)

  def segment(self, image: image_module.Image) -> List[image_module.Image]:
    """Performs the actual segmentation task on the provided MediaPipe Image.

    Args:
      image: MediaPipe Image.

    Returns:
      A list of segmentation masks as MediaPipe images. If the output_type is
      CATEGORY_MASK, the list contains a single per-pixel category mask. If the
      output_type is CONFIDENCE_MASK, the list contains one confidence mask per
      category.

    Raises:
      ValueError: If any of the input arguments is invalid.
      RuntimeError: If image segmentation failed to run.
    """
    output_packets = self._process_image_data(
        {_IMAGE_IN_STREAM_NAME: packet_creator.create_image(image)})
    segmentation_result = packet_getter.get_image_list(
        output_packets[_SEGMENTATION_OUT_STREAM_NAME])
    return segmentation_result

  def segment_for_video(self, image: image_module.Image,
                        timestamp_ms: int) -> List[image_module.Image]:
    """Performs segmentation on the provided video frame.

    Only use this method when the ImageSegmenter is created with the video
    running mode. It's required to provide the video frame's timestamp (in
    milliseconds) along with the video frame. The input timestamps should be
    monotonically increasing for adjacent calls of this method.

    Args:
      image: MediaPipe Image.
      timestamp_ms: The timestamp of the input video frame in milliseconds.

    Returns:
      A list of segmentation masks as MediaPipe images. If the output_type is
      CATEGORY_MASK, the list contains a single per-pixel category mask. If the
      output_type is CONFIDENCE_MASK, the list contains one confidence mask per
      category.

    Raises:
      ValueError: If any of the input arguments is invalid.
      RuntimeError: If image segmentation failed to run.
    """
    output_packets = self._process_video_data({
        _IMAGE_IN_STREAM_NAME:
            packet_creator.create_image(image).at(
                timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
    })
    segmentation_result = packet_getter.get_image_list(
        output_packets[_SEGMENTATION_OUT_STREAM_NAME])
    return segmentation_result

  def segment_async(self, image: image_module.Image,
                    timestamp_ms: int) -> None:
    """Sends live image data (an Image with a unique timestamp) to perform image segmentation.

    Only use this method when the ImageSegmenter is created with the live
    stream running mode. The input timestamps should be monotonically
    increasing for adjacent calls of this method. This method will return
    immediately after the input image is accepted. The results will be
    available via the `result_callback` provided in the
    `ImageSegmenterOptions`. The `segment_async` method is designed to process
    live stream data such as camera input. To lower the overall latency, the
    image segmenter may drop input images if needed. In other words, it's not
    guaranteed to have output per input image.

    The `result_callback` provides:
      - A segmentation result object that contains a list of segmentation
        masks as images.
      - The input image that the image segmenter runs on.
      - The input timestamp in milliseconds.

    Args:
      image: MediaPipe Image.
      timestamp_ms: The timestamp of the input image in milliseconds.

    Raises:
      ValueError: If the current input timestamp is smaller than what the
        image segmenter has already processed.
    """
    self._send_live_stream_data({
        _IMAGE_IN_STREAM_NAME:
            packet_creator.create_image(image).at(
                timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
    })
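Taken together, the options dataclass and the three segment* methods above map onto the three running modes. A minimal usage sketch, assuming a local 'deeplabv3.tflite' model and an 'input.jpg' image (both hypothetical paths), not part of the committed code:

from mediapipe.python._framework_bindings import image as image_module
from mediapipe.tasks.python.core import base_options as base_options_module
from mediapipe.tasks.python.vision import image_segmenter
from mediapipe.tasks.python.vision.core import vision_task_running_mode

_RunningMode = vision_task_running_mode.VisionTaskRunningMode

mp_image = image_module.Image.create_from_file('input.jpg')
base_options = base_options_module.BaseOptions(model_asset_path='deeplabv3.tflite')

# Image mode: one synchronous call per independent image.
with image_segmenter.ImageSegmenter.create_from_model_path('deeplabv3.tflite') as segmenter:
  masks = segmenter.segment(mp_image)

# Video mode: timestamps (in ms) must be monotonically increasing across calls.
video_options = image_segmenter.ImageSegmenterOptions(
    base_options=base_options, running_mode=_RunningMode.VIDEO)
with image_segmenter.ImageSegmenter.create_from_options(video_options) as segmenter:
  masks = segmenter.segment_for_video(mp_image, 33)

# Live stream mode: segment_async returns immediately and results arrive
# through result_callback.
def on_result(masks, output_image, timestamp_ms):
  print(f'{len(masks)} mask(s) at {timestamp_ms} ms')

stream_options = image_segmenter.ImageSegmenterOptions(
    base_options=base_options,
    running_mode=_RunningMode.LIVE_STREAM,
    result_callback=on_result)
with image_segmenter.ImageSegmenter.create_from_options(stream_options) as segmenter:
  segmenter.segment_async(mp_image, 0)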
6  mediapipe/tasks/testdata/vision/BUILD  (vendored)

@@ -37,6 +37,7 @@ mediapipe_files(srcs = [
     "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.tflite",
     "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29_with_dummy_score_calibration.tflite",
     "deeplabv3.tflite",
+    "fist.jpg",
     "hand_landmark_full.tflite",
     "hand_landmark_lite.tflite",
     "left_hands.jpg",
@@ -64,6 +65,7 @@ mediapipe_files(srcs = [
     "selfie_segm_144_256_3.tflite",
     "selfie_segm_144_256_3_expected_mask.jpg",
     "thumb_up.jpg",
+    "victory.jpg",
 ])

 exports_files(
@@ -91,6 +93,7 @@ filegroup(
         "cats_and_dogs.jpg",
         "cats_and_dogs_no_resizing.jpg",
         "cats_and_dogs_rotated.jpg",
+        "fist.jpg",
         "hand_landmark_full.tflite",
         "hand_landmark_lite.tflite",
         "left_hands.jpg",
@@ -107,6 +110,7 @@ filegroup(
         "selfie_segm_128_128_3_expected_mask.jpg",
         "selfie_segm_144_256_3_expected_mask.jpg",
         "thumb_up.jpg",
+        "victory.jpg",
     ],
     visibility = [
         "//mediapipe/python:__subpackages__",
@@ -149,6 +153,7 @@ filegroup(
         "expected_left_up_hand_rotated_landmarks.prototxt",
         "expected_right_down_hand_landmarks.prototxt",
         "expected_right_up_hand_landmarks.prototxt",
+        "fist_landmarks.pbtxt",
         "hand_detector_result_one_hand.pbtxt",
         "hand_detector_result_one_hand_rotated.pbtxt",
         "hand_detector_result_two_hands.pbtxt",
@@ -156,5 +161,6 @@ filegroup(
         "pointing_up_rotated_landmarks.pbtxt",
         "thumb_up_landmarks.pbtxt",
         "thumb_up_rotated_landmarks.pbtxt",
+        "victory_landmarks.pbtxt",
     ],
 )
223  mediapipe/tasks/testdata/vision/fist_landmarks.pbtxt  (vendored, new file)

@@ -0,0 +1,223 @@
|
||||||
|
classifications {
|
||||||
|
classification {
|
||||||
|
score: 1.0
|
||||||
|
label: "Left"
|
||||||
|
display_name: "Left"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
landmarks {
|
||||||
|
landmark {
|
||||||
|
x: 0.47709703
|
||||||
|
y: 0.66129065
|
||||||
|
z: -3.3540672e-07
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.6125982
|
||||||
|
y: 0.5578249
|
||||||
|
z: -0.041392017
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.71123487
|
||||||
|
y: 0.4316616
|
||||||
|
z: -0.064544134
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.6836403
|
||||||
|
y: 0.3199585
|
||||||
|
z: -0.08752567
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.5593274
|
||||||
|
y: 0.3206453
|
||||||
|
z: -0.09880819
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.60828537
|
||||||
|
y: 0.3068749
|
||||||
|
z: -0.014799656
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.62940764
|
||||||
|
y: 0.21414441
|
||||||
|
z: -0.06007311
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.6244353
|
||||||
|
y: 0.32872596
|
||||||
|
z: -0.08326768
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.60784453
|
||||||
|
y: 0.3684796
|
||||||
|
z: -0.09658983
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.5156504
|
||||||
|
y: 0.32194698
|
||||||
|
z: -0.021699267
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.52931
|
||||||
|
y: 0.24767634
|
||||||
|
z: -0.062571
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.5484773
|
||||||
|
y: 0.3805329
|
||||||
|
z: -0.07028895
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.54428184
|
||||||
|
y: 0.3881125
|
||||||
|
z: -0.07458326
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.43159598
|
||||||
|
y: 0.34918433
|
||||||
|
z: -0.037482508
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.4486106
|
||||||
|
y: 0.27649382
|
||||||
|
z: -0.08174769
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.47723144
|
||||||
|
y: 0.3964985
|
||||||
|
z: -0.06496752
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.46794242
|
||||||
|
y: 0.4082967
|
||||||
|
z: -0.04897496
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.34826216
|
||||||
|
y: 0.37813392
|
||||||
|
z: -0.057438444
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.3861837
|
||||||
|
y: 0.32820183
|
||||||
|
z: -0.07282783
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.41143674
|
||||||
|
y: 0.39734486
|
||||||
|
z: -0.047633167
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.39401984
|
||||||
|
y: 0.41149133
|
||||||
|
z: -0.029640475
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
world_landmarks {
|
||||||
|
landmark {
|
||||||
|
x: -0.008604452
|
||||||
|
y: 0.08165767
|
||||||
|
z: 0.0061365655
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.027301773
|
||||||
|
y: 0.061905317
|
||||||
|
z: -0.00872007
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.049898714
|
||||||
|
y: 0.035359327
|
||||||
|
z: -0.016682662
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.050297678
|
||||||
|
y: 0.005200807
|
||||||
|
z: -0.028928496
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.015639625
|
||||||
|
y: -0.0063155442
|
||||||
|
z: -0.03174634
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.029161729
|
||||||
|
y: -0.0024596984
|
||||||
|
z: 0.0011553494
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.034491
|
||||||
|
y: -0.017581237
|
||||||
|
z: -0.020781275
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.034020264
|
||||||
|
y: -0.0059247985
|
||||||
|
z: -0.02573838
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.02867364
|
||||||
|
y: 0.011137734
|
||||||
|
z: -0.009430941
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.0015385814
|
||||||
|
y: -0.004778851
|
||||||
|
z: 0.0056454404
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.010490709
|
||||||
|
y: -0.019680617
|
||||||
|
z: -0.027034117
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.0132071925
|
||||||
|
y: 0.0071370844
|
||||||
|
z: -0.034802448
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.0139978565
|
||||||
|
y: 0.011672501
|
||||||
|
z: -0.0040006908
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.019919239
|
||||||
|
y: -0.0006897822
|
||||||
|
z: -0.0003317799
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.01088193
|
||||||
|
y: -0.008502296
|
||||||
|
z: -0.02873486
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.005327127
|
||||||
|
y: 0.012745364
|
||||||
|
z: -0.034153957
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.0027040644
|
||||||
|
y: 0.02167169
|
||||||
|
z: -0.011669062
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.038813893
|
||||||
|
y: 0.011925209
|
||||||
|
z: -0.0076287366
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.030842202
|
||||||
|
y: 0.0010964936
|
||||||
|
z: -0.022697516
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.01829514
|
||||||
|
y: 0.013929318
|
||||||
|
z: -0.032819964
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.024175374
|
||||||
|
y: 0.022456694
|
||||||
|
z: -0.02357186
|
||||||
|
}
|
||||||
|
}
|
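fist_landmarks.pbtxt is a text-format proto golden file: one handedness classification ("Left") plus 21 normalized landmarks and 21 world landmarks for a fist pose. A sketch of how such a golden file could be loaded in Python; the proto module path below is an assumption inferred from the LandmarksDetectionResult proto used by the Java gesture recognizer test and may not match the actual repository layout:

from google.protobuf import text_format
# Assumed module path for the LandmarksDetectionResult message; verify against
# the repository before relying on it.
from mediapipe.tasks.cc.components.containers.proto import landmarks_detection_result_pb2

with open('fist_landmarks.pbtxt') as f:
  golden = text_format.Parse(
      f.read(), landmarks_detection_result_pb2.LandmarksDetectionResult())
# Expect 21 normalized hand landmarks in the golden result.
print(len(golden.landmarks.landmark))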
BIN  mediapipe/tasks/testdata/vision/gesture_recognizer_with_custom_classifier.task  (vendored, new file; binary file not shown)
BIN  mediapipe/tasks/testdata/vision/hand_gesture_recognizer_with_custom_classifier.task  (vendored, new file; binary file not shown)
223  mediapipe/tasks/testdata/vision/victory_landmarks.pbtxt  (vendored, new file)

@@ -0,0 +1,223 @@
|
||||||
|
classifications {
|
||||||
|
classification {
|
||||||
|
score: 1.0
|
||||||
|
label: "Left"
|
||||||
|
display_name: "Left"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
landmarks {
|
||||||
|
landmark {
|
||||||
|
x: 0.5164316
|
||||||
|
y: 0.804093
|
||||||
|
z: 8.7653416e-07
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.6063608
|
||||||
|
y: 0.7111354
|
||||||
|
z: -0.044089418
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.6280186
|
||||||
|
y: 0.588498
|
||||||
|
z: -0.062358405
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.5265348
|
||||||
|
y: 0.52083343
|
||||||
|
z: -0.08526791
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.4243384
|
||||||
|
y: 0.4993468
|
||||||
|
z: -0.1077741
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.5605667
|
||||||
|
y: 0.4489705
|
||||||
|
z: -0.016151091
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.5766643
|
||||||
|
y: 0.32260323
|
||||||
|
z: -0.049342215
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.5795845
|
||||||
|
y: 0.24180722
|
||||||
|
z: -0.07323826
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.5827511
|
||||||
|
y: 0.16940045
|
||||||
|
z: -0.09069163
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.4696163
|
||||||
|
y: 0.4599558
|
||||||
|
z: -0.032168437
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.44361597
|
||||||
|
y: 0.31689578
|
||||||
|
z: -0.075698614
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.42695498
|
||||||
|
y: 0.22273324
|
||||||
|
z: -0.10819675
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.40697217
|
||||||
|
y: 0.14279765
|
||||||
|
z: -0.12666894
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.39543492
|
||||||
|
y: 0.50612336
|
||||||
|
z: -0.055138163
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.3618012
|
||||||
|
y: 0.4388296
|
||||||
|
z: -0.1298119
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.4154368
|
||||||
|
y: 0.52674913
|
||||||
|
z: -0.1463017
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.44916254
|
||||||
|
y: 0.59442246
|
||||||
|
z: -0.13470782
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.33178204
|
||||||
|
y: 0.5731769
|
||||||
|
z: -0.08103096
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.3092102
|
||||||
|
y: 0.5040002
|
||||||
|
z: -0.13258384
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.35576707
|
||||||
|
y: 0.5576498
|
||||||
|
z: -0.12714732
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.393444
|
||||||
|
y: 0.6118667
|
||||||
|
z: -0.11102459
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
world_landmarks {
|
||||||
|
landmark {
|
||||||
|
x: 0.01299962
|
||||||
|
y: 0.09162361
|
||||||
|
z: 0.011185312
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.03726317
|
||||||
|
y: 0.0638103
|
||||||
|
z: -0.010005756
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.03975261
|
||||||
|
y: 0.03712649
|
||||||
|
z: -0.02906275
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.018798776
|
||||||
|
y: 0.012429599
|
||||||
|
z: -0.048737116
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.0128555335
|
||||||
|
y: 0.001022811
|
||||||
|
z: -0.044505004
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.025658218
|
||||||
|
y: -0.008031519
|
||||||
|
z: -0.0058278795
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.028017294
|
||||||
|
y: -0.038120236
|
||||||
|
z: -0.010376478
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.030067094
|
||||||
|
y: -0.059907563
|
||||||
|
z: -0.014568218
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.027284538
|
||||||
|
y: -0.07803874
|
||||||
|
z: -0.032692235
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: 0.0013260426
|
||||||
|
y: -0.005039873
|
||||||
|
z: 0.005567288
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.002380834
|
||||||
|
y: -0.044605374
|
||||||
|
z: -0.0038231965
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.009240147
|
||||||
|
y: -0.066279344
|
||||||
|
z: -0.02161214
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.0092535615
|
||||||
|
y: -0.08933755
|
||||||
|
z: -0.037401434
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.01751284
|
||||||
|
y: 0.0037118336
|
||||||
|
z: 0.0047480655
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.02195602
|
||||||
|
y: -0.010006189
|
||||||
|
z: -0.02371484
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.012851426
|
||||||
|
y: 0.008346066
|
||||||
|
z: -0.037721373
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.00018795021
|
||||||
|
y: 0.026816685
|
||||||
|
z: -0.03732748
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.034864448
|
||||||
|
y: 0.022316
|
||||||
|
z: -0.0002774651
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.035896845
|
||||||
|
y: 0.01066218
|
||||||
|
z: -0.017325373
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.02358637
|
||||||
|
y: 0.018667895
|
||||||
|
z: -0.028403495
|
||||||
|
}
|
||||||
|
landmark {
|
||||||
|
x: -0.013704676
|
||||||
|
y: 0.033456434
|
||||||
|
z: -0.02595728
|
||||||
|
}
|
||||||
|
}
|
24  third_party/external_files.bzl  (vendored)

@@ -244,6 +244,18 @@ def external_files():
         urls = ["https://storage.googleapis.com/mediapipe-assets/feature_tensor_meta.json?generation=1665422818797346"],
     )

+    http_file(
+        name = "com_google_mediapipe_fist_jpg",
+        sha256 = "43fa1cabf3f90d574accc9a56986e2ee48638ce59fc65af1846487f73bb2ef24",
+        urls = ["https://storage.googleapis.com/mediapipe-assets/fist.jpg?generation=1666999359066679"],
+    )
+
+    http_file(
+        name = "com_google_mediapipe_fist_landmarks_pbtxt",
+        sha256 = "76d6489e6163211ce5e9080e51983165bb9b24ff50146cc7487bd629f011c598",
+        urls = ["https://storage.googleapis.com/mediapipe-assets/fist_landmarks.pbtxt?generation=1666999360561864"],
+    )
+
     http_file(
         name = "com_google_mediapipe_general_meta_json",
         sha256 = "b95363e4bae89b9c2af484498312aaad4efc7ff57c7eadcc4e5e7adca641445f",
@@ -838,6 +850,18 @@ def external_files():
         urls = ["https://storage.googleapis.com/mediapipe-assets/universal_sentence_encoder_qa_with_metadata.tflite?generation=1665445919252005"],
     )

+    http_file(
+        name = "com_google_mediapipe_victory_jpg",
+        sha256 = "84cb8853e3df614e0cb5c93a25e3e2f38ea5e4f92fd428ee7d867ed3479d5764",
+        urls = ["https://storage.googleapis.com/mediapipe-assets/victory.jpg?generation=1666999364225126"],
+    )
+
+    http_file(
+        name = "com_google_mediapipe_victory_landmarks_pbtxt",
+        sha256 = "b25ab4f222674489f543afb6454396ecbc1437a7ae6213dbf0553029ae939ab0",
+        urls = ["https://storage.googleapis.com/mediapipe-assets/victory_landmarks.pbtxt?generation=1666999366036622"],
+    )
+
     http_file(
         name = "com_google_mediapipe_vocab_for_regex_tokenizer_txt",
         sha256 = "b1134b10927a53ce4224bbc30ccf075c9969c94ebf40c368966d1dcf445ca923",
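Each http_file entry pins a downloaded asset to an exact revision via its sha256 digest and the generation query parameter in the URL. If one of these assets (say fist.jpg) were regenerated, the digest would need to be recomputed locally, for example with `sha256sum fist.jpg`, and updated here together with the new generation value.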