Add support for rotations in GestureRecognizer C++ API.

PiperOrigin-RevId: 482533599
commit 4b5df1cb96 (parent e71638cf67)
Authored by MediaPipe Team on 2022-10-20 10:40:56 -07:00; committed by Copybara-Service.
21 changed files with 1048 additions and 63 deletions.

File: mediapipe/tasks/cc/vision/gesture_recognizer/BUILD

@@ -56,6 +56,7 @@ cc_library(
         "//mediapipe/framework/formats:classification_cc_proto",
         "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/formats:matrix",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/framework/formats:tensor",
         "//mediapipe/tasks/cc:common",
         "//mediapipe/tasks/cc/components:image_preprocessing",
@@ -91,6 +92,7 @@ cc_library(
         "//mediapipe/framework/formats:classification_cc_proto",
         "//mediapipe/framework/formats:image",
         "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/tasks/cc:common",
         "//mediapipe/tasks/cc/components/processors/proto:classifier_options_cc_proto",
         "//mediapipe/tasks/cc/core:model_task_graph",
@@ -123,6 +125,7 @@ cc_library(
         "//mediapipe/framework/formats:classification_cc_proto",
         "//mediapipe/framework/formats:image",
         "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/tasks/cc:common",
         "//mediapipe/tasks/cc/components:image_preprocessing",
         "//mediapipe/tasks/cc/components/containers:gesture_recognition_result",

File: mediapipe/tasks/cc/vision/gesture_recognizer/calculators/BUILD

@@ -69,6 +69,7 @@ cc_library(
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/formats:matrix",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/framework/port:ret_check",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",
@@ -86,6 +87,7 @@ cc_test(
         "//mediapipe/framework:calculator_runner",
         "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/formats:matrix",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/framework/port:gtest_main",
         "//mediapipe/framework/port:parse_text_proto",
         "@com_google_absl//absl/strings",

File: mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.cc

@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include <algorithm>
+#include <cmath>
 #include <limits>
 #include <memory>
 #include <string>
@@ -26,6 +27,7 @@ limitations under the License.
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/matrix.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator.pb.h"
@@ -38,6 +40,7 @@ namespace {
 constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kImageSizeTag[] = "IMAGE_SIZE";
+constexpr char kNormRectTag[] = "NORM_RECT";
 constexpr char kLandmarksMatrixTag[] = "LANDMARKS_MATRIX";
 constexpr int kFeaturesPerLandmark = 3;
@@ -62,6 +65,25 @@ absl::StatusOr<LandmarkListT> NormalizeLandmarkAspectRatio(
   return normalized_landmarks;
 }

+template <class LandmarkListT>
+absl::StatusOr<LandmarkListT> RotateLandmarks(const LandmarkListT& landmarks,
+                                              float rotation) {
+  float cos = std::cos(rotation);
+  // Negate because Y-axis points down and not up.
+  float sin = std::sin(-rotation);
+  LandmarkListT rotated_landmarks;
+  for (int i = 0; i < landmarks.landmark_size(); ++i) {
+    const auto& old_landmark = landmarks.landmark(i);
+    float x = old_landmark.x() - 0.5;
+    float y = old_landmark.y() - 0.5;
+    auto* new_landmark = rotated_landmarks.add_landmark();
+    new_landmark->set_x(x * cos - y * sin + 0.5);
+    new_landmark->set_y(y * cos + x * sin + 0.5);
+    new_landmark->set_z(old_landmark.z());
+  }
+  return rotated_landmarks;
+}
+
 template <class LandmarkListT>
 absl::StatusOr<LandmarkListT> NormalizeObject(const LandmarkListT& landmarks,
                                               int origin_offset) {
@@ -134,6 +156,13 @@ absl::Status ProcessLandmarks(LandmarkListT landmarks, CalculatorContext* cc) {
                      NormalizeLandmarkAspectRatio(landmarks, width, height));
   }
+  if (cc->Inputs().HasTag(kNormRectTag)) {
+    RET_CHECK(!cc->Inputs().Tag(kNormRectTag).IsEmpty());
+    const auto rotation =
+        cc->Inputs().Tag(kNormRectTag).Get<NormalizedRect>().rotation();
+    ASSIGN_OR_RETURN(landmarks, RotateLandmarks(landmarks, rotation));
+  }
   const auto& options = cc->Options<LandmarksToMatrixCalculatorOptions>();
   if (options.object_normalization()) {
     ASSIGN_OR_RETURN(
@@ -163,6 +192,8 @@ absl::Status ProcessLandmarks(LandmarkListT landmarks, CalculatorContext* cc) {
 //   WORLD_LANDMARKS - World 3d landmarks of one object. Use *either*
 //                     LANDMARKS or WORLD_LANDMARKS.
 //   IMAGE_SIZE - (width, height) of the image
+//   NORM_RECT - Optional NormalizedRect object whose 'rotation' field is used
+//               to rotate the landmarks.
 // Output:
 //   LANDMARKS_MATRIX - Matrix for the landmarks.
 //
@@ -185,6 +216,7 @@ class LandmarksToMatrixCalculator : public CalculatorBase {
     cc->Inputs().Tag(kLandmarksTag).Set<NormalizedLandmarkList>().Optional();
     cc->Inputs().Tag(kWorldLandmarksTag).Set<LandmarkList>().Optional();
     cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>().Optional();
+    cc->Inputs().Tag(kNormRectTag).Set<NormalizedRect>().Optional();
     cc->Outputs().Tag(kLandmarksMatrixTag).Set<Matrix>();
     return absl::OkStatus();
   }
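Aside: the new RotateLandmarks helper rotates normalized landmarks about the image center (0.5, 0.5), and the sine term is negated because the image Y-axis points down, so a positive 'rotation' is anti-clockwise on screen. A minimal self-contained sketch (not part of the commit) reproducing the math for one point:

#include <cmath>
#include <cstdio>

int main() {
  // Mirror RotateLandmarks for the normalized point (1.0, 0.5) and a +pi/2
  // rotation: cos(rotation) and sin(-rotation), centered on (0.5, 0.5).
  const float rotation = M_PI / 2.0f;
  const float cos_r = std::cos(rotation);
  const float sin_r = std::sin(-rotation);
  const float x = 1.0f - 0.5f;
  const float y = 0.5f - 0.5f;
  // Prints "(0.50, 0.00)": the midpoint of the right edge maps to the
  // midpoint of the top edge, an anti-clockwise quarter turn on screen.
  std::printf("(%.2f, %.2f)\n", x * cos_r - y * sin_r + 0.5f,
              y * cos_r + x * sin_r + 0.5f);
  return 0;
}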

File: mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_test.cc

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <cmath>
 #include <memory>
 #include <string>
 #include <utility>
@@ -23,6 +24,7 @@ limitations under the License.
 #include "mediapipe/framework/calculator_runner.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/matrix.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/framework/port/gtest.h"
 #include "mediapipe/framework/port/parse_text_proto.h"
 #include "mediapipe/framework/port/status_matchers.h"
@@ -35,6 +37,7 @@ constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kImageSizeTag[] = "IMAGE_SIZE";
 constexpr char kLandmarksMatrixTag[] = "LANDMARKS_MATRIX";
+constexpr char kNormRectTag[] = "NORM_RECT";

 template <class LandmarkListT>
 LandmarkListT BuildPseudoLandmarks(int num_landmarks, int offset = 0) {
@@ -54,6 +57,7 @@ struct Landmarks2dToMatrixCalculatorTestCase {
   int object_normalization_origin_offset = -1;
   float expected_cell_0_2;
   float expected_cell_1_5;
+  float rotation;
 };

 using Landmarks2dToMatrixCalculatorTest =
@@ -68,6 +72,7 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
         calculator: "LandmarksToMatrixCalculator"
         input_stream: "LANDMARKS:landmarks"
         input_stream: "IMAGE_SIZE:image_size"
+        input_stream: "NORM_RECT:norm_rect"
         output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
         options {
           [mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
@@ -91,6 +96,11 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
   runner.MutableInputs()
       ->Tag(kImageSizeTag)
       .packets.push_back(Adopt(image_size.release()).At(Timestamp(0)));
+  auto norm_rect = std::make_unique<NormalizedRect>();
+  norm_rect->set_rotation(test_case.rotation);
+  runner.MutableInputs()
+      ->Tag(kNormRectTag)
+      .packets.push_back(Adopt(norm_rect.release()).At(Timestamp(0)));

   MP_ASSERT_OK(runner.Run()) << "Calculator execution failed.";
@@ -109,12 +119,20 @@ INSTANTIATE_TEST_CASE_P(
         .base_offset = 0,
         .object_normalization_origin_offset = 0,
         .expected_cell_0_2 = 0.1f,
-        .expected_cell_1_5 = 0.1875f},
+        .expected_cell_1_5 = 0.1875f,
+        .rotation = 0},
        {.test_name = "TestWithOffset21",
         .base_offset = 21,
         .object_normalization_origin_offset = 0,
         .expected_cell_0_2 = 0.1f,
-        .expected_cell_1_5 = 0.1875f}}),
+        .expected_cell_1_5 = 0.1875f,
+        .rotation = 0},
+       {.test_name = "TestWithRotation",
+        .base_offset = 0,
+        .object_normalization_origin_offset = 0,
+        .expected_cell_0_2 = 0.075f,
+        .expected_cell_1_5 = -0.25f,
+        .rotation = M_PI / 2.0}}),
    [](const testing::TestParamInfo<
        Landmarks2dToMatrixCalculatorTest::ParamType>& info) {
      return info.param.test_name;
@@ -126,6 +144,7 @@ struct LandmarksWorld3dToMatrixCalculatorTestCase {
   int object_normalization_origin_offset = -1;
   float expected_cell_0_2;
   float expected_cell_1_5;
+  float rotation;
 };

 using LandmarksWorld3dToMatrixCalculatorTest =
@@ -140,6 +159,7 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
         calculator: "LandmarksToMatrixCalculator"
         input_stream: "WORLD_LANDMARKS:landmarks"
         input_stream: "IMAGE_SIZE:image_size"
+        input_stream: "NORM_RECT:norm_rect"
         output_stream: "LANDMARKS_MATRIX:landmarks_matrix"
         options {
           [mediapipe.LandmarksToMatrixCalculatorOptions.ext] {
@@ -162,6 +182,11 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
   runner.MutableInputs()
       ->Tag(kImageSizeTag)
       .packets.push_back(Adopt(image_size.release()).At(Timestamp(0)));
+  auto norm_rect = std::make_unique<NormalizedRect>();
+  norm_rect->set_rotation(test_case.rotation);
+  runner.MutableInputs()
+      ->Tag(kNormRectTag)
+      .packets.push_back(Adopt(norm_rect.release()).At(Timestamp(0)));

   MP_ASSERT_OK(runner.Run()) << "Calculator execution failed.";
@@ -180,17 +205,26 @@ INSTANTIATE_TEST_CASE_P(
         .base_offset = 0,
         .object_normalization_origin_offset = 0,
         .expected_cell_0_2 = 0.1f,
-        .expected_cell_1_5 = 0.25},
+        .expected_cell_1_5 = 0.25,
+        .rotation = 0},
        {.test_name = "TestWithOffset21",
         .base_offset = 21,
         .object_normalization_origin_offset = 0,
         .expected_cell_0_2 = 0.1f,
-        .expected_cell_1_5 = 0.25},
+        .expected_cell_1_5 = 0.25,
+        .rotation = 0},
        {.test_name = "NoObjectNormalization",
         .base_offset = 0,
         .object_normalization_origin_offset = -1,
         .expected_cell_0_2 = 0.021f,
-        .expected_cell_1_5 = 0.052f}}),
+        .expected_cell_1_5 = 0.052f,
+        .rotation = 0},
+       {.test_name = "TestWithRotation",
+        .base_offset = 0,
+        .object_normalization_origin_offset = 0,
+        .expected_cell_0_2 = 0.1f,
+        .expected_cell_1_5 = -0.25f,
+        .rotation = M_PI / 2.0}}),
    [](const testing::TestParamInfo<
        LandmarksWorld3dToMatrixCalculatorTest::ParamType>& info) {
      return info.param.test_name;

File: mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc

@@ -17,6 +17,7 @@ limitations under the License.
 #include <memory>
 #include <type_traits>
+#include <utility>
 #include <vector>

 #include "absl/memory/memory.h"
@@ -27,6 +28,7 @@ limitations under the License.
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/image.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/framework/packet.h"
 #include "mediapipe/tasks/cc/common.h"
 #include "mediapipe/tasks/cc/components/image_preprocessing.h"
@@ -62,6 +64,8 @@ constexpr char kHandGestureSubgraphTypeName[] =
 constexpr char kImageTag[] = "IMAGE";
 constexpr char kImageInStreamName[] = "image_in";
 constexpr char kImageOutStreamName[] = "image_out";
+constexpr char kNormRectTag[] = "NORM_RECT";
+constexpr char kNormRectStreamName[] = "norm_rect_in";
 constexpr char kHandGesturesTag[] = "HAND_GESTURES";
 constexpr char kHandGesturesStreamName[] = "hand_gestures";
 constexpr char kHandednessTag[] = "HANDEDNESS";
@@ -72,6 +76,31 @@ constexpr char kHandWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kHandWorldLandmarksStreamName[] = "world_landmarks";
 constexpr int kMicroSecondsPerMilliSecond = 1000;

+// Returns a NormalizedRect filling the whole image. If input is present, its
+// rotation is set in the returned NormalizedRect and a check is performed to
+// make sure no region-of-interest was provided. Otherwise, rotation is set to
+// 0.
+absl::StatusOr<NormalizedRect> FillNormalizedRect(
+    std::optional<NormalizedRect> normalized_rect) {
+  NormalizedRect result;
+  if (normalized_rect.has_value()) {
+    result = *normalized_rect;
+  }
+  bool has_coordinates = result.has_x_center() || result.has_y_center() ||
+                         result.has_width() || result.has_height();
+  if (has_coordinates) {
+    return CreateStatusWithPayload(
+        absl::StatusCode::kInvalidArgument,
+        "GestureRecognizer does not support region-of-interest.",
+        MediaPipeTasksStatus::kInvalidArgumentError);
+  }
+  result.set_x_center(0.5);
+  result.set_y_center(0.5);
+  result.set_width(1);
+  result.set_height(1);
+  return result;
+}
+
 // Creates a MediaPipe graph config that contains a subgraph node of
 // "mediapipe.tasks.vision.GestureRecognizerGraph". If the task is running
 // in the live stream mode, a "FlowLimiterCalculator" will be added to limit the
@@ -83,6 +112,7 @@ CalculatorGraphConfig CreateGraphConfig(
   auto& subgraph = graph.AddNode(kHandGestureSubgraphTypeName);
   subgraph.GetOptions<GestureRecognizerGraphOptionsProto>().Swap(options.get());
   graph.In(kImageTag).SetName(kImageInStreamName);
+  graph.In(kNormRectTag).SetName(kNormRectStreamName);
   subgraph.Out(kHandGesturesTag).SetName(kHandGesturesStreamName) >>
       graph.Out(kHandGesturesTag);
   subgraph.Out(kHandednessTag).SetName(kHandednessStreamName) >>
@@ -93,10 +123,11 @@ CalculatorGraphConfig CreateGraphConfig(
       graph.Out(kHandWorldLandmarksTag);
   subgraph.Out(kImageTag).SetName(kImageOutStreamName) >> graph.Out(kImageTag);
   if (enable_flow_limiting) {
-    return tasks::core::AddFlowLimiterCalculator(graph, subgraph, {kImageTag},
-                                                 kHandGesturesTag);
+    return tasks::core::AddFlowLimiterCalculator(
+        graph, subgraph, {kImageTag, kNormRectTag}, kHandGesturesTag);
   }
   graph.In(kImageTag) >> subgraph.In(kImageTag);
+  graph.In(kNormRectTag) >> subgraph.In(kNormRectTag);
   return graph.GetConfig();
 }
@@ -216,16 +247,22 @@ absl::StatusOr<std::unique_ptr<GestureRecognizer>> GestureRecognizer::Create(
 }

 absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
-    mediapipe::Image image) {
+    mediapipe::Image image,
+    std::optional<mediapipe::NormalizedRect> image_processing_options) {
   if (image.UsesGpu()) {
     return CreateStatusWithPayload(
         absl::StatusCode::kInvalidArgument,
         "GPU input images are currently not supported.",
         MediaPipeTasksStatus::kRunnerUnexpectedInputError);
   }
-  ASSIGN_OR_RETURN(auto output_packets,
-                   ProcessImageData({{kImageInStreamName,
-                                      MakePacket<Image>(std::move(image))}}));
+  ASSIGN_OR_RETURN(NormalizedRect norm_rect,
+                   FillNormalizedRect(image_processing_options));
+  ASSIGN_OR_RETURN(
+      auto output_packets,
+      ProcessImageData(
+          {{kImageInStreamName, MakePacket<Image>(std::move(image))},
+           {kNormRectStreamName,
+            MakePacket<NormalizedRect>(std::move(norm_rect))}}));
   if (output_packets[kHandGesturesStreamName].IsEmpty()) {
     return {{{}, {}, {}, {}}};
   }
@@ -245,18 +282,24 @@ absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
 }

 absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
-    mediapipe::Image image, int64 timestamp_ms) {
+    mediapipe::Image image, int64 timestamp_ms,
+    std::optional<mediapipe::NormalizedRect> image_processing_options) {
   if (image.UsesGpu()) {
     return CreateStatusWithPayload(
         absl::StatusCode::kInvalidArgument,
         absl::StrCat("GPU input images are currently not supported."),
         MediaPipeTasksStatus::kRunnerUnexpectedInputError);
   }
+  ASSIGN_OR_RETURN(NormalizedRect norm_rect,
+                   FillNormalizedRect(image_processing_options));
   ASSIGN_OR_RETURN(
       auto output_packets,
       ProcessVideoData(
           {{kImageInStreamName,
             MakePacket<Image>(std::move(image))
+                .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))},
+           {kNormRectStreamName,
+            MakePacket<NormalizedRect>(std::move(norm_rect))
                 .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}));
   if (output_packets[kHandGesturesStreamName].IsEmpty()) {
     return {{{}, {}, {}, {}}};
@@ -276,17 +319,23 @@ absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
   };
 }

-absl::Status GestureRecognizer::RecognizeAsync(mediapipe::Image image,
-                                               int64 timestamp_ms) {
+absl::Status GestureRecognizer::RecognizeAsync(
+    mediapipe::Image image, int64 timestamp_ms,
+    std::optional<mediapipe::NormalizedRect> image_processing_options) {
   if (image.UsesGpu()) {
     return CreateStatusWithPayload(
         absl::StatusCode::kInvalidArgument,
         absl::StrCat("GPU input images are currently not supported."),
         MediaPipeTasksStatus::kRunnerUnexpectedInputError);
   }
+  ASSIGN_OR_RETURN(NormalizedRect norm_rect,
+                   FillNormalizedRect(image_processing_options));
   return SendLiveStreamData(
       {{kImageInStreamName,
         MakePacket<Image>(std::move(image))
+            .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))},
+       {kNormRectStreamName,
+        MakePacket<NormalizedRect>(std::move(norm_rect))
             .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
 }
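In short, FillNormalizedRect accepts rotation-only options and rejects any region-of-interest. A hedged sketch of the resulting behavior (using the names defined in this file):

NormalizedRect options;
options.set_rotation(M_PI / 2);            // Rotation only: accepted; the
auto rect = FillNormalizedRect(options);   // result is a full-image rect
                                           // (center (0.5, 0.5), size 1x1)
                                           // with the rotation preserved.

options.set_width(0.5);                    // Any of x_center / y_center /
auto status = FillNormalizedRect(options); // width / height set: returns a
                                           // kInvalidArgument error instead.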

File: mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h

@@ -17,11 +17,13 @@ limitations under the License.
 #define MEDIAPIPE_TASKS_CC_VISION_GESTURE_RECOGNIZRER_GESTURE_RECOGNIZER_H_

 #include <memory>
+#include <optional>

 #include "absl/status/statusor.h"
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/image.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/tasks/cc/components/containers/gesture_recognition_result.h"
 #include "mediapipe/tasks/cc/core/base_options.h"
 #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
@@ -93,6 +95,13 @@ struct GestureRecognizerOptions {
 // Inputs:
 //   Image
 //     - The image that gesture recognition runs on.
+//   std::optional<NormalizedRect>
+//     - If provided, can be used to specify the rotation to apply to the image
+//       before performing gesture recognition, by setting its 'rotation' field
+//       in radians (e.g. 'M_PI / 2' for a 90° anti-clockwise rotation). Note
+//       that specifying a region-of-interest using the 'x_center', 'y_center',
+//       'width' and 'height' fields is NOT supported and will result in an
+//       invalid argument error being returned.
 // Outputs:
 //   GestureRecognitionResult
 //     - The hand gesture recognition results.
@@ -122,12 +131,23 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
   //
   // image - mediapipe::Image
   //   Image to perform hand gesture recognition on.
+  // image_processing_options - std::optional<NormalizedRect>
+  //   If provided, can be used to specify the rotation to apply to the image
+  //   before performing gesture recognition, by setting its 'rotation' field
+  //   in radians (e.g. 'M_PI / 2' for a 90° anti-clockwise rotation). Note
+  //   that specifying a region-of-interest using the 'x_center', 'y_center',
+  //   'width' and 'height' fields is NOT supported and will result in an
+  //   invalid argument error being returned.
   //
   // The image can be of any size with format RGB or RGBA.
   // TODO: Describes how the input image will be preprocessed
   // after the yuv support is implemented.
+  // TODO: use an ImageProcessingOptions struct instead of
+  // NormalizedRect.
   absl::StatusOr<components::containers::GestureRecognitionResult> Recognize(
-      Image image);
+      Image image,
+      std::optional<mediapipe::NormalizedRect> image_processing_options =
+          std::nullopt);

   // Performs gesture recognition on the provided video frame.
   // Only use this method when the GestureRecognizer is created with the video
@@ -137,7 +157,9 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
   // provide the video frame's timestamp (in milliseconds). The input timestamps
   // must be monotonically increasing.
   absl::StatusOr<components::containers::GestureRecognitionResult>
-  RecognizeForVideo(Image image, int64 timestamp_ms);
+  RecognizeForVideo(Image image, int64 timestamp_ms,
+                    std::optional<mediapipe::NormalizedRect>
+                        image_processing_options = std::nullopt);

   // Sends live image data to perform gesture recognition, and the results will
   // be available via the "result_callback" provided in the
@@ -157,7 +179,9 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
   //     longer be valid when the callback returns. To access the image data
   //     outside of the callback, callers need to make a copy of the image.
   //   - The input timestamp in milliseconds.
-  absl::Status RecognizeAsync(Image image, int64 timestamp_ms);
+  absl::Status RecognizeAsync(Image image, int64 timestamp_ms,
+                              std::optional<mediapipe::NormalizedRect>
+                                  image_processing_options = std::nullopt);

   // Shuts down the GestureRecognizer when all works are done.
   absl::Status Close() { return runner_->Close(); }
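Putting the new API surface together, a caller could pass a rotation as follows. This is an illustrative sketch, not code from the commit; it assumes a recognizer already obtained from GestureRecognizer::Create and a mediapipe::Image named image:

mediapipe::NormalizedRect image_processing_options;
image_processing_options.set_rotation(M_PI / 2);  // 90° anti-clockwise.
auto result = recognizer->Recognize(image, image_processing_options);

// The argument defaults to std::nullopt, so existing call sites keep
// compiling and behave as before (no rotation):
auto result_unrotated = recognizer->Recognize(image);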

File: mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer_graph.cc

@@ -24,6 +24,7 @@ limitations under the License.
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/image.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/tasks/cc/common.h"
 #include "mediapipe/tasks/cc/core/model_task_graph.h"
 #include "mediapipe/tasks/cc/core/utils.h"
@@ -53,6 +54,7 @@ using ::mediapipe::tasks::vision::hand_landmarker::proto::
     HandLandmarkerGraphOptions;

 constexpr char kImageTag[] = "IMAGE";
+constexpr char kNormRectTag[] = "NORM_RECT";
 constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kHandednessTag[] = "HANDEDNESS";
@@ -76,6 +78,9 @@ struct GestureRecognizerOutputs {
 // Inputs:
 //   IMAGE - Image
 //     Image to perform hand gesture recognition on.
+//   NORM_RECT - NormalizedRect
+//     Describes image rotation and region of image to perform landmarks
+//     detection on.
 //
 // Outputs:
 //   HAND_GESTURES - std::vector<ClassificationList>
@@ -93,13 +98,15 @@ struct GestureRecognizerOutputs {
 //   IMAGE - mediapipe::Image
 //     The image that gesture recognizer runs on and has the pixel data stored
 //     on the target storage (CPU vs GPU).
-//
+// All returned coordinates are in the unrotated and uncropped input image
+// coordinates system.
 //
 // Example:
 // node {
 //   calculator:
 //     "mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph"
 //   input_stream: "IMAGE:image_in"
+//   input_stream: "NORM_RECT:norm_rect"
 //   output_stream: "HAND_GESTURES:hand_gestures"
 //   output_stream: "LANDMARKS:hand_landmarks"
 //   output_stream: "WORLD_LANDMARKS:world_hand_landmarks"
@@ -132,7 +139,8 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
     ASSIGN_OR_RETURN(auto hand_gesture_recognition_output,
                      BuildGestureRecognizerGraph(
                          *sc->MutableOptions<GestureRecognizerGraphOptions>(),
-                         graph[Input<Image>(kImageTag)], graph));
+                         graph[Input<Image>(kImageTag)],
+                         graph[Input<NormalizedRect>(kNormRectTag)], graph));
     hand_gesture_recognition_output.gesture >>
         graph[Output<std::vector<ClassificationList>>(kHandGesturesTag)];
     hand_gesture_recognition_output.handedness >>
@@ -148,7 +156,7 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
  private:
   absl::StatusOr<GestureRecognizerOutputs> BuildGestureRecognizerGraph(
       GestureRecognizerGraphOptions& graph_options, Source<Image> image_in,
-      Graph& graph) {
+      Source<NormalizedRect> norm_rect_in, Graph& graph) {
     auto& image_property = graph.AddNode("ImagePropertiesCalculator");
     image_in >> image_property.In("IMAGE");
     auto image_size = image_property.Out("SIZE");
@@ -162,6 +170,7 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
         graph_options.mutable_hand_landmarker_graph_options());
     image_in >> hand_landmarker_graph.In(kImageTag);
+    norm_rect_in >> hand_landmarker_graph.In(kNormRectTag);
     auto hand_landmarks =
         hand_landmarker_graph[Output<std::vector<NormalizedLandmarkList>>(
             kLandmarksTag)];
@@ -187,6 +196,7 @@ class GestureRecognizerGraph : public core::ModelTaskGraph {
     hand_world_landmarks >> hand_gesture_subgraph.In(kWorldLandmarksTag);
     handedness >> hand_gesture_subgraph.In(kHandednessTag);
     image_size >> hand_gesture_subgraph.In(kImageSizeTag);
+    norm_rect_in >> hand_gesture_subgraph.In(kNormRectTag);
     hand_landmarks_id >> hand_gesture_subgraph.In(kHandTrackingIdsTag);
     auto hand_gestures =
         hand_gesture_subgraph[Output<std::vector<ClassificationList>>(

File: mediapipe/tasks/cc/vision/gesture_recognizer/hand_gesture_recognizer_graph.cc

@@ -25,6 +25,7 @@ limitations under the License.
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/matrix.h"
+#include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/framework/formats/tensor.h"
 #include "mediapipe/tasks/cc/common.h"
 #include "mediapipe/tasks/cc/components/processors/classification_postprocessing_graph.h"
@@ -57,6 +58,7 @@ constexpr char kHandednessTag[] = "HANDEDNESS";
 constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kImageSizeTag[] = "IMAGE_SIZE";
+constexpr char kNormRectTag[] = "NORM_RECT";
 constexpr char kHandTrackingIdsTag[] = "HAND_TRACKING_IDS";
 constexpr char kHandGesturesTag[] = "HAND_GESTURES";
 constexpr char kLandmarksMatrixTag[] = "LANDMARKS_MATRIX";
@@ -92,6 +94,9 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
 //     Detected hand landmarks in world coordinates.
 //   IMAGE_SIZE - std::pair<int, int>
 //     The size of image from which the landmarks detected from.
+//   NORM_RECT - NormalizedRect
+//     NormalizedRect whose 'rotation' field is used to rotate the
+//     landmarks before processing them.
 //
 // Outputs:
 //   HAND_GESTURES - ClassificationList
@@ -106,6 +111,7 @@ Source<std::vector<Tensor>> ConvertMatrixToTensor(Source<Matrix> matrix,
 //   input_stream: "LANDMARKS:landmarks"
 //   input_stream: "WORLD_LANDMARKS:world_landmarks"
 //   input_stream: "IMAGE_SIZE:image_size"
+//   input_stream: "NORM_RECT:norm_rect"
 //   output_stream: "HAND_GESTURES:hand_gestures"
 //   options {
 //     [mediapipe.tasks.vision.gesture_recognizer.proto.HandGestureRecognizerGraphOptions.ext]
@@ -133,7 +139,8 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
             graph[Input<ClassificationList>(kHandednessTag)],
             graph[Input<NormalizedLandmarkList>(kLandmarksTag)],
             graph[Input<LandmarkList>(kWorldLandmarksTag)],
-            graph[Input<std::pair<int, int>>(kImageSizeTag)], graph));
+            graph[Input<std::pair<int, int>>(kImageSizeTag)],
+            graph[Input<NormalizedRect>(kNormRectTag)], graph));
     hand_gestures >> graph[Output<ClassificationList>(kHandGesturesTag)];
     return graph.GetConfig();
   }
@@ -145,7 +152,8 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
       Source<ClassificationList> handedness,
       Source<NormalizedLandmarkList> hand_landmarks,
       Source<LandmarkList> hand_world_landmarks,
-      Source<std::pair<int, int>> image_size, Graph& graph) {
+      Source<std::pair<int, int>> image_size, Source<NormalizedRect> norm_rect,
+      Graph& graph) {
     // Converts the ClassificationList to a matrix.
     auto& handedness_to_matrix = graph.AddNode("HandednessToMatrixCalculator");
     handedness >> handedness_to_matrix.In(kHandednessTag);
@@ -166,6 +174,7 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
         landmarks_options;
     hand_landmarks >> hand_landmarks_to_matrix.In(kLandmarksTag);
     image_size >> hand_landmarks_to_matrix.In(kImageSizeTag);
+    norm_rect >> hand_landmarks_to_matrix.In(kNormRectTag);
     auto hand_landmarks_matrix =
         hand_landmarks_to_matrix[Output<Matrix>(kLandmarksMatrixTag)];
@@ -181,6 +190,7 @@ class SingleHandGestureRecognizerGraph : public core::ModelTaskGraph {
     hand_world_landmarks >>
         hand_world_landmarks_to_matrix.In(kWorldLandmarksTag);
     image_size >> hand_world_landmarks_to_matrix.In(kImageSizeTag);
+    norm_rect >> hand_world_landmarks_to_matrix.In(kNormRectTag);
     auto hand_world_landmarks_matrix =
         hand_world_landmarks_to_matrix[Output<Matrix>(kLandmarksMatrixTag)];
@@ -239,6 +249,9 @@ REGISTER_MEDIAPIPE_GRAPH(
 //     A vector hand landmarks in world coordinates.
 //   IMAGE_SIZE - std::pair<int, int>
 //     The size of image from which the landmarks detected from.
+//   NORM_RECT - NormalizedRect
+//     NormalizedRect whose 'rotation' field is used to rotate the
+//     landmarks before processing them.
 //   HAND_TRACKING_IDS - std::vector<int>
 //     A vector of the tracking ids of the hands. The tracking id is the vector
 //     index corresponding to the same hand if the graph runs multiple times.
@@ -257,6 +270,7 @@ REGISTER_MEDIAPIPE_GRAPH(
 //   input_stream: "LANDMARKS:landmarks"
 //   input_stream: "WORLD_LANDMARKS:world_landmarks"
 //   input_stream: "IMAGE_SIZE:image_size"
+//   input_stream: "NORM_RECT:norm_rect"
 //   input_stream: "HAND_TRACKING_IDS:hand_tracking_ids"
 //   output_stream: "HAND_GESTURES:hand_gestures"
 //   options {
@@ -283,6 +297,7 @@ class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph {
             graph[Input<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
             graph[Input<std::vector<LandmarkList>>(kWorldLandmarksTag)],
             graph[Input<std::pair<int, int>>(kImageSizeTag)],
+            graph[Input<NormalizedRect>(kNormRectTag)],
             graph[Input<std::vector<int>>(kHandTrackingIdsTag)], graph));
     multi_hand_gestures >>
         graph[Output<std::vector<ClassificationList>>(kHandGesturesTag)];
@@ -296,18 +311,20 @@ class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph {
       Source<std::vector<ClassificationList>> multi_handedness,
       Source<std::vector<NormalizedLandmarkList>> multi_hand_landmarks,
       Source<std::vector<LandmarkList>> multi_hand_world_landmarks,
-      Source<std::pair<int, int>> image_size,
+      Source<std::pair<int, int>> image_size, Source<NormalizedRect> norm_rect,
       Source<std::vector<int>> multi_hand_tracking_ids, Graph& graph) {
     auto& begin_loop_int = graph.AddNode("BeginLoopIntCalculator");
     image_size >> begin_loop_int.In(kCloneTag)[0];
-    multi_handedness >> begin_loop_int.In(kCloneTag)[1];
-    multi_hand_landmarks >> begin_loop_int.In(kCloneTag)[2];
-    multi_hand_world_landmarks >> begin_loop_int.In(kCloneTag)[3];
+    norm_rect >> begin_loop_int.In(kCloneTag)[1];
+    multi_handedness >> begin_loop_int.In(kCloneTag)[2];
+    multi_hand_landmarks >> begin_loop_int.In(kCloneTag)[3];
+    multi_hand_world_landmarks >> begin_loop_int.In(kCloneTag)[4];
     multi_hand_tracking_ids >> begin_loop_int.In(kIterableTag);
     auto image_size_clone = begin_loop_int.Out(kCloneTag)[0];
-    auto multi_handedness_clone = begin_loop_int.Out(kCloneTag)[1];
-    auto multi_hand_landmarks_clone = begin_loop_int.Out(kCloneTag)[2];
-    auto multi_hand_world_landmarks_clone = begin_loop_int.Out(kCloneTag)[3];
+    auto norm_rect_clone = begin_loop_int.Out(kCloneTag)[1];
+    auto multi_handedness_clone = begin_loop_int.Out(kCloneTag)[2];
+    auto multi_hand_landmarks_clone = begin_loop_int.Out(kCloneTag)[3];
+    auto multi_hand_world_landmarks_clone = begin_loop_int.Out(kCloneTag)[4];
     auto hand_tracking_id = begin_loop_int.Out(kItemTag);
     auto batch_end = begin_loop_int.Out(kBatchEndTag);
@@ -341,6 +358,7 @@ class MultipleHandGestureRecognizerGraph : public core::ModelTaskGraph {
     hand_world_landmarks >>
         hand_gesture_recognizer_graph.In(kWorldLandmarksTag);
     image_size_clone >> hand_gesture_recognizer_graph.In(kImageSizeTag);
+    norm_rect_clone >> hand_gesture_recognizer_graph.In(kNormRectTag);
     auto hand_gestures = hand_gesture_recognizer_graph.Out(kHandGesturesTag);

     auto& end_loop_classification_lists =
File: mediapipe/tasks/cc/vision/hand_detector/BUILD

@@ -32,7 +32,7 @@ cc_library(
         "//mediapipe/calculators/tflite:ssd_anchors_calculator_cc_proto",
         "//mediapipe/calculators/util:detection_label_id_to_text_calculator",
         "//mediapipe/calculators/util:detection_label_id_to_text_calculator_cc_proto",
-        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
+        "//mediapipe/calculators/util:detection_projection_calculator",
        "//mediapipe/calculators/util:detections_to_rects_calculator",
        "//mediapipe/calculators/util:detections_to_rects_calculator_cc_proto",
        "//mediapipe/calculators/util:non_max_suppression_calculator",

File: mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc

@@ -58,6 +58,7 @@ using ::mediapipe::tasks::vision::hand_detector::proto::
     HandDetectorGraphOptions;

 constexpr char kImageTag[] = "IMAGE";
+constexpr char kNormRectTag[] = "NORM_RECT";
 constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
 constexpr char kHandRectsTag[] = "HAND_RECTS";
 constexpr char kPalmRectsTag[] = "PALM_RECTS";
@@ -148,6 +149,9 @@ void ConfigureRectTransformationCalculator(
 // Inputs:
 //   IMAGE - Image
 //     Image to perform detection on.
+//   NORM_RECT - NormalizedRect
+//     Describes image rotation and region of image to perform detection
+//     on.
 //
 // Outputs:
 //   PALM_DETECTIONS - std::vector<Detection>
@@ -159,11 +163,14 @@ void ConfigureRectTransformationCalculator(
 //   IMAGE - Image
 //     The input image that the hand detector runs on and has the pixel data
 //     stored on the target storage (CPU vs GPU).
+// All returned coordinates are in the unrotated and uncropped input image
+// coordinates system.
 //
 // Example:
 // node {
 //   calculator: "mediapipe.tasks.vision.hand_detector.HandDetectorGraph"
 //   input_stream: "IMAGE:image"
+//   input_stream: "NORM_RECT:norm_rect"
 //   output_stream: "PALM_DETECTIONS:palm_detections"
 //   output_stream: "HAND_RECTS:hand_rects_from_palm_detections"
 //   output_stream: "PALM_RECTS:palm_rects"
@@ -189,11 +196,11 @@ class HandDetectorGraph : public core::ModelTaskGraph {
     ASSIGN_OR_RETURN(const auto* model_resources,
                      CreateModelResources<HandDetectorGraphOptions>(sc));
     Graph graph;
-    ASSIGN_OR_RETURN(
-        auto hand_detection_outs,
-        BuildHandDetectionSubgraph(sc->Options<HandDetectorGraphOptions>(),
-                                   *model_resources,
-                                   graph[Input<Image>(kImageTag)], graph));
+    ASSIGN_OR_RETURN(auto hand_detection_outs,
+                     BuildHandDetectionSubgraph(
+                         sc->Options<HandDetectorGraphOptions>(),
+                         *model_resources, graph[Input<Image>(kImageTag)],
+                         graph[Input<NormalizedRect>(kNormRectTag)], graph));
     hand_detection_outs.palm_detections >>
         graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
     hand_detection_outs.hand_rects >>
@@ -216,7 +223,7 @@ class HandDetectorGraph : public core::ModelTaskGraph {
   absl::StatusOr<HandDetectionOuts> BuildHandDetectionSubgraph(
       const HandDetectorGraphOptions& subgraph_options,
       const core::ModelResources& model_resources, Source<Image> image_in,
-      Graph& graph) {
+      Source<NormalizedRect> norm_rect_in, Graph& graph) {
     // Add image preprocessing subgraph. The model expects aspect ratio
     // unchanged.
     auto& preprocessing =
@@ -233,8 +240,9 @@ class HandDetectorGraph : public core::ModelTaskGraph {
             &preprocessing
                  .GetOptions<tasks::components::ImagePreprocessingOptions>()));
     image_in >> preprocessing.In("IMAGE");
+    norm_rect_in >> preprocessing.In("NORM_RECT");
     auto preprocessed_tensors = preprocessing.Out("TENSORS");
-    auto letterbox_padding = preprocessing.Out("LETTERBOX_PADDING");
+    auto matrix = preprocessing.Out("MATRIX");
     auto image_size = preprocessing.Out("IMAGE_SIZE");

     // Adds SSD palm detection model.
@@ -278,17 +286,12 @@ class HandDetectorGraph : public core::ModelTaskGraph {
     nms_detections >> detection_label_id_to_text.In("");
     auto detections_with_text = detection_label_id_to_text.Out("");

-    // Adjusts detection locations (already normalized to [0.f, 1.f]) on the
-    // letterboxed image (after image transformation with the FIT scale mode) to
-    // the corresponding locations on the same image with the letterbox removed
-    // (the input image to the graph before image transformation).
-    auto& detection_letterbox_removal =
-        graph.AddNode("DetectionLetterboxRemovalCalculator");
-    detections_with_text >> detection_letterbox_removal.In("DETECTIONS");
-    letterbox_padding >> detection_letterbox_removal.In("LETTERBOX_PADDING");
+    // Projects detections back into the input image coordinates system.
+    auto& detection_projection = graph.AddNode("DetectionProjectionCalculator");
+    detections_with_text >> detection_projection.In("DETECTIONS");
+    matrix >> detection_projection.In("PROJECTION_MATRIX");
     auto palm_detections =
-        detection_letterbox_removal[Output<std::vector<Detection>>(
-            "DETECTIONS")];
+        detection_projection[Output<std::vector<Detection>>("DETECTIONS")];

     // Converts each palm detection into a rectangle (normalized by image size)
     // that encloses the palm and is rotated such that the line connecting
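Note the substitution in the hunk above: once NORM_RECT can carry a rotation, undoing only the letterbox padding is no longer enough to map detections back to the original frame. The graph therefore routes the MATRIX output of the image preprocessing subgraph into DetectionProjectionCalculator, which projects the detections into the unrotated, uncropped input image coordinate system.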

File: mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph_test.cc

@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==============================================================================*/ ==============================================================================*/
#include <cmath>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <string> #include <string>
@ -75,13 +76,18 @@ using ::testing::proto::Partially;
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite"; constexpr char kPalmDetectionModel[] = "palm_detection_full.tflite";
constexpr char kTestRightHandsImage[] = "right_hands.jpg"; constexpr char kTestRightHandsImage[] = "right_hands.jpg";
constexpr char kTestRightHandsRotatedImage[] = "right_hands_rotated.jpg";
constexpr char kTestModelResourcesTag[] = "test_model_resources"; constexpr char kTestModelResourcesTag[] = "test_model_resources";
constexpr char kOneHandResultFile[] = "hand_detector_result_one_hand.pbtxt"; constexpr char kOneHandResultFile[] = "hand_detector_result_one_hand.pbtxt";
constexpr char kOneHandRotatedResultFile[] =
"hand_detector_result_one_hand_rotated.pbtxt";
constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt"; constexpr char kTwoHandsResultFile[] = "hand_detector_result_two_hands.pbtxt";
constexpr char kImageTag[] = "IMAGE"; constexpr char kImageTag[] = "IMAGE";
constexpr char kImageName[] = "image"; constexpr char kImageName[] = "image";
constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kNormRectName[] = "norm_rect";
constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS"; constexpr char kPalmDetectionsTag[] = "PALM_DETECTIONS";
constexpr char kPalmDetectionsName[] = "palm_detections"; constexpr char kPalmDetectionsName[] = "palm_detections";
constexpr char kHandRectsTag[] = "HAND_RECTS"; constexpr char kHandRectsTag[] = "HAND_RECTS";
@ -117,6 +123,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner(
graph[Input<Image>(kImageTag)].SetName(kImageName) >> graph[Input<Image>(kImageTag)].SetName(kImageName) >>
hand_detection.In(kImageTag); hand_detection.In(kImageTag);
graph[Input<NormalizedRect>(kNormRectTag)].SetName(kNormRectName) >>
hand_detection.In(kNormRectTag);
hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >> hand_detection.Out(kPalmDetectionsTag).SetName(kPalmDetectionsName) >>
graph[Output<std::vector<Detection>>(kPalmDetectionsTag)]; graph[Output<std::vector<Detection>>(kPalmDetectionsTag)];
@ -142,6 +150,9 @@ struct TestParams {
std::string hand_detection_model_name; std::string hand_detection_model_name;
// The filename of test image. // The filename of test image.
std::string test_image_name; std::string test_image_name;
// The rotation to apply to the test image before processing, in radians
// counter-clockwise.
float rotation;
// The number of maximum detected hands. // The number of maximum detected hands.
int num_hands; int num_hands;
// The expected hand detector result. // The expected hand detector result.
@ -154,14 +165,22 @@ TEST_P(HandDetectionTest, DetectTwoHands) {
MP_ASSERT_OK_AND_ASSIGN(
Image image, DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
GetParam().test_image_name)));
NormalizedRect input_norm_rect;
input_norm_rect.set_rotation(GetParam().rotation);
input_norm_rect.set_x_center(0.5);
input_norm_rect.set_y_center(0.5);
input_norm_rect.set_width(1.0);
input_norm_rect.set_height(1.0);
MP_ASSERT_OK_AND_ASSIGN(
auto model_resources,
CreateModelResourcesForModel(GetParam().hand_detection_model_name));
MP_ASSERT_OK_AND_ASSIGN(
auto task_runner, CreateTaskRunner(*model_resources, kPalmDetectionModel,
GetParam().num_hands));
auto output_packets = task_runner->Process(
{{kImageName, MakePacket<Image>(std::move(image))},
{kNormRectName,
MakePacket<NormalizedRect>(std::move(input_norm_rect))}});
MP_ASSERT_OK(output_packets);
const std::vector<Detection>& palm_detections =
(*output_packets)[kPalmDetectionsName].Get<std::vector<Detection>>();
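The full-image NormalizedRect built above is a recurring pattern in these tests; a small helper along the following lines (hypothetical, not part of this change) captures it. Rotation is in radians, counter-clockwise, about the rect center.

#include "mediapipe/framework/formats/rect.pb.h"

// Hypothetical helper: a NormalizedRect covering the whole image, with an
// optional rotation. All fields are normalized to the [0, 1] image square.
mediapipe::NormalizedRect MakeFullImageNormRect(float rotation = 0.0f) {
  mediapipe::NormalizedRect rect;
  rect.set_x_center(0.5f);
  rect.set_y_center(0.5f);
  rect.set_width(1.0f);
  rect.set_height(1.0f);
  rect.set_rotation(rotation);
  return rect;
}

With such a helper, the DetectOneHandWithRotation case below would reduce to MakeFullImageNormRect(M_PI / 2.0f).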
@ -188,15 +207,24 @@ INSTANTIATE_TEST_SUITE_P(
Values(TestParams{.test_name = "DetectOneHand",
.hand_detection_model_name = kPalmDetectionModel,
.test_image_name = kTestRightHandsImage,
.rotation = 0,
.num_hands = 1,
.expected_result =
GetExpectedHandDetectorResult(kOneHandResultFile)},
TestParams{.test_name = "DetectTwoHands",
.hand_detection_model_name = kPalmDetectionModel,
.test_image_name = kTestRightHandsImage,
.rotation = 0,
.num_hands = 2,
.expected_result =
GetExpectedHandDetectorResult(kTwoHandsResultFile)},
TestParams{.test_name = "DetectOneHandWithRotation",
.hand_detection_model_name = kPalmDetectionModel,
.test_image_name = kTestRightHandsRotatedImage,
.rotation = M_PI / 2.0f,
.num_hands = 1,
.expected_result = GetExpectedHandDetectorResult(
kOneHandRotatedResultFile)}),
[](const TestParamInfo<HandDetectionTest::ParamType>& info) {
return info.param.test_name;
});
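One portability note on the M_PI / 2.0f rotation used above: M_PI comes from POSIX <math.h>/<cmath> (which is why <cmath> is newly included here), not from the C++ standard. On toolchains that omit it, a local fallback works just as well — a sketch:

// M_PI is POSIX, not guaranteed by ISO C++; define a fallback if needed.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// 90 degrees counter-clockwise, expressed in radians.
constexpr float kQuarterTurnCcw = static_cast<float>(M_PI) / 2.0f;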

View File

@ -64,6 +64,7 @@ using ::mediapipe::tasks::vision::hand_landmarker::proto::
HandLandmarksDetectorGraphOptions;
constexpr char kImageTag[] = "IMAGE";
constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
constexpr char kHandRectNextFrameTag[] = "HAND_RECT_NEXT_FRAME";
@ -122,6 +123,9 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
// Inputs:
//   IMAGE - Image
//     Image to perform hand landmarks detection on.
//   NORM_RECT - NormalizedRect
//     Describes image rotation and region of image to perform landmarks
//     detection on.
//
// Outputs:
//   LANDMARKS: - std::vector<NormalizedLandmarkList>
@ -140,11 +144,14 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
//   IMAGE - Image
//     The input image that the hand landmarker runs on and has the pixel data
//     stored on the target storage (CPU vs GPU).
//     All returned coordinates are in the unrotated and uncropped input image
//     coordinate system.
//
// Example:
// node {
//   calculator: "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"
//   input_stream: "IMAGE:image_in"
//   input_stream: "NORM_RECT:norm_rect"
//   output_stream: "LANDMARKS:hand_landmarks"
//   output_stream: "WORLD_LANDMARKS:world_hand_landmarks"
//   output_stream: "HAND_RECT_NEXT_FRAME:hand_rect_next_frame"
@ -198,10 +205,11 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
!sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService)
.IsAvailable()));
}
ASSIGN_OR_RETURN(auto hand_landmarker_outputs,
BuildHandLandmarkerGraph(
sc->Options<HandLandmarkerGraphOptions>(),
graph[Input<Image>(kImageTag)],
graph[Input<NormalizedRect>(kNormRectTag)], graph));
hand_landmarker_outputs.landmark_lists >>
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
hand_landmarker_outputs.world_landmark_lists >>
@ -240,7 +248,7 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
// graph: the mediapipe graph instance to be updated.
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
Source<NormalizedRect> norm_rect_in, Graph& graph) {
const int max_num_hands =
tasks_options.hand_detector_graph_options().num_hands();
@ -258,12 +266,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
auto image_for_hand_detector =
DisallowIf(image_in, has_enough_hands, graph);
auto norm_rect_in_for_hand_detector =
DisallowIf(norm_rect_in, has_enough_hands, graph);
auto& hand_detector =
graph.AddNode("mediapipe.tasks.vision.hand_detector.HandDetectorGraph");
hand_detector.GetOptions<HandDetectorGraphOptions>().CopyFrom(
tasks_options.hand_detector_graph_options());
image_for_hand_detector >> hand_detector.In("IMAGE");
norm_rect_in_for_hand_detector >> hand_detector.In("NORM_RECT");
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
auto& hand_association = graph.AddNode("HandAssociationCalculator");
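Note that norm_rect_in is gated through DisallowIf on the same has_enough_hands condition as the image. That is what keeps the detector's two inputs in lockstep: gating only the image would let NORM_RECT packets through on frames where detection is skipped. Schematically (a restatement of the pattern above, not new API):

// Every stream feeding HandDetectorGraph must be gated on the same
// condition, or IMAGE and NORM_RECT packets could come from different
// frames once tracking suppresses detection.
auto gated_image = DisallowIf(image_in, has_enough_hands, graph);
auto gated_norm_rect = DisallowIf(norm_rect_in, has_enough_hands, graph);
gated_image >> hand_detector.In("IMAGE");
gated_norm_rect >> hand_detector.In("NORM_RECT");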

View File

@ -13,10 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cmath>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/flags/flag.h"
#include "absl/status/statusor.h"
@ -67,9 +69,12 @@ using ::testing::proto::Partially;
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
constexpr char kHandLandmarkerModelBundle[] = "hand_landmark.task";
constexpr char kLeftHandsImage[] = "left_hands.jpg";
constexpr char kLeftHandsRotatedImage[] = "left_hands_rotated.jpg";
constexpr char kImageTag[] = "IMAGE";
constexpr char kImageName[] = "image_in";
constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kNormRectName[] = "norm_rect_in";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kLandmarksName[] = "landmarks";
constexpr char kWorldLandmarksTag[] = "WORLD_LANDMARKS";
@ -84,6 +89,11 @@ constexpr char kExpectedLeftUpHandLandmarksFilename[] =
"expected_left_up_hand_landmarks.prototxt"; "expected_left_up_hand_landmarks.prototxt";
constexpr char kExpectedLeftDownHandLandmarksFilename[] = constexpr char kExpectedLeftDownHandLandmarksFilename[] =
"expected_left_down_hand_landmarks.prototxt"; "expected_left_down_hand_landmarks.prototxt";
// Same but for the rotated image.
constexpr char kExpectedLeftUpHandRotatedLandmarksFilename[] =
"expected_left_up_hand_rotated_landmarks.prototxt";
constexpr char kExpectedLeftDownHandRotatedLandmarksFilename[] =
"expected_left_down_hand_rotated_landmarks.prototxt";
constexpr float kFullModelFractionDiff = 0.03; // percentage constexpr float kFullModelFractionDiff = 0.03; // percentage
constexpr float kAbsMargin = 0.03; constexpr float kAbsMargin = 0.03;
@ -111,6 +121,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateTaskRunner() {
graph[Input<Image>(kImageTag)].SetName(kImageName) >>
hand_landmarker_graph.In(kImageTag);
graph[Input<NormalizedRect>(kNormRectTag)].SetName(kNormRectName) >>
hand_landmarker_graph.In(kNormRectTag);
hand_landmarker_graph.Out(kLandmarksTag).SetName(kLandmarksName) >>
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
hand_landmarker_graph.Out(kWorldLandmarksTag).SetName(kWorldLandmarksName) >>
@ -130,9 +142,16 @@ TEST_F(HandLandmarkerTest, Succeeds) {
MP_ASSERT_OK_AND_ASSIGN(
Image image,
DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kLeftHandsImage)));
NormalizedRect input_norm_rect;
input_norm_rect.set_x_center(0.5);
input_norm_rect.set_y_center(0.5);
input_norm_rect.set_width(1.0);
input_norm_rect.set_height(1.0);
MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner());
auto output_packets = task_runner->Process(
{{kImageName, MakePacket<Image>(std::move(image))},
{kNormRectName,
MakePacket<NormalizedRect>(std::move(input_norm_rect))}});
const auto& landmarks = (*output_packets)[kLandmarksName]
.Get<std::vector<NormalizedLandmarkList>>();
ASSERT_EQ(landmarks.size(), kMaxNumHands);
@ -150,6 +169,38 @@ TEST_F(HandLandmarkerTest, Succeeds) {
/*fraction=*/kFullModelFractionDiff));
}
TEST_F(HandLandmarkerTest, SucceedsWithRotation) {
MP_ASSERT_OK_AND_ASSIGN(
Image image, DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
kLeftHandsRotatedImage)));
NormalizedRect input_norm_rect;
input_norm_rect.set_x_center(0.5);
input_norm_rect.set_y_center(0.5);
input_norm_rect.set_width(1.0);
input_norm_rect.set_height(1.0);
input_norm_rect.set_rotation(M_PI / 2.0);
MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateTaskRunner());
auto output_packets = task_runner->Process(
{{kImageName, MakePacket<Image>(std::move(image))},
{kNormRectName,
MakePacket<NormalizedRect>(std::move(input_norm_rect))}});
const auto& landmarks = (*output_packets)[kLandmarksName]
.Get<std::vector<NormalizedLandmarkList>>();
ASSERT_EQ(landmarks.size(), kMaxNumHands);
std::vector<NormalizedLandmarkList> expected_landmarks = {
GetExpectedLandmarkList(kExpectedLeftUpHandRotatedLandmarksFilename),
GetExpectedLandmarkList(kExpectedLeftDownHandRotatedLandmarksFilename)};
EXPECT_THAT(landmarks[0],
Approximately(Partially(EqualsProto(expected_landmarks[0])),
/*margin=*/kAbsMargin,
/*fraction=*/kFullModelFractionDiff));
EXPECT_THAT(landmarks[1],
Approximately(Partially(EqualsProto(expected_landmarks[1])),
/*margin=*/kAbsMargin,
/*fraction=*/kFullModelFractionDiff));
}
}  // namespace
}  // namespace hand_landmarker

View File

@ -15,6 +15,7 @@
package com.google.mediapipe.tasks.vision.gesturerecognizer;
import android.content.Context;
import android.graphics.RectF;
import android.os.ParcelFileDescriptor;
import com.google.auto.value.AutoValue;
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
@ -71,8 +72,10 @@ import java.util.Optional;
public final class GestureRecognizer extends BaseVisionTaskApi {
private static final String TAG = GestureRecognizer.class.getSimpleName();
private static final String IMAGE_IN_STREAM_NAME = "image_in";
private static final String NORM_RECT_IN_STREAM_NAME = "norm_rect_in";
private static final List<String> INPUT_STREAMS =
Collections.unmodifiableList(
Arrays.asList("IMAGE:" + IMAGE_IN_STREAM_NAME, "NORM_RECT:" + NORM_RECT_IN_STREAM_NAME));
private static final List<String> OUTPUT_STREAMS =
Collections.unmodifiableList(
Arrays.asList(
@ -205,7 +208,7 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
* @param runningMode a mediapipe vision task {@link RunningMode}.
*/
private GestureRecognizer(TaskRunner taskRunner, RunningMode runningMode) {
super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME, NORM_RECT_IN_STREAM_NAME);
}
/**
@ -223,7 +226,8 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
* @throws MediaPipeException if there is an internal error.
*/
public GestureRecognitionResult recognize(Image inputImage) {
// TODO: add proper support for rotations.
return (GestureRecognitionResult) processImageData(inputImage, buildFullImageRectF());
}
/**
@ -244,7 +248,9 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
* @throws MediaPipeException if there is an internal error.
*/
public GestureRecognitionResult recognizeForVideo(Image inputImage, long inputTimestampMs) {
// TODO: add proper support for rotations.
return (GestureRecognitionResult)
processVideoData(inputImage, buildFullImageRectF(), inputTimestampMs);
}
/**
@ -266,7 +272,8 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
* @throws MediaPipeException if there is an internal error.
*/
public void recognizeAsync(Image inputImage, long inputTimestampMs) {
// TODO: add proper support for rotations.
sendLiveStreamData(inputImage, buildFullImageRectF(), inputTimestampMs);
}
/** Options for setting up a {@link GestureRecognizer}. */
@ -464,4 +471,9 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
.build();
}
}
/** Creates a RectF covering the full image. */
private static RectF buildFullImageRectF() {
return new RectF(0, 0, 1, 1);
}
}

View File

@ -39,6 +39,7 @@ mediapipe_files(srcs = [
"hand_landmark_full.tflite", "hand_landmark_full.tflite",
"hand_landmark_lite.tflite", "hand_landmark_lite.tflite",
"left_hands.jpg", "left_hands.jpg",
"left_hands_rotated.jpg",
"mobilenet_v1_0.25_192_quantized_1_default_1.tflite", "mobilenet_v1_0.25_192_quantized_1_default_1.tflite",
"mobilenet_v1_0.25_224_1_default_1.tflite", "mobilenet_v1_0.25_224_1_default_1.tflite",
"mobilenet_v1_0.25_224_1_metadata_1.tflite", "mobilenet_v1_0.25_224_1_metadata_1.tflite",
@ -52,7 +53,9 @@ mediapipe_files(srcs = [
"multi_objects_rotated.jpg", "multi_objects_rotated.jpg",
"palm_detection_full.tflite", "palm_detection_full.tflite",
"pointing_up.jpg", "pointing_up.jpg",
"pointing_up_rotated.jpg",
"right_hands.jpg", "right_hands.jpg",
"right_hands_rotated.jpg",
"segmentation_golden_rotation0.png", "segmentation_golden_rotation0.png",
"segmentation_input_rotation0.jpg", "segmentation_input_rotation0.jpg",
"selfie_segm_128_128_3.tflite", "selfie_segm_128_128_3.tflite",
@ -65,7 +68,9 @@ mediapipe_files(srcs = [
exports_files(
srcs = [
"expected_left_down_hand_landmarks.prototxt",
"expected_left_down_hand_rotated_landmarks.prototxt",
"expected_left_up_hand_landmarks.prototxt",
"expected_left_up_hand_rotated_landmarks.prototxt",
"expected_right_down_hand_landmarks.prototxt",
"expected_right_up_hand_landmarks.prototxt",
],
@ -85,11 +90,14 @@ filegroup(
"hand_landmark_full.tflite", "hand_landmark_full.tflite",
"hand_landmark_lite.tflite", "hand_landmark_lite.tflite",
"left_hands.jpg", "left_hands.jpg",
"left_hands_rotated.jpg",
"mozart_square.jpg", "mozart_square.jpg",
"multi_objects.jpg", "multi_objects.jpg",
"multi_objects_rotated.jpg", "multi_objects_rotated.jpg",
"pointing_up.jpg", "pointing_up.jpg",
"pointing_up_rotated.jpg",
"right_hands.jpg", "right_hands.jpg",
"right_hands_rotated.jpg",
"segmentation_golden_rotation0.png", "segmentation_golden_rotation0.png",
"segmentation_input_rotation0.jpg", "segmentation_input_rotation0.jpg",
"selfie_segm_128_128_3_expected_mask.jpg", "selfie_segm_128_128_3_expected_mask.jpg",
@ -131,12 +139,17 @@ filegroup(
name = "test_protos", name = "test_protos",
srcs = [ srcs = [
"expected_left_down_hand_landmarks.prototxt", "expected_left_down_hand_landmarks.prototxt",
"expected_left_down_hand_rotated_landmarks.prototxt",
"expected_left_up_hand_landmarks.prototxt", "expected_left_up_hand_landmarks.prototxt",
"expected_left_up_hand_rotated_landmarks.prototxt",
"expected_right_down_hand_landmarks.prototxt", "expected_right_down_hand_landmarks.prototxt",
"expected_right_up_hand_landmarks.prototxt", "expected_right_up_hand_landmarks.prototxt",
"hand_detector_result_one_hand.pbtxt", "hand_detector_result_one_hand.pbtxt",
"hand_detector_result_one_hand_rotated.pbtxt",
"hand_detector_result_two_hands.pbtxt", "hand_detector_result_two_hands.pbtxt",
"pointing_up_landmarks.pbtxt", "pointing_up_landmarks.pbtxt",
"pointing_up_rotated_landmarks.pbtxt",
"thumb_up_landmarks.pbtxt", "thumb_up_landmarks.pbtxt",
"thumb_up_rotated_landmarks.pbtxt",
], ],
) )

View File

@ -0,0 +1,84 @@
landmark {
x: 0.9259716
y: 0.18969846
}
landmark {
x: 0.88135517
y: 0.28856543
}
landmark {
x: 0.7600651
y: 0.3578236
}
landmark {
x: 0.62631166
y: 0.40490413
}
landmark {
x: 0.5374573
y: 0.45170194
}
landmark {
x: 0.57372385
y: 0.29924914
}
landmark {
x: 0.36731184
y: 0.33081773
}
landmark {
x: 0.24132833
y: 0.34759054
}
landmark {
x: 0.13690609
y: 0.35727677
}
landmark {
x: 0.5535803
y: 0.2398035
}
landmark {
x: 0.31834763
y: 0.24999242
}
landmark {
x: 0.16748133
y: 0.25625145
}
landmark {
x: 0.050747424
y: 0.25991398
}
landmark {
x: 0.56593156
y: 0.1867483
}
landmark {
x: 0.3543046
y: 0.17923892
}
landmark {
x: 0.21360746
y: 0.17454882
}
landmark {
x: 0.11110917
y: 0.17232567
}
landmark {
x: 0.5948908
y: 0.14024714
}
landmark {
x: 0.42692152
y: 0.11949824
}
landmark {
x: 0.32239118
y: 0.106370345
}
landmark {
x: 0.23672739
y: 0.09432885
}

View File

@ -0,0 +1,84 @@
landmark {
x: 0.06676084
y: 0.8095678
}
landmark {
x: 0.11359626
y: 0.71148247
}
landmark {
x: 0.23572624
y: 0.6414506
}
landmark {
x: 0.37323278
y: 0.5959156
}
landmark {
x: 0.46243322
y: 0.55125874
}
landmark {
x: 0.4205411
y: 0.69531494
}
landmark {
x: 0.62798893
y: 0.66715276
}
landmark {
x: 0.7568023
y: 0.65208924
}
landmark {
x: 0.86370826
y: 0.6437276
}
landmark {
x: 0.445136
y: 0.75394773
}
landmark {
x: 0.6787485
y: 0.745853
}
landmark {
x: 0.8290694
y: 0.7412988
}
landmark {
x: 0.94454145
y: 0.7384017
}
landmark {
x: 0.43516788
y: 0.8082166
}
landmark {
x: 0.6459554
y: 0.81768996
}
landmark {
x: 0.7875173
y: 0.825062
}
landmark {
x: 0.89249825
y: 0.82850707
}
landmark {
x: 0.40665048
y: 0.8567925
}
landmark {
x: 0.57228816
y: 0.8802181
}
landmark {
x: 0.6762071
y: 0.8941581
}
landmark {
x: 0.76453924
y: 0.90583205
}

View File

@ -0,0 +1,33 @@
detections {
label: "Palm"
score: 0.97115
location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.5198178
ymin: 0.6467485
width: 0.42467535
height: 0.22546273
}
}
}
detections {
label: "Palm"
score: 0.96701413
location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.024490356
ymin: 0.12620124
width: 0.43832153
height: 0.23269764
}
}
}
hand_rects {
x_center: 0.5760683
y_center: 0.6829921
height: 0.5862031
width: 1.1048855
rotation: -0.8250832
}

View File

@ -0,0 +1,223 @@
classifications {
classification {
score: 1.0
label: "Left"
display_name: "Left"
}
}
landmarks {
landmark {
x: 0.25546086
y: 0.47584262
z: 1.835341e-07
}
landmark {
x: 0.3363011
y: 0.54135
z: -0.041144375
}
landmark {
x: 0.4375146
y: 0.57881975
z: -0.06807727
}
landmark {
x: 0.49603376
y: 0.5263966
z: -0.09387612
}
landmark {
x: 0.5022822
y: 0.4413827
z: -0.1189948
}
landmark {
x: 0.5569452
y: 0.4724485
z: -0.05138246
}
landmark {
x: 0.6687125
y: 0.47918057
z: -0.09121969
}
landmark {
x: 0.73666537
y: 0.48318353
z: -0.11703273
}
landmark {
x: 0.7998315
y: 0.4741413
z: -0.1386424
}
landmark {
x: 0.5244063
y: 0.39292705
z: -0.061040796
}
landmark {
x: 0.57215345
y: 0.41514704
z: -0.11967233
}
landmark {
x: 0.4724468
y: 0.45553637
z: -0.13287684
}
landmark {
x: 0.43794966
y: 0.45210314
z: -0.13210714
}
landmark {
x: 0.47838163
y: 0.33329
z: -0.07421263
}
landmark {
x: 0.51081127
y: 0.35479474
z: -0.13596693
}
landmark {
x: 0.42433846
y: 0.40486792
z: -0.121291734
}
landmark {
x: 0.40280548
y: 0.39977497
z: -0.09928809
}
landmark {
x: 0.42269367
y: 0.2798249
z: -0.09064263
}
landmark {
x: 0.45849988
y: 0.3069861
z: -0.12894689
}
landmark {
x: 0.40754712
y: 0.35153976
z: -0.109160855
}
landmark {
x: 0.38855004
y: 0.3467068
z: -0.08820164
}
}
world_landmarks {
landmark {
x: -0.08568013
y: 0.016593203
z: 0.036527164
}
landmark {
x: -0.0565372
y: 0.041761592
z: 0.019493781
}
landmark {
x: -0.031365488
y: 0.05031186
z: 0.0025481891
}
landmark {
x: -0.008534161
y: 0.04286737
z: -0.024755282
}
landmark {
x: -0.0047254
y: 0.015748458
z: -0.035581928
}
landmark {
x: 0.013083893
y: 0.024668094
z: 0.0035934823
}
landmark {
x: 0.04149521
y: 0.024621274
z: -0.0030611698
}
landmark {
x: 0.06257473
y: 0.025388625
z: -0.010340984
}
landmark {
x: 0.08009179
y: 0.023082614
z: -0.03162942
}
landmark {
x: 0.006135068
y: 0.000696786
z: 0.0048212176
}
landmark {
x: 0.01678449
y: 0.0067061195
z: -0.029920919
}
landmark {
x: -0.008948593
y: 0.016808286
z: -0.03755109
}
landmark {
x: -0.01789449
y: 0.0153161455
z: -0.012059977
}
landmark {
x: -0.0061980113
y: -0.017872887
z: -0.002366997
}
landmark {
x: -0.004643807
y: -0.0108282855
z: -0.034515083
}
landmark {
x: -0.027603384
y: 0.003529715
z: -0.033665676
}
landmark {
x: -0.035679806
y: 0.0038255951
z: -0.008094264
}
landmark {
x: -0.02957782
y: -0.031701155
z: -0.008180461
}
landmark {
x: -0.020741666
y: -0.02506058
z: -0.026839724
}
landmark {
x: -0.0310834
y: -0.009496164
z: -0.032422185
}
landmark {
x: -0.037420202
y: -0.012883307
z: -0.017971724
}
}

View File

@ -0,0 +1,223 @@
classifications {
classification {
score: 1.0
label: "Left"
display_name: "Left"
}
}
landmarks {
landmark {
x: 0.3283601
y: 0.63773525
z: -3.2280354e-07
}
landmark {
x: 0.46280807
y: 0.6339767
z: -0.06408348
}
landmark {
x: 0.5831279
y: 0.57430106
z: -0.08583106
}
landmark {
x: 0.6689471
y: 0.49959752
z: -0.09886064
}
landmark {
x: 0.74378216
y: 0.47357544
z: -0.09680563
}
landmark {
x: 0.5233122
y: 0.41020474
z: -0.038088404
}
landmark {
x: 0.5296913
y: 0.3372598
z: -0.08874837
}
landmark {
x: 0.49039274
y: 0.43994758
z: -0.102315836
}
landmark {
x: 0.4824569
y: 0.47969607
z: -0.1030014
}
landmark {
x: 0.4451338
y: 0.39520803
z: -0.02177739
}
landmark {
x: 0.4410001
y: 0.34107083
z: -0.07294245
}
landmark {
x: 0.4162798
y: 0.46102384
z: -0.07746907
}
landmark {
x: 0.43492994
y: 0.47154287
z: -0.07404131
}
landmark {
x: 0.37671578
y: 0.39535576
z: -0.016277775
}
landmark {
x: 0.36978847
y: 0.34265152
z: -0.07346253
}
landmark {
x: 0.3559884
y: 0.44905427
z: -0.057693005
}
landmark {
x: 0.37711847
y: 0.46414754
z: -0.03662908
}
landmark {
x: 0.3142985
y: 0.3942253
z: -0.0152847925
}
landmark {
x: 0.30000874
y: 0.35543376
z: -0.046002634
}
landmark {
x: 0.30002704
y: 0.42357764
z: -0.032671776
}
landmark {
x: 0.31079838
y: 0.44218025
z: -0.016200554
}
}
world_landmarks {
landmark {
x: -0.030687196
y: 0.0678545
z: 0.051061403
}
landmark {
x: 0.0047719833
y: 0.06330968
z: 0.018945374
}
landmark {
x: 0.039799504
y: 0.054109577
z: 0.007930638
}
landmark {
x: 0.069374144
y: 0.035063196
z: 2.2522348e-05
}
landmark {
x: 0.087818466
y: 0.018390425
z: 0.004055788
}
landmark {
x: 0.02810654
y: 0.0043561812
z: -0.0038672548
}
landmark {
x: 0.025270049
y: -0.0039896416
z: -0.032991238
}
landmark {
x: 0.020414166
y: 0.006768506
z: -0.032724563
}
landmark {
x: 0.016415983
y: 0.024563588
z: -0.0058115427
}
landmark {
x: 0.0038743173
y: -0.0044466974
z: 0.0024876352
}
landmark {
x: 0.0041790796
y: -0.0115309935
z: -0.03532454
}
landmark {
x: -0.0016900161
y: 0.015519895
z: -0.03596156
}
landmark {
x: 0.004309217
y: 0.01917039
z: 0.003907912
}
landmark {
x: -0.016969737
y: -0.005584497
z: 0.0034258277
}
landmark {
x: -0.016737012
y: -0.01159037
z: -0.02876696
}
landmark {
x: -0.018165365
y: 0.01376111
z: -0.026835402
}
landmark {
x: -0.012430167
y: 0.02064222
z: -0.00087265146
}
landmark {
x: -0.043247573
y: 0.0011161827
z: 0.0056269006
}
landmark {
x: -0.038128495
y: -0.011477032
z: -0.016374081
}
landmark {
x: -0.034920715
y: 0.005510211
z: -0.029714659
}
landmark {
x: -0.03815982
y: 0.011989757
z: -0.014853194
}
}

View File

@ -151,7 +151,7 @@ def external_files():
http_file(
name = "com_google_mediapipe_dummy_gesture_recognizer_task",
sha256 = "18e54586bda33300d459ca140cd045f6daf43d897224ba215a16db3423eae18e",
urls = ["https://storage.googleapis.com/mediapipe-assets/dummy_gesture_recognizer.task?generation=1665707319890725"],
)
http_file(
@ -166,12 +166,24 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_down_hand_landmarks.prototxt?generation=1661875720230540"],
)
http_file(
name = "com_google_mediapipe_expected_left_down_hand_rotated_landmarks_prototxt",
sha256 = "a16d6cb8dd07d60f0678ddeb6a7447b73b9b03d4ddde365c8770b472205bb6cf",
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_down_hand_rotated_landmarks.prototxt?generation=1666037061297507"],
)
http_file(
name = "com_google_mediapipe_expected_left_up_hand_landmarks_prototxt",
sha256 = "1353ba617c4f048083618587cd23a8a22115f634521c153d4e1bd1ebd4f49dd7",
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_up_hand_landmarks.prototxt?generation=1661875726008879"],
)
http_file(
name = "com_google_mediapipe_expected_left_up_hand_rotated_landmarks_prototxt",
sha256 = "a9b9789c274d48a7cb9cc10af7bc644eb2512bb934529790d0a5404726daa86a",
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_up_hand_rotated_landmarks.prototxt?generation=1666037063443676"],
)
http_file(
name = "com_google_mediapipe_expected_right_down_hand_landmarks_prototxt",
sha256 = "f281b745175aaa7f458def6cf4c89521fb56302dd61a05642b3b4a4f237ffaa3",
@ -250,6 +262,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/hand_detector_result_one_hand.pbtxt?generation=1662745351291628"],
)
http_file(
name = "com_google_mediapipe_hand_detector_result_one_hand_rotated_pbtxt",
sha256 = "ff5ca0654028d78a3380df90054273cae79abe1b7369b164063fd1d5758ec370",
urls = ["https://storage.googleapis.com/mediapipe-assets/hand_detector_result_one_hand_rotated.pbtxt?generation=1666037065601724"],
)
http_file(
name = "com_google_mediapipe_hand_detector_result_two_hands_pbtxt",
sha256 = "2589cb08b0ee027dc24649fe597adcfa2156a21d12ea2480f83832714ebdf95f",
@ -352,6 +370,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/left_hands.jpg?generation=1661875796949017"],
)
http_file(
name = "com_google_mediapipe_left_hands_rotated_jpg",
sha256 = "8609c6202bca43a99bbf23fa8e687e49fa525e89481152e4c0987f46d60d7931",
urls = ["https://storage.googleapis.com/mediapipe-assets/left_hands_rotated.jpg?generation=1666037068103465"],
)
http_file(
name = "com_google_mediapipe_mobilebert_embedding_with_metadata_tflite",
sha256 = "fa47142dcc6f446168bc672f2df9605b6da5d0c0d6264e9be62870282365b95c",
@ -544,6 +568,18 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_landmarks.pbtxt?generation=1665174976408451"],
)
http_file(
name = "com_google_mediapipe_pointing_up_rotated_jpg",
sha256 = "50ff66f50281207072a038e5bb6648c43f4aacbfb8204a4d2591868756aaeff1",
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_rotated.jpg?generation=1666037072219697"],
)
http_file(
name = "com_google_mediapipe_pointing_up_rotated_landmarks_pbtxt",
sha256 = "ccf67e5867094ffb6c465a4dfbf2ef1eb3f9db2465803fc25a0b84c958e050de",
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_rotated_landmarks.pbtxt?generation=1666037074376515"],
)
http_file(
name = "com_google_mediapipe_pose_detection_tflite",
sha256 = "a63c614bef30d35947f13be361820b1e4e3bec9cfeebf4d11216a18373108e85",
@ -580,6 +616,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/right_hands.jpg?generation=1661875908672404"],
)
http_file(
name = "com_google_mediapipe_right_hands_rotated_jpg",
sha256 = "b3bdf692f0d54b86c8b67e6d1286dd0078fbe6e9dfcd507b187e3bd8b398c0f9",
urls = ["https://storage.googleapis.com/mediapipe-assets/right_hands_rotated.jpg?generation=1666037076873345"],
)
http_file(
name = "com_google_mediapipe_score_calibration_file_meta_json",
sha256 = "6a3c305620371f662419a496f75be5a10caebca7803b1e99d8d5d22ba51cda94",
@ -724,6 +766,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/thumb_up_landmarks.pbtxt?generation=1665174979747784"],
)
http_file(
name = "com_google_mediapipe_thumb_up_rotated_landmarks_pbtxt",
sha256 = "5d0a465959cacbd201ac8dd8fc8a66c5997a172b71809b12d27296db6a28a102",
urls = ["https://storage.googleapis.com/mediapipe-assets/thumb_up_rotated_landmarks.pbtxt?generation=1666037079490527"],
)
http_file(
name = "com_google_mediapipe_two_heads_16000_hz_mono_wav",
sha256 = "a291a9c22c39bba30138a26915e154a96286ba6ca3b413053123c504a58cce3b",