Project import generated by Copybara.

GitOrigin-RevId: d4a11282d20fe4d2e137f9032cf349750030dcb9

Parent: 1faeaae7e5
Commit: d4bb35fe5a
@@ -257,8 +257,15 @@ glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
 glSurfaceView.setRenderInputImage(true);
 faceDetection.setResultListener(
     faceDetectionResult -> {
+      if (faceDetectionResult.multiFaceDetections().isEmpty()) {
+        return;
+      }
       RelativeKeypoint noseTip =
-          FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
+          faceDetectionResult
+              .multiFaceDetections()
+              .get(0)
+              .getLocationData()
+              .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
       Log.i(
           TAG,
           String.format(
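The hunk above changes the Java result listener to guard against an empty detection list before reading the nose-tip keypoint. For readers following the same solution from Python, here is a minimal sketch of the same pattern, not part of this change; it assumes the standard `mediapipe.solutions.face_detection` API, and the image path is a placeholder.

```python
import cv2
import mediapipe as mp

mp_face_detection = mp.solutions.face_detection

# Hypothetical input image path; replace with a real file.
image = cv2.imread("/tmp/face.jpg")

with mp_face_detection.FaceDetection(
    model_selection=0, min_detection_confidence=0.5) as face_detection:
  results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if not results.detections:
    print("No face detected.")
  else:
    # Nose-tip keypoint of the first detected face, in normalized [0, 1] coordinates.
    nose_tip = mp_face_detection.get_key_point(
        results.detections[0], mp_face_detection.FaceKeyPoint.NOSE_TIP)
    print(f"Nose tip: x={nose_tip.x:.3f}, y={nose_tip.y:.3f}")
```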
@ -297,10 +304,17 @@ FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
|
|||
FaceDetectionResultImageView imageView = new FaceDetectionResultImageView(this);
|
||||
faceDetection.setResultListener(
|
||||
faceDetectionResult -> {
|
||||
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
int width = faceDetectionResult.inputBitmap().getWidth();
|
||||
int height = faceDetectionResult.inputBitmap().getHeight();
|
||||
RelativeKeypoint noseTip =
|
||||
FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
|
||||
faceDetectionResult
|
||||
.multiFaceDetections()
|
||||
.get(0)
|
||||
.getLocationData()
|
||||
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -334,9 +348,9 @@ ActivityResultLauncher<Intent> imageGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery = new Intent(
|
||||
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
```
|
||||
|
||||
#### Video Input
|
||||
|
@ -368,8 +382,15 @@ glSurfaceView.setRenderInputImage(true);
|
|||
|
||||
faceDetection.setResultListener(
|
||||
faceDetectionResult -> {
|
||||
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
RelativeKeypoint noseTip =
|
||||
FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
|
||||
faceDetectionResult
|
||||
.multiFaceDetections()
|
||||
.get(0)
|
||||
.getLocationData()
|
||||
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -398,9 +419,9 @@ ActivityResultLauncher<Intent> videoGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
```
|
||||
|
||||
## Example Apps
|
||||
|
|
|
@ -612,9 +612,9 @@ ActivityResultLauncher<Intent> imageGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery = new Intent(
|
||||
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
```
|
||||
|
||||
#### Video Input
|
||||
|
@ -678,9 +678,9 @@ ActivityResultLauncher<Intent> videoGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
```
|
||||
|
||||
## Example Apps
|
||||
|
|
|
@@ -91,8 +91,10 @@ To detect initial hand locations, we designed a
 mobile real-time uses in a manner similar to the face detection model in
 [MediaPipe Face Mesh](./face_mesh.md). Detecting hands is a decidedly complex
 task: our
-[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite)
-has to work across a variety of hand sizes with a large scale span (~20x)
+[lite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite)
+and
+[full model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite)
+have to work across a variety of hand sizes with a large scale span (~20x)
 relative to the image frame and be able to detect occluded and self-occluded
 hands. Whereas faces have high contrast patterns, e.g., in the eye and mouth
 region, the lack of such features in hands makes it comparatively difficult to
@@ -195,6 +197,17 @@ of 21 hand landmarks and each landmark is composed of `x`, `y` and `z`. `x` and
 and the smaller the value the closer the landmark is to the camera. The
 magnitude of `z` uses roughly the same scale as `x`.
 
+#### multi_hand_world_landmarks
+
+Collection of detected/tracked hands, where each hand is represented as a list
+of 21 hand landmarks in world coordinates. Each landmark consists of the
+following:
+
+* `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
+  hand's approximate geometric center.
+* `visibility`: Identical to that defined in the corresponding
+  [multi_hand_landmarks](#multi_hand_landmarks).
+
 #### multi_handedness
 
 Collection of handedness of the detected/tracked hands (i.e. is it a left or
@@ -262,6 +275,12 @@ with mp_hands.Hands(
         mp_drawing_styles.get_default_hand_connections_style())
     cv2.imwrite(
         '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
+    # Draw hand world landmarks.
+    if not results.multi_hand_world_landmarks:
+      continue
+    for hand_world_landmarks in results.multi_hand_world_landmarks:
+      mp_drawing.plot_landmarks(
+        hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)
 
 # For webcam input:
 cap = cv2.VideoCapture(0)
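As a complement to the documentation change above, the following is a minimal, self-contained sketch of reading the new `multi_hand_world_landmarks` output together with handedness from a single image. It is illustrative only and is not part of this change; the image path and the two-hand limit are placeholders.

```python
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

# Hypothetical input image; replace with a real file.
image = cv2.imread("/tmp/hand.jpg")

with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
  results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if not results.multi_hand_world_landmarks:
    print("No hands detected.")
  else:
    for handedness, world_landmarks in zip(results.multi_handedness,
                                           results.multi_hand_world_landmarks):
      label = handedness.classification[0].label  # "Left" or "Right"
      wrist = world_landmarks.landmark[mp_hands.HandLandmark.WRIST]
      # World coordinates are in meters, with the origin at the hand's
      # approximate geometric center.
      print(f"{label} wrist: x={wrist.x:.3f} m, y={wrist.y:.3f} m, z={wrist.z:.3f} m")
```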
@ -400,7 +419,7 @@ Supported configuration options:
|
|||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(false)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
hands.setErrorListener(
|
||||
|
@ -423,8 +442,11 @@ glSurfaceView.setRenderInputImage(true);
|
|||
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
if (result.multiHandLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
NormalizedLandmark wristLandmark =
|
||||
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -453,7 +475,7 @@ glSurfaceView.post(
|
|||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(true)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
|
||||
|
@ -464,10 +486,13 @@ Hands hands = new Hands(this, handsOptions);
|
|||
HandsResultImageView imageView = new HandsResultImageView(this);
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
if (result.multiHandLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
int width = handsResult.inputBitmap().getWidth();
|
||||
int height = handsResult.inputBitmap().getHeight();
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
NormalizedLandmark wristLandmark =
|
||||
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -501,9 +526,9 @@ ActivityResultLauncher<Intent> imageGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery = new Intent(
|
||||
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
```
|
||||
|
||||
#### Video Input
|
||||
|
@ -513,7 +538,7 @@ imageGetter.launch(gallery);
|
|||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(false)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
hands.setErrorListener(
|
||||
|
@ -536,8 +561,11 @@ glSurfaceView.setRenderInputImage(true);
|
|||
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
if (result.multiHandLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
NormalizedLandmark wristLandmark =
|
||||
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -566,9 +594,9 @@ ActivityResultLauncher<Intent> videoGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
```
|
||||
|
||||
## Example Apps
|
||||
|
|
|
@@ -159,6 +159,11 @@ images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
 is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
 `true`.
 
+#### refine_face_landmarks
+
+Whether to further refine the landmark coordinates around the eyes and lips, and
+output additional landmarks around the irises. Default to `false`.
+
 #### min_detection_confidence
 
 Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@@ -241,6 +246,7 @@ Supported configuration options:
 * [smooth_landmarks](#smooth_landmarks)
 * [enable_segmentation](#enable_segmentation)
 * [smooth_segmentation](#smooth_segmentation)
+* [refine_face_landmarks](#refine_face_landmarks)
 * [min_detection_confidence](#min_detection_confidence)
 * [min_tracking_confidence](#min_tracking_confidence)
 
@@ -256,7 +262,8 @@ IMAGE_FILES = []
 with mp_holistic.Holistic(
     static_image_mode=True,
     model_complexity=2,
-    enable_segmentation=True) as holistic:
+    enable_segmentation=True,
+    refine_face_landmarks=True) as holistic:
   for idx, file in enumerate(IMAGE_FILES):
     image = cv2.imread(file)
     image_height, image_width, _ = image.shape
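To make the effect of the new `refine_face_landmarks` option concrete, the sketch below compares the number of face landmarks Holistic returns with and without refinement. This is not part of the change; the image path is a placeholder, and the expected counts (468 without refinement, 478 with the extra iris points) are stated as an assumption rather than asserted by the diff.

```python
import cv2
import mediapipe as mp

mp_holistic = mp.solutions.holistic

# Hypothetical input image; replace with a real file.
image = cv2.cvtColor(cv2.imread("/tmp/person.jpg"), cv2.COLOR_BGR2RGB)

for refine in (False, True):
  with mp_holistic.Holistic(static_image_mode=True,
                            refine_face_landmarks=refine) as holistic:
    results = holistic.process(image)
    if results.face_landmarks:
      # Expected: 468 landmarks without refinement, 478 with refinement
      # (extra iris landmarks) -- treat the exact counts as an assumption.
      print(f"refine_face_landmarks={refine}: "
            f"{len(results.face_landmarks.landmark)} face landmarks")
```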
@@ -350,6 +357,7 @@ Supported configuration options:
 * [smoothLandmarks](#smooth_landmarks)
 * [enableSegmentation](#enable_segmentation)
 * [smoothSegmentation](#smooth_segmentation)
+* [refineFaceLandmarks](#refineFaceLandmarks)
 * [minDetectionConfidence](#min_detection_confidence)
 * [minTrackingConfidence](#min_tracking_confidence)
 
@@ -421,6 +429,7 @@ holistic.setOptions({
   smoothLandmarks: true,
   enableSegmentation: true,
   smoothSegmentation: true,
+  refineFaceLandmarks: true,
   minDetectionConfidence: 0.5,
   minTrackingConfidence: 0.5
 });
@@ -55,15 +55,14 @@ one over the other.
 ### [Hands](https://google.github.io/mediapipe/solutions/hands)
 
 * Palm detection model:
-  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite),
+  [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite),
+  [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite),
   [TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
 * Hand landmark model:
   [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite),
   [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite),
-  [TFLite model (sparse)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_sparse.tflite),
   [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
-* [Model card](https://mediapipe.page.link/handmc),
-  [Model card (sparse)](https://mediapipe.page.link/handmc-sparse)
+* [Model card](https://mediapipe.page.link/handmc)
 
 ### [Pose](https://google.github.io/mediapipe/solutions/pose)
 
@@ -60,7 +60,10 @@ class PacketClonerCalculator : public CalculatorBase {
     const auto calculator_options =
         cc->Options<mediapipe::PacketClonerCalculatorOptions>();
     output_only_when_all_inputs_received_ =
-        calculator_options.output_only_when_all_inputs_received();
+        calculator_options.output_only_when_all_inputs_received() ||
+        calculator_options.output_packets_only_when_all_inputs_received();
+    output_empty_packets_before_all_inputs_received_ =
+        calculator_options.output_packets_only_when_all_inputs_received();
 
     // Parse input streams.
     tick_signal_index_ = cc->Inputs().NumEntries() - 1;
@ -88,6 +91,9 @@ class PacketClonerCalculator : public CalculatorBase {
|
|||
// Return if one of the input is null.
|
||||
for (int i = 0; i < tick_signal_index_; ++i) {
|
||||
if (current_[i].IsEmpty()) {
|
||||
if (output_empty_packets_before_all_inputs_received_) {
|
||||
SetAllNextTimestampBounds(cc);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
}
|
||||
|
@ -107,9 +113,17 @@ class PacketClonerCalculator : public CalculatorBase {
|
|||
}
|
||||
|
||||
private:
|
||||
void SetAllNextTimestampBounds(CalculatorContext* cc) {
|
||||
for (int j = 0; j < tick_signal_index_; ++j) {
|
||||
cc->Outputs().Index(j).SetNextTimestampBound(
|
||||
cc->InputTimestamp().NextAllowedInStream());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Packet> current_;
|
||||
int tick_signal_index_;
|
||||
bool output_only_when_all_inputs_received_;
|
||||
bool output_empty_packets_before_all_inputs_received_;
|
||||
};
|
||||
|
||||
REGISTER_CALCULATOR(PacketClonerCalculator);
|
||||
|
|
|
@@ -28,4 +28,9 @@ message PacketClonerCalculatorOptions {
   // When true, this calculator will drop received TICK packets if any input
   // stream hasn't received a packet yet.
   optional bool output_only_when_all_inputs_received = 1 [default = false];
+
+  // Similar to the above, but also transmits empty packets for all streams
+  // before all inputs are received.
+  optional bool output_packets_only_when_all_inputs_received = 2
+      [default = false];
 }
@ -32,9 +32,9 @@ public class FaceDetectionResultImageView extends AppCompatImageView {
|
|||
private static final String TAG = "FaceDetectionResultImageView";
|
||||
|
||||
private static final int KEYPOINT_COLOR = Color.RED;
|
||||
private static final int KEYPOINT_RADIUS = 15;
|
||||
private static final int KEYPOINT_RADIUS = 8; // Pixels
|
||||
private static final int BBOX_COLOR = Color.GREEN;
|
||||
private static final int BBOX_THICKNESS = 10;
|
||||
private static final int BBOX_THICKNESS = 5; // Pixels
|
||||
private Bitmap latest;
|
||||
|
||||
public FaceDetectionResultImageView(Context context) {
|
||||
|
|
|
@ -28,7 +28,6 @@ import androidx.activity.result.ActivityResultLauncher;
|
|||
import androidx.activity.result.contract.ActivityResultContracts;
|
||||
import androidx.exifinterface.media.ExifInterface;
|
||||
// ContentResolver dependency
|
||||
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
|
||||
import com.google.mediapipe.solutioncore.CameraInput;
|
||||
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
|
||||
import com.google.mediapipe.solutioncore.VideoInput;
|
||||
|
@ -36,6 +35,7 @@ import com.google.mediapipe.solutions.facedetection.FaceDetection;
|
|||
import com.google.mediapipe.solutions.facedetection.FaceDetectionOptions;
|
||||
import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;
|
||||
import com.google.mediapipe.solutions.facedetection.FaceKeypoint;
|
||||
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
|
@ -175,9 +175,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
});
|
||||
imageView = new FaceDetectionResultImageView(this);
|
||||
}
|
||||
|
@ -240,9 +240,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.VIDEO);
|
||||
// Reads video from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -334,8 +334,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
|
||||
private void logNoseTipKeypoint(
|
||||
FaceDetectionResult result, int faceIndex, boolean showPixelValues) {
|
||||
if (result.multiFaceDetections().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
RelativeKeypoint noseTip =
|
||||
FaceDetection.getFaceKeypoint(result, faceIndex, FaceKeypoint.NOSE_TIP);
|
||||
result
|
||||
.multiFaceDetections()
|
||||
.get(faceIndex)
|
||||
.getLocationData()
|
||||
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
|
||||
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
|
||||
if (showPixelValues) {
|
||||
int width = result.inputBitmap().getWidth();
|
||||
|
|
|
@ -34,19 +34,19 @@ public class FaceMeshResultImageView extends AppCompatImageView {
|
|||
private static final String TAG = "FaceMeshResultImageView";
|
||||
|
||||
private static final int TESSELATION_COLOR = Color.parseColor("#70C0C0C0");
|
||||
private static final int TESSELATION_THICKNESS = 5;
|
||||
private static final int TESSELATION_THICKNESS = 3; // Pixels
|
||||
private static final int RIGHT_EYE_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int RIGHT_EYE_THICKNESS = 8;
|
||||
private static final int RIGHT_EYE_THICKNESS = 5; // Pixels
|
||||
private static final int RIGHT_EYEBROW_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int RIGHT_EYEBROW_THICKNESS = 8;
|
||||
private static final int RIGHT_EYEBROW_THICKNESS = 5; // Pixels
|
||||
private static final int LEFT_EYE_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int LEFT_EYE_THICKNESS = 8;
|
||||
private static final int LEFT_EYE_THICKNESS = 5; // Pixels
|
||||
private static final int LEFT_EYEBROW_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int LEFT_EYEBROW_THICKNESS = 8;
|
||||
private static final int LEFT_EYEBROW_THICKNESS = 5; // Pixels
|
||||
private static final int FACE_OVAL_COLOR = Color.parseColor("#E0E0E0");
|
||||
private static final int FACE_OVAL_THICKNESS = 8;
|
||||
private static final int FACE_OVAL_THICKNESS = 5; // Pixels
|
||||
private static final int LIPS_COLOR = Color.parseColor("#E0E0E0");
|
||||
private static final int LIPS_THICKNESS = 8;
|
||||
private static final int LIPS_THICKNESS = 5; // Pixels
|
||||
private Bitmap latest;
|
||||
|
||||
public FaceMeshResultImageView(Context context) {
|
||||
|
|
|
@ -176,9 +176,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
});
|
||||
imageView = new FaceMeshResultImageView(this);
|
||||
}
|
||||
|
@ -240,9 +240,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.VIDEO);
|
||||
// Reads video from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,16 @@ import java.util.List;
|
|||
public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
||||
private static final String TAG = "HandsResultGlRenderer";
|
||||
|
||||
private static final float CONNECTION_THICKNESS = 20.0f;
|
||||
private static final float[] LEFT_HAND_CONNECTION_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
|
||||
private static final float[] RIGHT_HAND_CONNECTION_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
|
||||
private static final float CONNECTION_THICKNESS = 25.0f;
|
||||
private static final float[] LEFT_HAND_HOLLOW_CIRCLE_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
|
||||
private static final float[] RIGHT_HAND_HOLLOW_CIRCLE_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
|
||||
private static final float HOLLOW_CIRCLE_RADIUS = 0.01f;
|
||||
private static final float[] LEFT_HAND_LANDMARK_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
|
||||
private static final float[] RIGHT_HAND_LANDMARK_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
|
||||
private static final float LANDMARK_RADIUS = 0.008f;
|
||||
private static final int NUM_SEGMENTS = 120;
|
||||
private static final String VERTEX_SHADER =
|
||||
"uniform mat4 uProjectionMatrix;\n"
|
||||
+ "attribute vec4 vPosition;\n"
|
||||
|
@ -37,12 +46,14 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
+ "}";
|
||||
private static final String FRAGMENT_SHADER =
|
||||
"precision mediump float;\n"
|
||||
+ "uniform vec4 uColor;\n"
|
||||
+ "void main() {\n"
|
||||
+ " gl_FragColor = vec4(0, 1, 0, 1);\n"
|
||||
+ " gl_FragColor = uColor;\n"
|
||||
+ "}";
|
||||
private int program;
|
||||
private int positionHandle;
|
||||
private int projectionMatrixHandle;
|
||||
private int colorHandle;
|
||||
|
||||
private int loadShader(int type, String shaderCode) {
|
||||
int shader = GLES20.glCreateShader(type);
|
||||
|
@ -61,6 +72,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
GLES20.glLinkProgram(program);
|
||||
positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
|
||||
projectionMatrixHandle = GLES20.glGetUniformLocation(program, "uProjectionMatrix");
|
||||
colorHandle = GLES20.glGetUniformLocation(program, "uColor");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -74,7 +86,22 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
|
||||
int numHands = result.multiHandLandmarks().size();
|
||||
for (int i = 0; i < numHands; ++i) {
|
||||
drawLandmarks(result.multiHandLandmarks().get(i).getLandmarkList());
|
||||
boolean isLeftHand = result.multiHandedness().get(i).getLabel().equals("Left");
|
||||
drawConnections(
|
||||
result.multiHandLandmarks().get(i).getLandmarkList(),
|
||||
isLeftHand ? LEFT_HAND_CONNECTION_COLOR : RIGHT_HAND_CONNECTION_COLOR);
|
||||
for (NormalizedLandmark landmark : result.multiHandLandmarks().get(i).getLandmarkList()) {
|
||||
// Draws the landmark.
|
||||
drawCircle(
|
||||
landmark.getX(),
|
||||
landmark.getY(),
|
||||
isLeftHand ? LEFT_HAND_LANDMARK_COLOR : RIGHT_HAND_LANDMARK_COLOR);
|
||||
// Draws a hollow circle around the landmark.
|
||||
drawHollowCircle(
|
||||
landmark.getX(),
|
||||
landmark.getY(),
|
||||
isLeftHand ? LEFT_HAND_HOLLOW_CIRCLE_COLOR : RIGHT_HAND_HOLLOW_CIRCLE_COLOR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -87,7 +114,8 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
GLES20.glDeleteProgram(program);
|
||||
}
|
||||
|
||||
private void drawLandmarks(List<NormalizedLandmark> handLandmarkList) {
|
||||
private void drawConnections(List<NormalizedLandmark> handLandmarkList, float[] colorArray) {
|
||||
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
|
||||
for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
|
||||
NormalizedLandmark start = handLandmarkList.get(c.start());
|
||||
NormalizedLandmark end = handLandmarkList.get(c.end());
|
||||
|
@ -103,4 +131,51 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
|
||||
}
|
||||
}
|
||||
|
||||
private void drawCircle(float x, float y, float[] colorArray) {
|
||||
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
|
||||
int vertexCount = NUM_SEGMENTS + 2;
|
||||
float[] vertices = new float[vertexCount * 3];
|
||||
vertices[0] = x;
|
||||
vertices[1] = y;
|
||||
vertices[2] = 0;
|
||||
for (int i = 1; i < vertexCount; i++) {
|
||||
float angle = 2.0f * i * (float) Math.PI / NUM_SEGMENTS;
|
||||
int currentIndex = 3 * i;
|
||||
vertices[currentIndex] = x + (float) (LANDMARK_RADIUS * Math.cos(angle));
|
||||
vertices[currentIndex + 1] = y + (float) (LANDMARK_RADIUS * Math.sin(angle));
|
||||
vertices[currentIndex + 2] = 0;
|
||||
}
|
||||
FloatBuffer vertexBuffer =
|
||||
ByteBuffer.allocateDirect(vertices.length * 4)
|
||||
.order(ByteOrder.nativeOrder())
|
||||
.asFloatBuffer()
|
||||
.put(vertices);
|
||||
vertexBuffer.position(0);
|
||||
GLES20.glEnableVertexAttribArray(positionHandle);
|
||||
GLES20.glVertexAttribPointer(positionHandle, 3, GLES20.GL_FLOAT, false, 0, vertexBuffer);
|
||||
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_FAN, 0, vertexCount);
|
||||
}
|
||||
|
||||
private void drawHollowCircle(float x, float y, float[] colorArray) {
|
||||
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
|
||||
int vertexCount = NUM_SEGMENTS + 1;
|
||||
float[] vertices = new float[vertexCount * 3];
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
float angle = 2.0f * i * (float) Math.PI / NUM_SEGMENTS;
|
||||
int currentIndex = 3 * i;
|
||||
vertices[currentIndex] = x + (float) (HOLLOW_CIRCLE_RADIUS * Math.cos(angle));
|
||||
vertices[currentIndex + 1] = y + (float) (HOLLOW_CIRCLE_RADIUS * Math.sin(angle));
|
||||
vertices[currentIndex + 2] = 0;
|
||||
}
|
||||
FloatBuffer vertexBuffer =
|
||||
ByteBuffer.allocateDirect(vertices.length * 4)
|
||||
.order(ByteOrder.nativeOrder())
|
||||
.asFloatBuffer()
|
||||
.put(vertices);
|
||||
vertexBuffer.position(0);
|
||||
GLES20.glEnableVertexAttribArray(positionHandle);
|
||||
GLES20.glVertexAttribPointer(positionHandle, 3, GLES20.GL_FLOAT, false, 0, vertexBuffer);
|
||||
GLES20.glDrawArrays(GLES20.GL_LINE_STRIP, 0, vertexCount);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,10 +31,15 @@ import java.util.List;
|
|||
public class HandsResultImageView extends AppCompatImageView {
|
||||
private static final String TAG = "HandsResultImageView";
|
||||
|
||||
private static final int LANDMARK_COLOR = Color.RED;
|
||||
private static final int LANDMARK_RADIUS = 15;
|
||||
private static final int CONNECTION_COLOR = Color.GREEN;
|
||||
private static final int CONNECTION_THICKNESS = 10;
|
||||
private static final int LEFT_HAND_CONNECTION_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int RIGHT_HAND_CONNECTION_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int CONNECTION_THICKNESS = 8; // Pixels
|
||||
private static final int LEFT_HAND_HOLLOW_CIRCLE_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int RIGHT_HAND_HOLLOW_CIRCLE_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int HOLLOW_CIRCLE_WIDTH = 5; // Pixels
|
||||
private static final int LEFT_HAND_LANDMARK_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int RIGHT_HAND_LANDMARK_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int LANDMARK_RADIUS = 10; // Pixels
|
||||
private Bitmap latest;
|
||||
|
||||
public HandsResultImageView(Context context) {
|
||||
|
@ -62,7 +67,11 @@ public class HandsResultImageView extends AppCompatImageView {
|
|||
int numHands = result.multiHandLandmarks().size();
|
||||
for (int i = 0; i < numHands; ++i) {
|
||||
drawLandmarksOnCanvas(
|
||||
result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height);
|
||||
result.multiHandLandmarks().get(i).getLandmarkList(),
|
||||
result.multiHandedness().get(i).getLabel().equals("Left"),
|
||||
canvas,
|
||||
width,
|
||||
height);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,11 +84,16 @@ public class HandsResultImageView extends AppCompatImageView {
|
|||
}
|
||||
|
||||
private void drawLandmarksOnCanvas(
|
||||
List<NormalizedLandmark> handLandmarkList, Canvas canvas, int width, int height) {
|
||||
List<NormalizedLandmark> handLandmarkList,
|
||||
boolean isLeftHand,
|
||||
Canvas canvas,
|
||||
int width,
|
||||
int height) {
|
||||
// Draw connections.
|
||||
for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
|
||||
Paint connectionPaint = new Paint();
|
||||
connectionPaint.setColor(CONNECTION_COLOR);
|
||||
connectionPaint.setColor(
|
||||
isLeftHand ? LEFT_HAND_CONNECTION_COLOR : RIGHT_HAND_CONNECTION_COLOR);
|
||||
connectionPaint.setStrokeWidth(CONNECTION_THICKNESS);
|
||||
NormalizedLandmark start = handLandmarkList.get(c.start());
|
||||
NormalizedLandmark end = handLandmarkList.get(c.end());
|
||||
|
@ -91,11 +105,23 @@ public class HandsResultImageView extends AppCompatImageView {
|
|||
connectionPaint);
|
||||
}
|
||||
Paint landmarkPaint = new Paint();
|
||||
landmarkPaint.setColor(LANDMARK_COLOR);
|
||||
// Draw landmarks.
|
||||
landmarkPaint.setColor(isLeftHand ? LEFT_HAND_LANDMARK_COLOR : RIGHT_HAND_LANDMARK_COLOR);
|
||||
// Draws landmarks.
|
||||
for (LandmarkProto.NormalizedLandmark landmark : handLandmarkList) {
|
||||
canvas.drawCircle(
|
||||
landmark.getX() * width, landmark.getY() * height, LANDMARK_RADIUS, landmarkPaint);
|
||||
}
|
||||
// Draws hollow circles around landmarks.
|
||||
landmarkPaint.setColor(
|
||||
isLeftHand ? LEFT_HAND_HOLLOW_CIRCLE_COLOR : RIGHT_HAND_HOLLOW_CIRCLE_COLOR);
|
||||
landmarkPaint.setStrokeWidth(HOLLOW_CIRCLE_WIDTH);
|
||||
landmarkPaint.setStyle(Paint.Style.STROKE);
|
||||
for (LandmarkProto.NormalizedLandmark landmark : handLandmarkList) {
|
||||
canvas.drawCircle(
|
||||
landmark.getX() * width,
|
||||
landmark.getY() * height,
|
||||
LANDMARK_RADIUS + HOLLOW_CIRCLE_WIDTH,
|
||||
landmarkPaint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import androidx.activity.result.ActivityResultLauncher;
|
|||
import androidx.activity.result.contract.ActivityResultContracts;
|
||||
import androidx.exifinterface.media.ExifInterface;
|
||||
// ContentResolver dependency
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.Landmark;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.solutioncore.CameraInput;
|
||||
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
|
||||
|
@ -177,9 +178,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
});
|
||||
imageView = new HandsResultImageView(this);
|
||||
}
|
||||
|
@ -193,7 +194,7 @@ public class MainActivity extends AppCompatActivity {
|
|||
this,
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(true)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(RUN_ON_GPU)
|
||||
.build());
|
||||
|
||||
|
@ -241,9 +242,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.VIDEO);
|
||||
// Reads video from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -269,7 +270,7 @@ public class MainActivity extends AppCompatActivity {
|
|||
this,
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(false)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(RUN_ON_GPU)
|
||||
.build());
|
||||
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
|
||||
|
@ -336,7 +337,11 @@ public class MainActivity extends AppCompatActivity {
|
|||
}
|
||||
|
||||
private void logWristLandmark(HandsResult result, boolean showPixelValues) {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
|
||||
if (result.multiHandLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
NormalizedLandmark wristLandmark =
|
||||
result.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
|
||||
if (showPixelValues) {
|
||||
int width = result.inputBitmap().getWidth();
|
||||
|
@ -353,5 +358,16 @@ public class MainActivity extends AppCompatActivity {
|
|||
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
|
||||
wristLandmark.getX(), wristLandmark.getY()));
|
||||
}
|
||||
if (result.multiHandWorldLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
Landmark wristWorldLandmark =
|
||||
result.multiHandWorldLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe Hand wrist world coordinates (in meters with the origin at the hand's"
|
||||
+ " approximate geometric center): x=%f m, y=%f m, z=%f m",
|
||||
wristWorldLandmark.getX(), wristWorldLandmark.getY(), wristWorldLandmark.getZ()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ android_binary(
|
|||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
"//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu.binarypb",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
|
||||
|
|
|
@ -37,9 +37,11 @@ android_binary(
|
|||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
"//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_lite.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
|
||||
|
@ -53,6 +55,7 @@ android_binary(
|
|||
"outputVideoStreamName": "output_video",
|
||||
"flipFramesVertically": "True",
|
||||
"converterNumBuffers": "2",
|
||||
# "modelComplexity": "0" # 0=lite, 1=heavy, not specified=heavy
|
||||
},
|
||||
multidex = "native",
|
||||
deps = [
|
||||
|
|
|
@ -14,6 +14,9 @@
|
|||
|
||||
package com.google.mediapipe.apps.handtrackinggpu;
|
||||
|
||||
import android.content.pm.ApplicationInfo;
|
||||
import android.content.pm.PackageManager;
|
||||
import android.content.pm.PackageManager.NameNotFoundException;
|
||||
import android.os.Bundle;
|
||||
import android.util.Log;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
|
@ -30,6 +33,7 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
|
|||
private static final String TAG = "MainActivity";
|
||||
|
||||
private static final String INPUT_NUM_HANDS_SIDE_PACKET_NAME = "num_hands";
|
||||
private static final String INPUT_MODEL_COMPLEXITY = "model_complexity";
|
||||
private static final String OUTPUT_LANDMARKS_STREAM_NAME = "hand_landmarks";
|
||||
// Max number of hands to detect/process.
|
||||
private static final int NUM_HANDS = 2;
|
||||
|
@ -38,9 +42,22 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
|
|||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
|
||||
ApplicationInfo applicationInfo;
|
||||
try {
|
||||
applicationInfo =
|
||||
getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
|
||||
} catch (NameNotFoundException e) {
|
||||
throw new AssertionError(e);
|
||||
}
|
||||
|
||||
AndroidPacketCreator packetCreator = processor.getPacketCreator();
|
||||
Map<String, Packet> inputSidePackets = new HashMap<>();
|
||||
inputSidePackets.put(INPUT_NUM_HANDS_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_HANDS));
|
||||
if (applicationInfo.metaData.containsKey("modelComplexity")) {
|
||||
inputSidePackets.put(
|
||||
INPUT_MODEL_COMPLEXITY,
|
||||
packetCreator.createInt32(applicationInfo.metaData.getInt("modelComplexity")));
|
||||
}
|
||||
processor.setInputSidePackets(inputSidePackets);
|
||||
|
||||
// To show verbose logging, run:
|
||||
|
|
|
@ -282,8 +282,12 @@ absl::Status KinematicPathSolver::UpdatePixelsPerDegree(
|
|||
|
||||
absl::Status KinematicPathSolver::UpdateMinMaxLocation(const int min_location,
|
||||
const int max_location) {
|
||||
RET_CHECK(initialized_)
|
||||
<< "UpdateMinMaxLocation called before first observation added.";
|
||||
if (!initialized_) {
|
||||
max_location_ = max_location;
|
||||
min_location_ = min_location;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
double prior_distance = max_location_ - min_location_;
|
||||
double updated_distance = max_location - min_location;
|
||||
double scale_change = updated_distance / prior_distance;
|
||||
|
|
|
@ -435,6 +435,23 @@ TEST(KinematicPathSolverTest, PassBorderTest) {
|
|||
EXPECT_FLOAT_EQ(state, 404.56668);
|
||||
}
|
||||
|
||||
TEST(KinematicPathSolverTest, PassUpdateUpdateMinMaxLocationIfUninitialized) {
|
||||
KinematicOptions options;
|
||||
options.set_min_motion_to_reframe(2.0);
|
||||
options.set_max_velocity(1000);
|
||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||
MP_EXPECT_OK(solver.UpdateMinMaxLocation(0, 500));
|
||||
}
|
||||
|
||||
TEST(KinematicPathSolverTest, PassUpdateUpdateMinMaxLocationIfInitialized) {
|
||||
KinematicOptions options;
|
||||
options.set_min_motion_to_reframe(2.0);
|
||||
options.set_max_velocity(1000);
|
||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||
MP_EXPECT_OK(solver.UpdateMinMaxLocation(0, 500));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace autoflip
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -55,7 +55,7 @@ objc_library(
|
|||
name = "HandDetectionGpuAppLibrary",
|
||||
data = [
|
||||
"//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu_binary_graph",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",
|
||||
|
|
|
@ -64,7 +64,7 @@ objc_library(
|
|||
"//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",
|
||||
|
|
mediapipe/framework/formats/body_rig.proto (new file, 40 lines)

@@ -0,0 +1,40 @@
+// Copyright 2021 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package mediapipe;
+
+// Joint of a 3D human model (e.g. elbow, knee, wrist). Contains 3D rotation of
+// the joint and its visibility.
+message Joint {
+  // Joint rotation in 6D continuous representation.
+  // Such a representation is more suitable for NN model training and can be
+  // converted to quaternions and Euler angles if needed. Details can be found
+  // in https://arxiv.org/abs/1812.07035.
+  repeated float rotation_6d = 1;
+
+  // Joint visibility.
+  // Float score of whether joint is visible: present on the screen and not
+  // occluded by other objects. Depending on the model, visibility value is
+  // either a sigmoid or an argument of sigmoid, but in any case higher value
+  // indicates higher probability of joint being visible. Should stay unset if
+  // not supported.
+  optional float visibility = 2;
+}
+
+// Group of Joint protos.
+message JointList {
+  repeated Joint joint = 1;
+}
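The 6D rotation representation referenced above (Zhou et al., https://arxiv.org/abs/1812.07035) can be converted back into a standard 3x3 rotation matrix with a Gram-Schmidt-style construction. The sketch below is illustrative only and is not part of this change; the packing of `rotation_6d` as the first two columns of the rotation matrix is an assumption, not something the proto specifies.

```python
import numpy as np

def rotation_6d_to_matrix(rotation_6d):
  """Converts a 6D rotation (two 3D vectors) to a 3x3 rotation matrix.

  Follows the Gram-Schmidt construction from https://arxiv.org/abs/1812.07035.
  Assumes the first three values are the first column of the rotation matrix
  and the last three are the second column.
  """
  a1 = np.asarray(rotation_6d[:3], dtype=np.float64)
  a2 = np.asarray(rotation_6d[3:6], dtype=np.float64)
  b1 = a1 / np.linalg.norm(a1)            # First basis vector.
  a2_residual = a2 - np.dot(b1, a2) * b1  # Remove the component along b1.
  b2 = a2_residual / np.linalg.norm(a2_residual)  # Second basis vector.
  b3 = np.cross(b1, b2)                   # Third basis vector.
  return np.stack([b1, b2, b3], axis=1)   # Columns of the rotation matrix.

# Example: this 6D vector encodes the identity rotation.
print(rotation_6d_to_matrix([1, 0, 0, 0, 1, 0]))
```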
@ -109,8 +109,7 @@ class Image {
|
|||
return gpu_buffer_.GetCVPixelBufferRef();
|
||||
}
|
||||
#else
|
||||
const mediapipe::GlTextureBufferSharedPtr& GetGlTextureBufferSharedPtr()
|
||||
const {
|
||||
mediapipe::GlTextureBufferSharedPtr GetGlTextureBufferSharedPtr() const {
|
||||
if (use_gpu_ == false) ConvertToGpu();
|
||||
return gpu_buffer_.GetGlTextureBufferSharedPtr();
|
||||
}
|
||||
|
|
|
@ -22,9 +22,8 @@
|
|||
// For consistency, we now set MEDIAPIPE_MOBILE there too. However, for the sake
|
||||
// of projects that may want to build MediaPipe using alternative build systems,
|
||||
// we also try to set platform-specific defines in this header if missing.
|
||||
#if !defined(MEDIAPIPE_MOBILE) && \
|
||||
(defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) || \
|
||||
defined(__EMSCRIPTEN__))
|
||||
#if !defined(MEDIAPIPE_MOBILE) && \
|
||||
(defined(__ANDROID__) || defined(__EMSCRIPTEN__))
|
||||
#define MEDIAPIPE_MOBILE
|
||||
#endif
|
||||
|
||||
|
@ -36,6 +35,11 @@
|
|||
#include "TargetConditionals.h" // for TARGET_OS_*
|
||||
#if !defined(MEDIAPIPE_IOS) && !TARGET_OS_OSX
|
||||
#define MEDIAPIPE_IOS
|
||||
|
||||
#if !defined(MEDIAPIPE_MOBILE) && !TARGET_OS_OSX
|
||||
#define MEDIAPIPE_MOBILE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#if !defined(MEDIAPIPE_OSX) && TARGET_OS_OSX
|
||||
#define MEDIAPIPE_OSX
|
||||
|
|
|
@ -65,9 +65,9 @@ absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
|
|||
|
||||
OptionsSyntaxUtil syntax_util;
|
||||
for (auto& node : *config->mutable_node()) {
|
||||
FieldData node_data = options_field_util::AsFieldData(node);
|
||||
|
||||
for (const std::string& option_def : node.option_value()) {
|
||||
FieldData node_data = options_field_util::AsFieldData(node);
|
||||
|
||||
std::vector<absl::string_view> tag_and_name =
|
||||
syntax_util.StrSplitTags(option_def);
|
||||
std::string graph_tag = syntax_util.OptionFieldsTag(tag_and_name[1]);
|
||||
|
@ -96,6 +96,7 @@ absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
|
|||
status.Update(MergeField(node_path, packet_data, &node_options));
|
||||
options_field_util::SetOptionsMessage(node_options, &node);
|
||||
}
|
||||
node.clear_option_value();
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
|
|
@ -137,7 +137,6 @@ TEST_F(OptionsUtilTest, CopyLiteralOptions) {
|
|||
NightLightCalculatorOptions expected_node_options;
|
||||
expected_node_options.add_num_lights(8);
|
||||
expected_node.add_node_options()->PackFrom(expected_node_options);
|
||||
*expected_node.add_option_value() = "num_lights:options/chain_length";
|
||||
EXPECT_THAT(actual_node, EqualsProto(expected_node));
|
||||
|
||||
MP_EXPECT_OK(graph.StartRun({}));
|
||||
|
|
|
@ -656,7 +656,6 @@ TEST(SubgraphExpansionTest, SimpleSubgraphOptionsUsage) {
|
|||
chain_length: 3
|
||||
}
|
||||
}
|
||||
option_value: "chain_length:options/chain_length"
|
||||
}
|
||||
type: "MoonSubgraph"
|
||||
graph_options {
|
||||
|
@ -666,5 +665,84 @@ TEST(SubgraphExpansionTest, SimpleSubgraphOptionsUsage) {
|
|||
EXPECT_THAT(moon_subgraph, mediapipe::EqualsProto(expected_graph));
|
||||
}
|
||||
|
||||
// Shows ExpandSubgraphs applied twice. "option_value" fields are evaluated
|
||||
// and removed on the first ExpandSubgraphs call. If "option_value" fields
|
||||
// are not removed during ExpandSubgraphs, they evaluate incorrectly on the
|
||||
// second ExpandSubgraphs call and this test fails on "expected_node_options".
|
||||
TEST(SubgraphExpansionTest, SimpleSubgraphOptionsTwice) {
|
||||
GraphRegistry graph_registry;
|
||||
|
||||
// Register a simple-subgraph that accepts graph options.
|
||||
auto moon_subgraph =
|
||||
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
|
||||
type: "MoonSubgraph"
|
||||
graph_options: {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
|
||||
}
|
||||
node: {
|
||||
calculator: "MoonCalculator"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
|
||||
}
|
||||
option_value: "chain_length:options/chain_length"
|
||||
}
|
||||
)pb");
|
||||
graph_registry.Register("MoonSubgraph", moon_subgraph);
|
||||
|
||||
// Invoke the simple-subgraph with graph options.
|
||||
// The empty NodeChainSubgraphOptions below allows "option_value" fields
|
||||
// on "MoonCalculator" to evaluate incorrectly, if not removed.
|
||||
auto sky_graph = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
|
||||
graph_options: {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
|
||||
}
|
||||
node: {
|
||||
calculator: "MoonSubgraph"
|
||||
options: {
|
||||
[mediapipe.NodeChainSubgraphOptions.ext] {
|
||||
node_type: "DoubleIntCalculator"
|
||||
chain_length: 3
|
||||
}
|
||||
}
|
||||
}
|
||||
)pb");
|
||||
|
||||
// The first ExpandSubgraphs call evaluates and removes "option_value" fields.
|
||||
MP_ASSERT_OK(tool::ExpandSubgraphs(&sky_graph, &graph_registry));
|
||||
auto expanded_1 = sky_graph;
|
||||
|
||||
// The second ExpandSubgraphs call has no effect on the expanded graph.
|
||||
MP_ASSERT_OK(tool::ExpandSubgraphs(&sky_graph, &graph_registry));
|
||||
|
||||
// Validate the expected node_options for the "MoonSubgraph".
|
||||
// If the "option_value" fields are not removed during ExpandSubgraphs,
|
||||
// this test fails with an incorrect value for "chain_length".
|
||||
auto expected_node_options =
|
||||
mediapipe::ParseTextProtoOrDie<mediapipe::NodeChainSubgraphOptions>(
|
||||
"chain_length: 3");
|
||||
mediapipe::NodeChainSubgraphOptions node_options;
|
||||
sky_graph.node(0).node_options(0).UnpackTo(&node_options);
|
||||
ASSERT_THAT(node_options, mediapipe::EqualsProto(expected_node_options));
|
||||
|
||||
// Validate the results from both ExpandSubgraphs() calls.
|
||||
CalculatorGraphConfig expected_graph =
|
||||
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
|
||||
graph_options {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
|
||||
}
|
||||
node {
|
||||
name: "moonsubgraph__MoonCalculator"
|
||||
calculator: "MoonCalculator"
|
||||
node_options {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {
|
||||
chain_length: 3
|
||||
}
|
||||
}
|
||||
}
|
||||
)pb");
|
||||
EXPECT_THAT(expanded_1, mediapipe::EqualsProto(expected_graph));
|
||||
EXPECT_THAT(sky_graph, mediapipe::EqualsProto(expected_graph));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -112,13 +112,13 @@ GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
|
|||
|
||||
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
|
||||
const GpuBuffer& gpu_buffer, int plane) {
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureReadView(plane));
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetReadView<GlTextureView>(plane));
|
||||
}
|
||||
|
||||
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
|
||||
const ImageFrame& image_frame) {
|
||||
auto gpu_buffer = GpuBuffer::CopyingImageFrame(image_frame);
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureReadView(0));
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetReadView<GlTextureView>(0));
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -149,7 +149,7 @@ GlTexture GlCalculatorHelperImpl::CreateDestinationTexture(
|
|||
|
||||
GpuBuffer gpu_buffer =
|
||||
gpu_resources_.gpu_buffer_pool().GetBuffer(width, height, format);
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureWriteView(0));
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetWriteView<GlTextureView>(0));
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -224,7 +224,8 @@ void GlTextureBuffer::WaitForConsumersOnGpu() {
|
|||
// precisely, on only one GL context.
|
||||
}
|
||||
|
||||
GlTextureView GlTextureBuffer::GetGlTextureReadView(
|
||||
GlTextureView GlTextureBuffer::GetReadView(
|
||||
mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const {
|
||||
auto gl_context = GlContext::GetCurrent();
|
||||
CHECK(gl_context);
|
||||
|
@ -241,7 +242,8 @@ GlTextureView GlTextureBuffer::GetGlTextureReadView(
|
|||
nullptr);
|
||||
}
|
||||
|
||||
GlTextureView GlTextureBuffer::GetGlTextureWriteView(
|
||||
GlTextureView GlTextureBuffer::GetWriteView(
|
||||
mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) {
|
||||
auto gl_context = GlContext::GetCurrent();
|
||||
CHECK(gl_context);
|
||||
|
@ -341,7 +343,8 @@ std::unique_ptr<ImageFrame> GlTextureBuffer::AsImageFrame() const {
|
|||
ImageFormat::Format image_format = ImageFormatForGpuBufferFormat(format());
|
||||
auto output = absl::make_unique<ImageFrame>(
|
||||
image_format, width(), height(), ImageFrame::kGlDefaultAlignmentBoundary);
|
||||
auto view = GetGlTextureReadView(nullptr, 0);
|
||||
auto view =
|
||||
GetReadView(mediapipe::internal::types<GlTextureView>{}, nullptr, 0);
|
||||
ReadTexture(view, format(), output->MutablePixelData(),
|
||||
output->PixelDataSize());
|
||||
return output;
|
||||
|
|
|
@ -32,7 +32,9 @@ namespace mediapipe {
|
|||
class GlCalculatorHelperImpl;
|
||||
|
||||
// Implements a GPU memory buffer as an OpenGL texture. For internal use.
|
||||
class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
|
||||
class GlTextureBuffer
|
||||
: public mediapipe::internal::GpuBufferStorageImpl<
|
||||
GlTextureBuffer, mediapipe::internal::ViewProvider<GlTextureView>> {
|
||||
public:
|
||||
// This is called when the texture buffer is deleted. It is passed a sync
|
||||
// token created at that time on the GlContext. If the GlTextureBuffer has
|
||||
|
@ -86,11 +88,12 @@ class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
|
|||
int height() const { return height_; }
|
||||
GpuBufferFormat format() const { return format_; }
|
||||
|
||||
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) const override;
|
||||
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) override;
|
||||
void ViewDoneWriting(const GlTextureView& view) override;
|
||||
GlTextureView GetReadView(mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) const override;
|
||||
GlTextureView GetWriteView(mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) override;
|
||||
std::unique_ptr<ImageFrame> AsImageFrame() const override;
|
||||
|
||||
// If this texture is going to be used outside of the context that produced
|
||||
|
@ -142,6 +145,8 @@ class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
|
|||
// Returns true on success.
|
||||
bool CreateInternal(const void* data, int alignment = 4);
|
||||
|
||||
void ViewDoneWriting(const GlTextureView& view);
|
||||
|
||||
friend class GlCalculatorHelperImpl;
|
||||
|
||||
GLuint name_ = 0;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#ifndef MEDIAPIPE_GPU_GPU_BUFFER_H_
|
||||
#define MEDIAPIPE_GPU_GPU_BUFFER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
|
@ -23,6 +24,10 @@
|
|||
#include "mediapipe/gpu/gpu_buffer_format.h"
|
||||
#include "mediapipe/gpu/gpu_buffer_storage.h"
|
||||
|
||||
// Note: these headers are needed for the legacy storage APIs. Do not add more
|
||||
// storage-specific headers here. See WebGpuTextureBuffer/View for an example
|
||||
// of adding a new storage and view.
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <CoreVideo/CoreVideo.h>
|
||||
|
||||
|
@ -31,9 +36,7 @@
|
|||
|
||||
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
#include "mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.h"
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
|
||||
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
#else
|
||||
#include "mediapipe/gpu/gl_texture_buffer.h"
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
|
||||
|
@@ -60,19 +63,28 @@ class GpuBuffer {
  // are not portable. Applications and calculators should normally obtain
  // GpuBuffers in a portable way from the framework, e.g. using
  // GpuBufferMultiPool.
  explicit GpuBuffer(
      std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage)
      : storage_(std::move(storage)) {}

  // Note: these constructors and accessors for specific storage types exist
  // for backwards compatibility reasons. Do not add new ones.
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
  explicit GpuBuffer(CFHolder<CVPixelBufferRef> pixel_buffer)
      : pixel_buffer_(std::move(pixel_buffer)) {}
      : storage_(std::make_shared<GpuBufferStorageCvPixelBuffer>(
            std::move(pixel_buffer))) {}
  explicit GpuBuffer(CVPixelBufferRef pixel_buffer)
      : pixel_buffer_(pixel_buffer) {}
      : storage_(
            std::make_shared<GpuBufferStorageCvPixelBuffer>(pixel_buffer)) {}

  CVPixelBufferRef GetCVPixelBufferRef() const { return *pixel_buffer_; }
  CVPixelBufferRef GetCVPixelBufferRef() const {
    auto p = storage_->down_cast<GpuBufferStorageCvPixelBuffer>();
    if (p) return **p;
    return nullptr;
  }
#else
  explicit GpuBuffer(GlTextureBufferSharedPtr texture_buffer)
      : texture_buffer_(std::move(texture_buffer)) {}

  const GlTextureBufferSharedPtr& GetGlTextureBufferSharedPtr() const {
    return texture_buffer_;
  GlTextureBufferSharedPtr GetGlTextureBufferSharedPtr() const {
    return internal_storage<GlTextureBuffer>();
  }
#endif  // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER

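The hunk above routes the legacy type-specific constructors and accessors through the new shared `storage_` pointer. A minimal caller-side sketch of the two construction paths, assuming a build where `MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER` is 0 and a valid `GlTextureBuffer` already exists; include paths and everything outside the GpuBuffer API shown in the diff are assumptions:

```cpp
#include <memory>

#include "mediapipe/gpu/gl_texture_buffer.h"
#include "mediapipe/gpu/gpu_buffer.h"

// Sketch: both construction paths end up in the same storage_ member.
void ConstructGpuBuffers(mediapipe::GlTextureBufferSharedPtr texture_buffer) {
  // Legacy path: type-specific constructor, kept for backwards compatibility.
  mediapipe::GpuBuffer from_texture(texture_buffer);

  // New path: any GpuBufferStorage subclass can be handed over directly,
  // since GlTextureBuffer now derives from GpuBufferStorageImpl.
  mediapipe::GpuBuffer from_storage(
      std::static_pointer_cast<mediapipe::internal::GpuBufferStorage>(
          texture_buffer));

  // The legacy accessor is now a thin wrapper over internal_storage<T>().
  mediapipe::GlTextureBufferSharedPtr back =
      from_texture.GetGlTextureBufferSharedPtr();
  (void)back;
}
```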
@@ -93,14 +105,26 @@ class GpuBuffer {
  // Allow assignment from nullptr.
  GpuBuffer& operator=(std::nullptr_t other);

  GlTextureView GetGlTextureReadView(int plane) const {
    return current_storage().GetGlTextureReadView(
        std::make_shared<GpuBuffer>(*this), plane);
  // Gets a read view of the specified type. The arguments depend on the
  // specific view type; see the corresponding ViewProvider.
  template <class View, class... Args>
  auto GetReadView(Args... args) const {
    return current_storage()
        .down_cast<mediapipe::internal::ViewProvider<View>>()
        ->GetReadView(mediapipe::internal::types<View>{},
                      std::make_shared<GpuBuffer>(*this),
                      std::forward<Args>(args)...);
  }

  GlTextureView GetGlTextureWriteView(int plane) {
    return current_storage().GetGlTextureWriteView(
        std::make_shared<GpuBuffer>(*this), plane);
  // Gets a write view of the specified type. The arguments depend on the
  // specific view type; see the corresponding ViewProvider.
  template <class View, class... Args>
  auto GetWriteView(Args... args) {
    return current_storage()
        .down_cast<mediapipe::internal::ViewProvider<View>>()
        ->GetWriteView(mediapipe::internal::types<View>{},
                       std::make_shared<GpuBuffer>(*this),
                       std::forward<Args>(args)...);
  }

  // Make a GpuBuffer copying the data from an ImageFrame.

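As the comments in the hunk above note, the view type is now a template argument and any extra arguments are forwarded to that view's ViewProvider. A caller-side sketch of the replacement for the old `GetGlTextureReadView(plane)` call; it assumes a current GL context and that `GlTextureView` keeps its existing `name()`/`target()` accessors, and the include paths are assumptions:

```cpp
#include "mediapipe/gpu/gl_base.h"
#include "mediapipe/gpu/gpu_buffer.h"

// Sketch: reading plane 0 of a GpuBuffer through the new templated API.
void BindForReading(const mediapipe::GpuBuffer& buffer) {
  // The trailing int is forwarded to
  // ViewProvider<GlTextureView>::GetReadView(types<GlTextureView>, ..., plane).
  auto view = buffer.GetReadView<mediapipe::GlTextureView>(/*plane=*/0);
  glBindTexture(view.target(), view.name());
}
```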
@@ -115,77 +139,57 @@ class GpuBuffer {
    return current_storage().AsImageFrame();
  }

  // Attempts to access an underlying storage object of the specified type.
  // This method is meant for internal use: user code should access the contents
  // using views.
  template <class T>
  std::shared_ptr<T> internal_storage() const {
    if (storage_->down_cast<T>()) return std::static_pointer_cast<T>(storage_);
    return nullptr;
  }

 private:
  class PlaceholderGpuBufferStorage
      : public mediapipe::internal::GpuBufferStorage {
      : public mediapipe::internal::GpuBufferStorageImpl<
            PlaceholderGpuBufferStorage> {
   public:
    int width() const override { return 0; }
    int height() const override { return 0; }
    virtual GpuBufferFormat format() const override {
      return GpuBufferFormat::kUnknown;
    }
    GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
                                       int plane) const override {
      return {};
    }
    GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
                                        int plane) override {
      return {};
    }
    void ViewDoneWriting(const GlTextureView& view) override{};
    std::unique_ptr<ImageFrame> AsImageFrame() const override {
      return nullptr;
    }
  };

  mediapipe::internal::GpuBufferStorage& no_storage() const {
    static PlaceholderGpuBufferStorage placeholder;
  std::shared_ptr<mediapipe::internal::GpuBufferStorage>& no_storage() const {
    static auto placeholder =
        std::static_pointer_cast<mediapipe::internal::GpuBufferStorage>(
            std::make_shared<PlaceholderGpuBufferStorage>());
    return placeholder;
  }

  const mediapipe::internal::GpuBufferStorage& current_storage() const {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
    if (pixel_buffer_ != nullptr) return pixel_buffer_;
#else
    if (texture_buffer_) return *texture_buffer_;
#endif  // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
    return no_storage();
    return *storage_;
  }

  mediapipe::internal::GpuBufferStorage& current_storage() {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
    if (pixel_buffer_ != nullptr) return pixel_buffer_;
#else
    if (texture_buffer_) return *texture_buffer_;
#endif  // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
    return no_storage();
  }
  mediapipe::internal::GpuBufferStorage& current_storage() { return *storage_; }

#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
  GpuBufferStorageCvPixelBuffer pixel_buffer_;
#else
  GlTextureBufferSharedPtr texture_buffer_;
#endif  // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
  std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage_ =
      no_storage();
};

inline bool GpuBuffer::operator==(std::nullptr_t other) const {
  return &current_storage() == &no_storage();
  return storage_ == no_storage();
}

inline bool GpuBuffer::operator==(const GpuBuffer& other) const {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
  return pixel_buffer_ == other.pixel_buffer_;
#else
  return texture_buffer_ == other.texture_buffer_;
#endif  // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
  return storage_ == other.storage_;
}

inline GpuBuffer& GpuBuffer::operator=(std::nullptr_t other) {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
  pixel_buffer_.reset(other);
#else
  texture_buffer_ = other;
#endif  // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
  storage_ = no_storage();
  return *this;
}

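With the shared PlaceholderGpuBufferStorage standing in for "no buffer", the nullptr comparisons above reduce to plain shared_ptr comparisons. A small sketch of how calling code might tell the two states apart; it assumes GpuBuffer remains default-constructible, and `internal_storage<T>()` is shown only for illustration since the comment above marks it as internal:

```cpp
#include "mediapipe/gpu/gl_texture_buffer.h"
#include "mediapipe/gpu/gpu_buffer.h"

// Sketch: a default-constructed GpuBuffer still holds the placeholder storage,
// so it compares equal to nullptr until a real storage is attached.
bool HasGlTexture(const mediapipe::GpuBuffer& buffer) {
  if (buffer == nullptr) return false;  // still the PlaceholderGpuBufferStorage
  // internal_storage<T>() returns nullptr when the active storage is not a T.
  return buffer.internal_storage<mediapipe::GlTextureBuffer>() != nullptr;
}
```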
@@ -12,27 +12,73 @@ class GpuBuffer;
namespace mediapipe {
namespace internal {

using mediapipe::GlTextureView;
using mediapipe::GpuBuffer;
using mediapipe::GpuBufferFormat;
template <class... T>
struct types {};

class GlTextureViewManager {
template <class V>
class ViewProvider;

// Note: this specialization temporarily lives here for backwards compatibility
// reasons. New specializations should be put in the same file as their view.
template <>
class ViewProvider<GlTextureView> {
 public:
  virtual ~GlTextureViewManager() = default;
  virtual GlTextureView GetGlTextureReadView(
      std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const = 0;
  virtual GlTextureView GetGlTextureWriteView(
      std::shared_ptr<GpuBuffer> gpu_buffer, int plane) = 0;
  virtual void ViewDoneWriting(const GlTextureView& view) = 0;
  virtual ~ViewProvider() = default;
  // Note that the view type is encoded in an argument to allow overloading,
  // so a storage class can implement GetRead/WriteView for multiple view types.
  // We cannot use a template function because it cannot be virtual; we want to
  // have a virtual function here to enforce that different storages supporting
  // the same view implement the same signature.
  // Note that we allow different views to have custom signatures, providing
  // additional view-specific arguments that may be needed.
  virtual GlTextureView GetReadView(types<GlTextureView>,
                                    std::shared_ptr<GpuBuffer> gpu_buffer,
                                    int plane) const = 0;
  virtual GlTextureView GetWriteView(types<GlTextureView>,
                                     std::shared_ptr<GpuBuffer> gpu_buffer,
                                     int plane) = 0;
};

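The comment block above explains the `types<View>` tag: a virtual function cannot be a template, so each view type gets its own pure-virtual overload selected by an empty tag argument. A sketch of a storage that advertises GlTextureView support through exactly those overloads; it assumes GlTextureView stays default-constructible (as the placeholder storage's `return {};` suggests) and the include paths are assumptions:

```cpp
#include <memory>

#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_buffer_storage.h"

// Sketch: a do-nothing storage wired up through GpuBufferStorageImpl. A second
// view type would simply add another ViewProvider base plus another pair of
// GetReadView/GetWriteView overloads with their own tag and signature.
class FakeStorage
    : public mediapipe::internal::GpuBufferStorageImpl<
          FakeStorage,
          mediapipe::internal::ViewProvider<mediapipe::GlTextureView>> {
 public:
  int width() const override { return 0; }
  int height() const override { return 0; }
  mediapipe::GpuBufferFormat format() const override {
    return mediapipe::GpuBufferFormat::kUnknown;
  }
  std::unique_ptr<mediapipe::ImageFrame> AsImageFrame() const override {
    return nullptr;
  }
  // The types<GlTextureView> tag selects these overloads at compile time.
  mediapipe::GlTextureView GetReadView(
      mediapipe::internal::types<mediapipe::GlTextureView>,
      std::shared_ptr<mediapipe::GpuBuffer> gpu_buffer,
      int plane) const override {
    return {};
  }
  mediapipe::GlTextureView GetWriteView(
      mediapipe::internal::types<mediapipe::GlTextureView>,
      std::shared_ptr<mediapipe::GpuBuffer> gpu_buffer, int plane) override {
    return {};
  }
};
```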
class GpuBufferStorage : public GlTextureViewManager {
class GpuBufferStorage {
 public:
  virtual ~GpuBufferStorage() = default;
  virtual int width() const = 0;
  virtual int height() const = 0;
  virtual GpuBufferFormat format() const = 0;
  virtual std::unique_ptr<ImageFrame> AsImageFrame() const = 0;
  // We can't use dynamic_cast since we want to support building without RTTI.
  // The public methods delegate to the type-erased private virtual method.
  template <class T>
  T* down_cast() {
    return static_cast<T*>(
        const_cast<void*>(down_cast(tool::GetTypeHash<T>())));
  }
  template <class T>
  const T* down_cast() const {
    return static_cast<const T*>(down_cast(tool::GetTypeHash<T>()));
  }

 private:
  virtual const void* down_cast(size_t type_hash) const = 0;
  virtual size_t storage_type_hash() const = 0;
};

template <class T, class... U>
class GpuBufferStorageImpl : public GpuBufferStorage, public U... {
 private:
  virtual const void* down_cast(size_t type_hash) const override {
    return down_cast_impl(type_hash, types<T, U...>{});
  }
  size_t storage_type_hash() const override { return tool::GetTypeHash<T>(); }

  const void* down_cast_impl(size_t type_hash, types<>) const {
    return nullptr;
  }
  template <class V, class... W>
  const void* down_cast_impl(size_t type_hash, types<V, W...>) const {
    if (type_hash == tool::GetTypeHash<V>()) return static_cast<const V*>(this);
    return down_cast_impl(type_hash, types<W...>{});
  }
};

}  // namespace internal

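The type-hash walk above is what `GpuBuffer::GetReadView` relies on when it calls `down_cast<ViewProvider<View>>()`. A short caller-side sketch of the same query; only the interfaces shown in this diff are assumed:

```cpp
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_buffer_storage.h"

// Sketch: down_cast<> succeeds for every type listed in GpuBufferStorageImpl's
// template arguments (the storage itself plus each ViewProvider base) and
// returns nullptr for anything else, all without RTTI.
void InspectStorage(mediapipe::internal::GpuBufferStorage& storage) {
  using GlProvider =
      mediapipe::internal::ViewProvider<mediapipe::GlTextureView>;
  if (auto* provider = storage.down_cast<GlProvider>()) {
    // This storage can produce GlTextureViews; GpuBuffer::GetReadView would
    // dispatch through exactly this pointer.
    (void)provider;
  } else {
    // No GL view support registered for this storage type.
  }
}
```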
@@ -11,7 +11,8 @@ typedef CVOpenGLTextureRef CVTextureType;
typedef CVOpenGLESTextureRef CVTextureType;
#endif  // TARGET_OS_OSX

GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureReadView(
GlTextureView GpuBufferStorageCvPixelBuffer::GetReadView(
    mediapipe::internal::types<GlTextureView>,
    std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const {
  CVReturn err;
  auto gl_context = GlContext::GetCurrent();

@@ -58,11 +59,13 @@ GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureReadView(
#endif  // TARGET_OS_OSX
}

GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureWriteView(
GlTextureView GpuBufferStorageCvPixelBuffer::GetWriteView(
    mediapipe::internal::types<GlTextureView>,
    std::shared_ptr<GpuBuffer> gpu_buffer, int plane) {
  // For this storage there is currently no difference between read and write
  // views, so we delegate to the read method.
  return GetGlTextureReadView(std::move(gpu_buffer), plane);
  return GetReadView(mediapipe::internal::types<GlTextureView>{},
                     std::move(gpu_buffer), plane);
}

void GpuBufferStorageCvPixelBuffer::ViewDoneWriting(const GlTextureView& view) {

@@ -12,7 +12,9 @@ namespace mediapipe {
class GlContext;

class GpuBufferStorageCvPixelBuffer
    : public mediapipe::internal::GpuBufferStorage,
    : public mediapipe::internal::GpuBufferStorageImpl<
          GpuBufferStorageCvPixelBuffer,
          mediapipe::internal::ViewProvider<GlTextureView>>,
      public CFHolder<CVPixelBufferRef> {
 public:
  using CFHolder<CVPixelBufferRef>::CFHolder;

@@ -28,12 +30,16 @@ class GpuBufferStorageCvPixelBuffer
    return GpuBufferFormatForCVPixelFormat(
        CVPixelBufferGetPixelFormatType(**this));
  }
  GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
                                     int plane) const override;
  GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
                                      int plane) override;
  GlTextureView GetReadView(mediapipe::internal::types<GlTextureView>,
                            std::shared_ptr<GpuBuffer> gpu_buffer,
                            int plane) const override;
  GlTextureView GetWriteView(mediapipe::internal::types<GlTextureView>,
                             std::shared_ptr<GpuBuffer> gpu_buffer,
                             int plane) override;
  std::unique_ptr<ImageFrame> AsImageFrame() const override;
  void ViewDoneWriting(const GlTextureView& view) override;

 private:
  void ViewDoneWriting(const GlTextureView& view);
};

}  // namespace mediapipe

@ -8,6 +8,9 @@ input_stream: "input_video"
|
|||
# Max number of hands to detect/process. (int)
|
||||
input_side_packet: "num_hands"
|
||||
|
||||
# Model complexity (0 or 1). (int)
|
||||
input_side_packet: "model_complexity"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
|
@ -39,6 +42,7 @@ node {
|
|||
node {
|
||||
calculator: "HandLandmarkTrackingGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
output_stream: "HANDEDNESS:handedness"
|
||||
|
|
|
@ -370,6 +370,7 @@ void Graph::CallbackToJava(JNIEnv* env, jobject java_callback_obj,
|
|||
jmethodID processMethod = env->GetMethodID(
|
||||
callback_cls, process_method_name.c_str(), "(Ljava/util/List;)V");
|
||||
|
||||
// TODO: move to register natives.
|
||||
jclass list_cls = env->FindClass("java/util/ArrayList");
|
||||
jobject java_list =
|
||||
env->NewObject(list_cls, env->GetMethodID(list_cls, "<init>", "()V"));
|
||||
|
@ -392,6 +393,7 @@ void Graph::CallbackToJava(JNIEnv* env, jobject java_callback_obj,
|
|||
RemovePacket(packet_handle);
|
||||
}
|
||||
env->DeleteLocalRef(callback_cls);
|
||||
env->DeleteLocalRef(list_cls);
|
||||
env->DeleteLocalRef(java_list);
|
||||
VLOG(2) << "Returned from java callback.";
|
||||
}
|
||||
|
|
|
@ -56,8 +56,11 @@ JNIEXPORT jobjectArray JNICALL GRAPH_PROFILER_METHOD(
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// TODO: move to register natives.
|
||||
jclass byte_array_cls = env->FindClass("[B");
|
||||
jobjectArray profiles =
|
||||
env->NewObjectArray(num_profiles, env->FindClass("[B"), nullptr);
|
||||
env->NewObjectArray(num_profiles, byte_array_cls, nullptr);
|
||||
env->DeleteLocalRef(byte_array_cls);
|
||||
for (int i = 0; i < num_profiles; i++) {
|
||||
const auto& profile = profiles_vec[i];
|
||||
int size = profile.ByteSize();
|
||||
|
|
|
@@ -143,8 +143,10 @@ jthrowable CreateMediaPipeException(JNIEnv* env, absl::Status status) {
  env->SetByteArrayRegion(message_bytes, 0, length,
                          reinterpret_cast<jbyte*>(const_cast<char*>(
                              std::string(status.message()).c_str())));
  return reinterpret_cast<jthrowable>(
  jthrowable result = reinterpret_cast<jthrowable>(
      env->NewObject(status_cls, status_ctr, status.code(), message_bytes));
  env->DeleteLocalRef(status_cls);
  return result;
}

bool ThrowIfError(JNIEnv* env, absl::Status status) {

@@ -165,11 +167,11 @@ SerializedMessageIds::SerializedMessageIds(JNIEnv* env, jobject data) {
      class_registry.GetFieldName(serialized_message, "typeName");
  std::string value_obfuscated =
      class_registry.GetFieldName(serialized_message, "value");
  jclass j_class = reinterpret_cast<jclass>(
      env->NewGlobalRef(env->FindClass(serialized_message_obfuscated.c_str())));
  jclass j_class = env->FindClass(serialized_message_obfuscated.c_str());
  type_name_id = env->GetFieldID(j_class, type_name_obfuscated.c_str(),
                                 "Ljava/lang/String;");
  value_id = env->GetFieldID(j_class, value_obfuscated.c_str(), "[B");
  env->DeleteLocalRef(j_class);
}

}  // namespace android

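Several JNI hunks in this commit follow the same pattern: keep a jclass obtained from FindClass only as long as it is needed and release it with DeleteLocalRef, so graph callbacks that run for every frame do not exhaust the local reference table. A framework-independent sketch of that pattern (the function name and shape are illustrative, not code from the commit):

```cpp
#include <jni.h>

// Sketch: look up a class, use it, and drop the local reference right away.
// JNI only guarantees a small number of local references per native frame, so
// code running in loops or long-lived callbacks should not let them pile up.
jobjectArray NewByteArrayMatrix(JNIEnv* env, jsize rows) {
  jclass byte_array_cls = env->FindClass("[B");  // byte[] class
  if (byte_array_cls == nullptr) return nullptr;  // exception already pending
  jobjectArray result = env->NewObjectArray(rows, byte_array_cls, nullptr);
  env->DeleteLocalRef(byte_array_cls);  // mirrors the fixes in the hunks above
  return result;
}
```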
@ -184,8 +184,11 @@ JNIEXPORT jobjectArray JNICALL PACKET_GETTER_METHOD(nativeGetProtoVector)(
|
|||
}
|
||||
const std::vector<const ::mediapipe::proto_ns::MessageLite*>& proto_vector =
|
||||
get_proto_vector.value();
|
||||
// TODO: move to register natives.
|
||||
jclass byte_array_cls = env->FindClass("[B");
|
||||
jobjectArray proto_array =
|
||||
env->NewObjectArray(proto_vector.size(), env->FindClass("[B"), nullptr);
|
||||
env->NewObjectArray(proto_vector.size(), byte_array_cls, nullptr);
|
||||
env->DeleteLocalRef(byte_array_cls);
|
||||
for (int i = 0; i < proto_vector.size(); ++i) {
|
||||
const ::mediapipe::proto_ns::MessageLite* proto_message = proto_vector[i];
|
||||
|
||||
|
|
|
@ -137,6 +137,7 @@ void RegisterGraphNatives(JNIEnv *env) {
|
|||
AddJNINativeMethod(&graph_methods, graph, "nativeGetProfiler", "(J)J",
|
||||
(void *)&GRAPH_METHOD(nativeGetProfiler));
|
||||
RegisterNativesVector(env, graph_class, graph_methods);
|
||||
env->DeleteLocalRef(graph_class);
|
||||
}
|
||||
|
||||
void RegisterGraphProfilerNatives(JNIEnv *env) {
|
||||
|
@ -151,6 +152,7 @@ void RegisterGraphProfilerNatives(JNIEnv *env) {
|
|||
&graph_profiler_methods, graph_profiler, "nativeGetCalculatorProfiles",
|
||||
"(J)[[B", (void *)&GRAPH_PROFILER_METHOD(nativeGetCalculatorProfiles));
|
||||
RegisterNativesVector(env, graph_profiler_class, graph_profiler_methods);
|
||||
env->DeleteLocalRef(graph_profiler_class);
|
||||
}
|
||||
|
||||
void RegisterAndroidAssetUtilNatives(JNIEnv *env) {
|
||||
|
@ -171,6 +173,7 @@ void RegisterAndroidAssetUtilNatives(JNIEnv *env) {
|
|||
(void *)&ANDROID_ASSET_UTIL_METHOD(nativeInitializeAssetManager));
|
||||
RegisterNativesVector(env, android_asset_util_class,
|
||||
android_asset_util_methods);
|
||||
env->DeleteLocalRef(android_asset_util_class);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -191,6 +194,7 @@ void RegisterAndroidPacketCreatorNatives(JNIEnv *env) {
|
|||
(void *)&ANDROID_PACKET_CREATOR_METHOD(nativeCreateRgbImageFrame));
|
||||
RegisterNativesVector(env, android_packet_creator_class,
|
||||
android_packet_creator_methods);
|
||||
env->DeleteLocalRef(android_packet_creator_class);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -232,6 +236,7 @@ void RegisterPacketCreatorNatives(JNIEnv *env) {
|
|||
"(JL" + serialized_message_name + ";)J",
|
||||
(void *)&PACKET_CREATOR_METHOD(nativeCreateProto));
|
||||
RegisterNativesVector(env, packet_creator_class, packet_creator_methods);
|
||||
env->DeleteLocalRef(packet_creator_class);
|
||||
}
|
||||
|
||||
void RegisterPacketGetterNatives(JNIEnv *env) {
|
||||
|
@ -260,6 +265,7 @@ void RegisterPacketGetterNatives(JNIEnv *env) {
|
|||
"nativeGetFloat32Vector", "(J)[F",
|
||||
(void *)&PACKET_GETTER_METHOD(nativeGetFloat32Vector));
|
||||
RegisterNativesVector(env, packet_getter_class, packet_getter_methods);
|
||||
env->DeleteLocalRef(packet_getter_class);
|
||||
}
|
||||
|
||||
void RegisterPacketNatives(JNIEnv *env) {
|
||||
|
@ -278,6 +284,7 @@ void RegisterPacketNatives(JNIEnv *env) {
|
|||
AddJNINativeMethod(&packet_methods, packet, "nativeIsEmpty", "(J)Z",
|
||||
(void *)&PACKET_METHOD(nativeIsEmpty));
|
||||
RegisterNativesVector(env, packet_class, packet_methods);
|
||||
env->DeleteLocalRef(packet_class);
|
||||
}
|
||||
|
||||
void RegisterCompatNatives(JNIEnv *env) {
|
||||
|
@ -293,6 +300,7 @@ void RegisterCompatNatives(JNIEnv *env) {
|
|||
"(I)J",
|
||||
(void *)&COMPAT_METHOD(getCurrentNativeEGLSurface));
|
||||
RegisterNativesVector(env, compat_class, compat_methods);
|
||||
env->DeleteLocalRef(compat_class);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
@@ -95,13 +95,12 @@ public class ImageSolutionResult implements SolutionResult {
    }
  }

  // Releases image packet and the underlying data.
  void releaseImagePackets() {
    imagePacket.release();
  // Clears the underlying image packets to prevent the callers from accessing the invalid packets
  // outside of the output callback method.
  void clearImagePackets() {
    imagePacket = null;
    if (imageResultPackets != null) {
      for (Packet p : imageResultPackets) {
        p.release();
      }
      imageResultPackets.clear();
    }
  }
}

@@ -90,12 +90,9 @@ public class OutputHandler<T extends SolutionResult> {
        Log.e(TAG, "Error occurs when getting MediaPipe solution result. " + e);
      }
    } finally {
      for (Packet packet : packets) {
        packet.release();
      }
      if (solutionResult instanceof ImageSolutionResult) {
        ImageSolutionResult imageSolutionResult = (ImageSolutionResult) solutionResult;
        imageSolutionResult.releaseImagePackets();
        imageSolutionResult.clearImagePackets();
      }
    }
  }

@ -34,7 +34,6 @@ android_library(
|
|||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework/formats:detection_java_proto_lite",
|
||||
"//mediapipe/framework/formats:location_data_java_proto_lite",
|
||||
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_base",
|
||||
"//third_party:autovalue",
|
||||
|
|
|
@ -17,7 +17,6 @@ package com.google.mediapipe.solutions.facedetection;
|
|||
import android.content.Context;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
|
||||
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
|
||||
import com.google.mediapipe.framework.MediaPipeException;
|
||||
import com.google.mediapipe.framework.Packet;
|
||||
import com.google.mediapipe.solutioncore.ErrorListener;
|
||||
|
@ -104,27 +103,4 @@ public class FaceDetection extends ImageSolutionBase {
|
|||
this.outputHandler.setErrorListener(listener);
|
||||
this.errorListener = listener;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a specific face keypoint by face index and face keypoint type.
|
||||
*
|
||||
* @param result the returned {@link FaceDetectionResult} object.
|
||||
* @param faceIndex the face index. A smaller index maps to a detected face with a higher
|
||||
* confidence score.
|
||||
* @param faceKeypointType the face keypoint type defined in {@link FaceKeypoint}.
|
||||
*/
|
||||
public static RelativeKeypoint getFaceKeypoint(
|
||||
FaceDetectionResult result,
|
||||
int faceIndex,
|
||||
@FaceKeypoint.FaceKeypointType int faceKeypointType) {
|
||||
if (result == null
|
||||
|| faceIndex >= result.multiFaceDetections().size()
|
||||
|| faceKeypointType >= FaceKeypoint.NUM_KEY_POINTS) {
|
||||
return RelativeKeypoint.getDefaultInstance();
|
||||
}
|
||||
Detection detection = result.multiFaceDetections().get(faceIndex);
|
||||
float x = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getX();
|
||||
float y = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getY();
|
||||
return RelativeKeypoint.newBuilder().setX(x).setY(y).build();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,12 +23,13 @@ android_library(
|
|||
"HandsResult.java",
|
||||
],
|
||||
assets = [
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_lite.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
javacopts = ["-Acom.google.auto.value.AutoBuilderIsUnstable"],
|
||||
|
|
|
@ -18,9 +18,10 @@ import android.content.Context;
|
|||
import com.google.auto.value.AutoValue;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
|
||||
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
|
||||
import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList;
|
||||
import com.google.mediapipe.framework.MediaPipeException;
|
||||
import com.google.mediapipe.framework.Packet;
|
||||
import com.google.mediapipe.solutioncore.ErrorListener;
|
||||
|
@ -28,7 +29,9 @@ import com.google.mediapipe.solutioncore.ImageSolutionBase;
|
|||
import com.google.mediapipe.solutioncore.OutputHandler;
|
||||
import com.google.mediapipe.solutioncore.ResultListener;
|
||||
import com.google.mediapipe.solutioncore.SolutionInfo;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
|
@ -85,10 +88,15 @@ public class Hands extends ImageSolutionBase {
|
|||
private static final String CPU_GRAPH_NAME = "hand_landmark_tracking_cpu_image.binarypb";
|
||||
private static final String IMAGE_INPUT_STREAM = "image";
|
||||
private static final ImmutableList<String> OUTPUT_STREAMS =
|
||||
ImmutableList.of("multi_hand_landmarks", "multi_handedness", "throttled_image");
|
||||
ImmutableList.of(
|
||||
"multi_hand_landmarks",
|
||||
"multi_hand_world_landmarks",
|
||||
"multi_handedness",
|
||||
"throttled_image");
|
||||
private static final int LANDMARKS_INDEX = 0;
|
||||
private static final int HANDEDNESS_INDEX = 1;
|
||||
private static final int INPUT_IMAGE_INDEX = 2;
|
||||
private static final int WORLD_LANDMARKS_INDEX = 1;
|
||||
private static final int HANDEDNESS_INDEX = 2;
|
||||
private static final int INPUT_IMAGE_INDEX = 3;
|
||||
private final OutputHandler<HandsResult> outputHandler;
|
||||
|
||||
/**
|
||||
|
@ -109,8 +117,18 @@ public class Hands extends ImageSolutionBase {
|
|||
reportError("Error occurs while getting MediaPipe hand landmarks.", e);
|
||||
}
|
||||
try {
|
||||
handsResultBuilder.setMultiHandedness(
|
||||
getProtoVector(packets.get(HANDEDNESS_INDEX), Classification.parser()));
|
||||
handsResultBuilder.setMultiHandWorldLandmarks(
|
||||
getProtoVector(packets.get(WORLD_LANDMARKS_INDEX), LandmarkList.parser()));
|
||||
} catch (MediaPipeException e) {
|
||||
reportError("Error occurs while getting MediaPipe hand world landmarks.", e);
|
||||
}
|
||||
try {
|
||||
List<Classification> handednessList = new ArrayList<>();
|
||||
for (ClassificationList protolist :
|
||||
getProtoVector(packets.get(HANDEDNESS_INDEX), ClassificationList.parser())) {
|
||||
handednessList.add(protolist.getClassification(0));
|
||||
}
|
||||
handsResultBuilder.setMultiHandedness(handednessList);
|
||||
} catch (MediaPipeException e) {
|
||||
reportError("Error occurs while getting MediaPipe handedness data.", e);
|
||||
}
|
||||
|
@ -155,21 +173,4 @@ public class Hands extends ImageSolutionBase {
|
|||
this.outputHandler.setErrorListener(listener);
|
||||
this.errorListener = listener;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a specific hand landmark by hand index and hand landmark type.
|
||||
*
|
||||
* @param result the returned {@link HandsResult} object.
|
||||
* @param handIndex the hand index. The hand landmark lists are sorted by the confidence score.
|
||||
* @param landmarkType the hand landmark type defined in {@link HandLandmark}.
|
||||
*/
|
||||
public static NormalizedLandmark getHandLandmark(
|
||||
HandsResult result, int handIndex, @HandLandmark.HandLandmarkType int landmarkType) {
|
||||
if (result == null
|
||||
|| handIndex >= result.multiHandLandmarks().size()
|
||||
|| landmarkType >= HandLandmark.NUM_LANDMARKS) {
|
||||
return NormalizedLandmark.getDefaultInstance();
|
||||
}
|
||||
return result.multiHandLandmarks().get(handIndex).getLandmarkList().get(landmarkType);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ package com.google.mediapipe.solutions.hands;
|
|||
import android.graphics.Bitmap;
|
||||
import com.google.auto.value.AutoBuilder;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
|
||||
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
|
||||
import com.google.mediapipe.framework.Packet;
|
||||
|
@ -31,14 +32,17 @@ import java.util.List;
|
|||
*/
|
||||
public class HandsResult extends ImageSolutionResult {
|
||||
private final ImmutableList<NormalizedLandmarkList> multiHandLandmarks;
|
||||
private final ImmutableList<LandmarkList> multiHandWorldLandmarks;
|
||||
private final ImmutableList<Classification> multiHandedness;
|
||||
|
||||
HandsResult(
|
||||
ImmutableList<NormalizedLandmarkList> multiHandLandmarks,
|
||||
ImmutableList<LandmarkList> multiHandWorldLandmarks,
|
||||
ImmutableList<Classification> multiHandedness,
|
||||
Packet imagePacket,
|
||||
long timestamp) {
|
||||
this.multiHandLandmarks = multiHandLandmarks;
|
||||
this.multiHandWorldLandmarks = multiHandWorldLandmarks;
|
||||
this.multiHandedness = multiHandedness;
|
||||
this.timestamp = timestamp;
|
||||
this.imagePacket = imagePacket;
|
||||
|
@ -53,6 +57,12 @@ public class HandsResult extends ImageSolutionResult {
|
|||
return multiHandLandmarks;
|
||||
}
|
||||
|
||||
// Collection of detected/tracked hands' landmarks in real-world 3D coordinates that are in meters
|
||||
// with the origin at the hand's approximate geometric center.
|
||||
public ImmutableList<LandmarkList> multiHandWorldLandmarks() {
|
||||
return multiHandWorldLandmarks;
|
||||
}
|
||||
|
||||
// Collection of handedness of the detected/tracked hands (i.e. is it a left or right hand). Each
|
||||
// hand is composed of label and score. label is a string of value either "Left" or "Right". score
|
||||
// is the estimated probability of the predicted handedness and is always greater than or equal to
|
||||
|
@ -70,6 +80,8 @@ public class HandsResult extends ImageSolutionResult {
|
|||
public abstract static class Builder {
|
||||
abstract Builder setMultiHandLandmarks(List<NormalizedLandmarkList> value);
|
||||
|
||||
abstract Builder setMultiHandWorldLandmarks(List<LandmarkList> value);
|
||||
|
||||
abstract Builder setMultiHandedness(List<Classification> value);
|
||||
|
||||
abstract Builder setTimestamp(long value);
|
||||
|
|
|
@ -24,7 +24,6 @@ package(default_visibility = ["//visibility:public"])
|
|||
exports_files([
|
||||
"hand_landmark_full.tflite",
|
||||
"hand_landmark_lite.tflite",
|
||||
"hand_landmark_sparse.tflite",
|
||||
"handedness.txt",
|
||||
])
|
||||
|
||||
|
@ -56,6 +55,7 @@ mediapipe_simple_subgraph(
|
|||
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
"//mediapipe/calculators/util:world_landmark_projection_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -75,6 +75,7 @@ mediapipe_simple_subgraph(
|
|||
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
"//mediapipe/calculators/util:world_landmark_projection_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
@ -20,6 +20,16 @@ input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
|||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
|
||||
# Hand world landmarks within the given ROI. (LandmarkList)
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the given ROI.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
|
||||
|
||||
# Handedness of the detected hand (i.e. is hand left or right).
|
||||
# (ClassificationList)
|
||||
output_stream: "HANDEDNESS:handedness"
|
||||
|
@ -77,11 +87,13 @@ node {
|
|||
output_stream: "landmark_tensors"
|
||||
output_stream: "hand_flag_tensor"
|
||||
output_stream: "handedness_tensor"
|
||||
output_stream: "world_landmark_tensor"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 3 end: 4 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -175,3 +187,33 @@ node {
|
|||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "NORM_LANDMARKS:hand_landmarks"
|
||||
}
|
||||
|
||||
# Drops world landmarks tensors if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "world_landmark_tensor"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_world_landmark_tensor"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a list of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:ensured_world_landmark_tensor"
|
||||
output_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 21
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the world landmarks from the cropped hand image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "WorldLandmarkProjectionCalculator"
|
||||
input_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "LANDMARKS:hand_world_landmarks"
|
||||
}
|
||||
|
|
|
@ -20,6 +20,16 @@ input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
|||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
|
||||
# Hand world landmarks within the given ROI. (LandmarkList)
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the given ROI.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
|
||||
|
||||
# Handedness of the detected hand (i.e. is hand left or right).
|
||||
# (ClassificationList)
|
||||
output_stream: "HANDEDNESS:handedness"
|
||||
|
@ -71,11 +81,13 @@ node {
|
|||
output_stream: "landmark_tensors"
|
||||
output_stream: "hand_flag_tensor"
|
||||
output_stream: "handedness_tensor"
|
||||
output_stream: "world_landmark_tensor"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 3 end: 4 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -169,3 +181,33 @@ node {
|
|||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "NORM_LANDMARKS:hand_landmarks"
|
||||
}
|
||||
|
||||
# Drops world landmarks tensors if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "world_landmark_tensor"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_world_landmark_tensor"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a list of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:ensured_world_landmark_tensor"
|
||||
output_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 21
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the world landmarks from the cropped hand image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "WorldLandmarkProjectionCalculator"
|
||||
input_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "LANDMARKS:hand_world_landmarks"
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
|
|||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
|
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
|||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a Classification proto.
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
|
@ -89,6 +103,7 @@ node {
|
|||
# Detects palms.
|
||||
node {
|
||||
calculator: "PalmDetectionCpu"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_stream: "IMAGE:palm_detection_image"
|
||||
output_stream: "DETECTIONS:all_palm_detections"
|
||||
}
|
||||
|
@ -186,12 +201,13 @@ node {
|
|||
input_stream: "IMAGE:image_for_landmarks"
|
||||
input_stream: "ROI:single_hand_rect"
|
||||
output_stream: "LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:single_handedness"
|
||||
}
|
||||
|
||||
# Collects the handedness for each single hand into a vector. Upon
|
||||
# receiving the BATCH_END timestamp, outputs a vector of classification at the
|
||||
# BATCH_END timestamp.
|
||||
# Collects the handedness for each single hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopClassificationListCalculator"
|
||||
input_stream: "ITEM:single_handedness"
|
||||
|
@ -218,6 +234,16 @@ node {
|
|||
output_stream: "ITERABLE:multi_hand_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of world landmarks for each hand into a vector. Upon receiving
|
||||
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:single_hand_world_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_world_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
|
|
|
@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
|
|||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
|
@ -25,6 +25,7 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
|||
|
||||
# The throttled input image. (Image)
|
||||
output_stream: "IMAGE:throttled_image"
|
||||
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
|
@ -32,8 +33,22 @@ output_stream: "IMAGE:throttled_image"
|
|||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a Classification proto.
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
|
@ -93,6 +108,7 @@ node {
|
|||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
|
|
|
@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
|
|||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
|
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
|||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a Classification proto.
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
|
@ -89,6 +103,7 @@ node {
|
|||
# Detects palms.
|
||||
node {
|
||||
calculator: "PalmDetectionGpu"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_stream: "IMAGE:palm_detection_image"
|
||||
output_stream: "DETECTIONS:all_palm_detections"
|
||||
}
|
||||
|
@ -187,12 +202,13 @@ node {
|
|||
input_stream: "IMAGE:image_for_landmarks"
|
||||
input_stream: "ROI:single_hand_rect"
|
||||
output_stream: "LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:single_handedness"
|
||||
}
|
||||
|
||||
# Collects the handedness for each single hand into a vector. Upon
|
||||
# receiving the BATCH_END timestamp, outputs a vector of classification at the
|
||||
# BATCH_END timestamp.
|
||||
# Collects the handedness for each single hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopClassificationListCalculator"
|
||||
input_stream: "ITEM:single_handedness"
|
||||
|
@ -219,6 +235,16 @@ node {
|
|||
output_stream: "ITERABLE:multi_hand_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of world landmarks for each hand into a vector. Upon receiving
|
||||
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:single_hand_world_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_world_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
|
|
|
@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
|
|||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
|
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
|||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a Classification proto.
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
|
@ -93,6 +107,7 @@ node {
|
|||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
|
|
|
@ -8,6 +8,11 @@ input_stream: "IMAGE:input_video"
|
|||
# Face-related pose landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
|
||||
|
||||
# Face landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
||||
|
@ -72,5 +77,6 @@ node {
|
|||
calculator: "FaceLandmarkCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:face_tracking_roi"
|
||||
input_side_packet: "WITH_ATTENTION:refine_landmarks"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
|
|
@ -8,6 +8,11 @@ input_stream: "IMAGE:input_video"
|
|||
# Face-related pose landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
|
||||
|
||||
# Face landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
||||
|
@ -72,5 +77,6 @@ node {
|
|||
calculator: "FaceLandmarkGpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:face_tracking_roi"
|
||||
input_side_packet: "WITH_ATTENTION:refine_landmarks"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
|
||||
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
|
||||
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
|
||||
# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
|
||||
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
# output_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
# output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
@ -70,6 +71,11 @@ input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
|
|||
# jitter. If unspecified, functions as set to true. (bool)
|
||||
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
@ -135,5 +141,6 @@ node {
|
|||
calculator: "FaceLandmarksFromPoseCpu"
|
||||
input_stream: "IMAGE:image"
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
|
||||
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
|
||||
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
|
||||
# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
|
||||
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
# output_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
# output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
@ -70,6 +71,11 @@ input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
|
|||
# jitter. If unspecified, functions as set to true. (bool)
|
||||
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
@ -135,5 +141,6 @@ node {
|
|||
calculator: "FaceLandmarksFromPoseGpu"
|
||||
input_stream: "IMAGE:image"
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
|
|
@@ -21,13 +21,29 @@ licenses(["notice"])

package(default_visibility = ["//visibility:public"])

exports_files(["palm_detection.tflite"])
exports_files([
"palm_detection_lite.tflite",
"palm_detection_full.tflite",
])

mediapipe_simple_subgraph(
name = "palm_detection_model_loader",
graph = "palm_detection_model_loader.pbtxt",
register_as = "PalmDetectionModelLoader",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/tflite:tflite_model_calculator",
"//mediapipe/calculators/util:local_file_contents_calculator",
"//mediapipe/framework/tool:switch_container",
],
)

mediapipe_simple_subgraph(
name = "palm_detection_cpu",
graph = "palm_detection_cpu.pbtxt",
register_as = "PalmDetectionCpu",
deps = [
":palm_detection_model_loader",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
@@ -43,6 +59,7 @@ mediapipe_simple_subgraph(
graph = "palm_detection_gpu.pbtxt",
register_as = "PalmDetectionGpu",
deps = [
":palm_detection_model_loader",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
@@ -52,10 +69,3 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)

exports_files(
srcs = [
"palm_detection.tflite",
"palm_detection_labelmap.txt",
],
)
Binary file not shown.
@ -5,6 +5,11 @@ type: "PalmDetectionCpu"
|
|||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
|
||||
# latency generally go up with the model complexity. If unspecified, functions
|
||||
# as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of palms detected. However, the MediaPipe
|
||||
|
@@ -21,11 +26,11 @@ node {
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
min: 0.0
max: 1.0
}
border_mode: BORDER_ZERO
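The hunk above moves the CPU graph's input tensor from 128x128 with a [-1.0, 1.0] float range to 192x192 with a [0.0, 1.0] range, keeping aspect-ratio letterboxing with zero padding. As a rough illustration only (this is not the ImageToTensorCalculator itself, and the helper name is made up), an equivalent preprocessing step in Python with OpenCV could look like:

```
import cv2
import numpy as np

def letterbox_to_tensor(image_bgr, size=192):
    # Expects a 3-channel BGR image: aspect-preserving resize, zero padding
    # (BORDER_ZERO), then values scaled to the [0.0, 1.0] float range.
    h, w = image_bgr.shape[:2]
    scale = size / max(h, w)
    resized = cv2.resize(image_bgr, (int(round(w * scale)), int(round(h * scale))))
    canvas = np.zeros((size, size, 3), dtype=resized.dtype)
    top = (size - resized.shape[0]) // 2
    left = (size - resized.shape[1]) // 2
    canvas[top:top + resized.shape[0], left:left + resized.shape[1]] = resized
    return canvas.astype(np.float32) / 255.0  # output_tensor_float_range 0.0..1.0
```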
@@ -39,6 +44,13 @@ node {
output_side_packet: "opresolver"
}

# Loads the palm detection TF Lite model.
node {
calculator: "PalmDetectionModelLoader"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
output_side_packet: "MODEL:model"
}

# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
@@ -47,9 +59,9 @@ node {
input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
input_side_packet: "MODEL:model"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/palm_detection/palm_detection.tflite"
delegate { xnnpack {} }
}
}
@@ -65,8 +77,8 @@ node {
num_layers: 4
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 128
input_size_width: 128
input_size_width: 192
input_size_height: 192
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
@@ -90,7 +102,7 @@ node {
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 896
num_boxes: 2016
num_coords: 18
box_coord_offset: 0
keypoint_coord_offset: 4
@@ -100,10 +112,10 @@ node {
score_clipping_thresh: 100.0
reverse_output_order: true

x_scale: 128.0
y_scale: 128.0
h_scale: 128.0
w_scale: 128.0
x_scale: 192.0
y_scale: 192.0
w_scale: 192.0
h_scale: 192.0
min_score_thresh: 0.5
}
}
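The change from num_boxes: 896 to num_boxes: 2016 follows directly from the larger input resolution. Assuming the anchor layout stays as before (one stride-8 feature map plus three merged stride-16 maps with two anchors per cell, an assumption since the remaining strides are truncated in this hunk), the counts check out; the snippet below is only a back-of-the-envelope verification, not part of the graph:

```
def ssd_anchor_count(input_size, strides=(8, 16, 16, 16), anchors_per_cell=2):
    # Each stride produces an (input_size // stride)^2 grid, with
    # anchors_per_cell anchors at every grid location.
    return sum((input_size // s) ** 2 * anchors_per_cell for s in strides)

print(ssd_anchor_count(128))  # 896  -> old num_boxes
print(ssd_anchor_count(192))  # 2016 -> new num_boxes
```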
BIN mediapipe/modules/palm_detection/palm_detection_full.tflite (Executable file)
Binary file not shown.
@ -5,6 +5,11 @@ type: "PalmDetectionGpu"
|
|||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
|
||||
# latency generally go up with the model complexity. If unspecified, functions
|
||||
# as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of palms detected. However, the MediaPipe
|
||||
|
@@ -21,11 +26,11 @@ node {
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
min: 0.0
max: 1.0
}
border_mode: BORDER_ZERO
@@ -45,6 +50,13 @@ node {
}
}

# Loads the palm detection TF Lite model.
node {
calculator: "PalmDetectionModelLoader"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
output_side_packet: "MODEL:model"
}

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
@@ -53,10 +65,10 @@ node {
input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
input_side_packet: "MODEL:model"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/palm_detection/palm_detection.tflite"
use_gpu: true
delegate { gpu {} }
}
}
}
@@ -71,8 +83,8 @@ node {
num_layers: 4
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 128
input_size_width: 128
input_size_width: 192
input_size_height: 192
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
@@ -96,7 +108,7 @@ node {
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 896
num_boxes: 2016
num_coords: 18
box_coord_offset: 0
keypoint_coord_offset: 4
@@ -106,10 +118,10 @@ node {
score_clipping_thresh: 100.0
reverse_output_order: true

x_scale: 128.0
y_scale: 128.0
h_scale: 128.0
w_scale: 128.0
x_scale: 192.0
y_scale: 192.0
w_scale: 192.0
h_scale: 192.0
min_score_thresh: 0.5
}
}
BIN mediapipe/modules/palm_detection/palm_detection_lite.tflite (Executable file)
Binary file not shown.
@@ -0,0 +1,63 @@
# MediaPipe graph to load a selected palm detection TF Lite model.

type: "PalmDetectionModelLoader"

# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
# latency generally go up with the model complexity. If unspecified, functions
# as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# TF Lite model represented as a FlatBuffer.
# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
output_side_packet: "MODEL:model"

# Determines path to the desired palm detection model file.
node {
calculator: "SwitchContainer"
input_side_packet: "SELECT:model_complexity"
output_side_packet: "PACKET:model_path"
options: {
[mediapipe.SwitchContainerOptions.ext] {
select: 1
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/palm_detection/palm_detection_lite.tflite"
}
}
}
}
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/palm_detection/palm_detection_full.tflite"
}
}
}
}
}
}
}

# Loads the file in the specified path into a blob.
node {
calculator: "LocalFileContentsCalculator"
input_side_packet: "FILE_PATH:model_path"
output_side_packet: "CONTENTS:model_blob"
options: {
[mediapipe.LocalFileContentsCalculatorOptions.ext]: {
text_mode: false
}
}
}

# Converts the input blob into a TF Lite model.
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:model_blob"
output_side_packet: "MODEL:model"
}
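In plain terms, the SwitchContainer above resolves MODEL_COMPLEXITY 0 to the lite model and anything else to the full model, with select: 1 (full) as the default when the side packet is absent. A minimal Python sketch of that selection logic, purely for illustration (the helper name is hypothetical):

```
def palm_detection_model_path(model_complexity=1):
    # Mirrors the SwitchContainer: complexity 0 -> lite model, otherwise full model.
    if model_complexity == 0:
        return "mediapipe/modules/palm_detection/palm_detection_lite.tflite"
    return "mediapipe/modules/palm_detection/palm_detection_full.tflite"
```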
Binary file not shown.
@@ -127,6 +127,7 @@ static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp*

- (void)videoUpdateIfNeeded {
CMTime timestamp = [_videoItem currentTime];

if ([_videoOutput hasNewPixelBufferForItemTime:timestamp]) {
CVPixelBufferRef pixelBuffer =
[_videoOutput copyPixelBufferForItemTime:timestamp itemTimeForDisplay:nil];
@@ -139,6 +140,12 @@ static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp*
}
CFRelease(pixelBuffer);
});
} else if (!_videoDisplayLink.paused && _videoPlayer.rate == 0) {
// The video might be paused by the operating system for other reasons not caught by the context
// of an interruption. If this happens, the @c _videoDisplayLink will not have a
// paused state, while the _videoPlayer will have rate 0, i.e. paused. In this scenario we restart
// the video playback.
[_videoPlayer play];
}
}

@@ -124,7 +124,10 @@ class Hands(SolutionBase):
'handlandmarkcpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=['multi_hand_landmarks', 'multi_handedness'])
outputs=[
'multi_hand_landmarks', 'multi_hand_world_landmarks',
'multi_handedness'
])

def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the hand landmarks and handedness of each detected hand.
@@ -137,10 +140,14 @@ class Hands(SolutionBase):
ValueError: If the input image is not three channel RGB.

Returns:
A NamedTuple object with two fields: a "multi_hand_landmarks" field that
contains the hand landmarks on each detected hand and a "multi_handedness"
field that contains the handedness (left vs. right hand) of the detected
hand.
A NamedTuple object with the following fields:
1) a "multi_hand_landmarks" field that contains the hand landmarks on
each detected hand.
2) a "multi_hand_world_landmarks" field that contains the hand landmarks
on each detected hand in real-world 3D coordinates that are in meters
with the origin at the hand's approximate geometric center.
3) a "multi_handedness" field that contains the handedness (left vs.
right hand) of the detected hand.
"""

return super().process(input_data={'image': image})
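With the Hands solution now exposing multi_hand_world_landmarks next to multi_hand_landmarks, callers can read metric 3D coordinates straight from the result. A minimal usage sketch, assuming a MediaPipe build that already includes this change; the input file name is hypothetical:

```
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
    image = cv2.imread('hand.jpg')  # hypothetical input image
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if results.multi_hand_world_landmarks:
        # World landmarks are in meters, origin at the hand's approximate
        # geometric center (unlike the normalized image-space landmarks).
        wrist = results.multi_hand_world_landmarks[0].landmark[mp_hands.HandLandmark.WRIST]
        print('wrist (m):', wrist.x, wrist.y, wrist.z)
```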
@@ -34,20 +34,20 @@ from mediapipe.python.solutions import hands as mp_hands
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
LITE_MODEL_DIFF_THRESHOLD = 25 # pixels
FULL_MODEL_DIFF_THRESHOLD = 20 # pixels
EXPECTED_HAND_COORDINATES_PREDICTION = [[[138, 343], [211, 330], [257, 286],
[289, 237], [322, 203], [219, 216],
[238, 138], [249, 90], [253, 51],
[177, 204], [184, 115], [187, 60],
[185, 19], [138, 208], [131, 127],
[124, 77], [117, 36], [106, 222],
[92, 159], [79, 124], [68, 93]],
[[580, 34], [504, 50], [459, 94],
EXPECTED_HAND_COORDINATES_PREDICTION = [[[580, 34], [504, 50], [459, 94],
[429, 146], [397, 182], [507, 167],
[479, 245], [469, 292], [464, 330],
[545, 180], [534, 265], [533, 319],
[536, 360], [581, 172], [587, 252],
[593, 304], [599, 346], [615, 168],
[628, 223], [638, 258], [648, 288]]]
[628, 223], [638, 258], [648, 288]],
[[138, 343], [211, 330], [257, 286],
[289, 237], [322, 203], [219, 216],
[238, 138], [249, 90], [253, 51],
[177, 204], [184, 115], [187, 60],
[185, 19], [138, 208], [131, 127],
[124, 77], [117, 36], [106, 222],
[92, 159], [79, 124], [68, 93]]]


class HandsTest(parameterized.TestCase):
@@ -80,6 +80,7 @@ class Holistic(SolutionBase):
smooth_landmarks=True,
enable_segmentation=False,
smooth_segmentation=True,
refine_face_landmarks=False,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe Holistic object.
@@ -98,6 +99,10 @@ class Holistic(SolutionBase):
smooth_segmentation: Whether to filter segmentation across different input
images to reduce jitter. See details in
https://solutions.mediapipe.dev/holistic#smooth_segmentation.
refine_face_landmarks: Whether to further refine the landmark coordinates
around the eyes and lips, and output additional landmarks around the
irises. Default to False. See details in
https://solutions.mediapipe.dev/holistic#refine_face_landmarks.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
detection to be considered successful. See details in
https://solutions.mediapipe.dev/holistic#min_detection_confidence.
@@ -114,6 +119,7 @@ class Holistic(SolutionBase):
'enable_segmentation': enable_segmentation,
'smooth_segmentation':
smooth_segmentation and not static_image_mode,
'refine_face_landmarks': refine_face_landmarks,
'use_prev_landmarks': not static_image_mode,
},
calculator_params={
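The refine_face_landmarks flag is now surfaced on the Python Holistic API; enabling it adds the iris landmarks to the face mesh, which is exactly what the updated test below asserts (478 points instead of 468). A minimal usage sketch, assuming a MediaPipe build that includes this change; the input file name is hypothetical:

```
import cv2
import mediapipe as mp

with mp.solutions.holistic.Holistic(
        static_image_mode=True, refine_face_landmarks=True) as holistic:
    image = cv2.imread('holistic.jpg')  # hypothetical input image
    results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if results.face_landmarks:
        # 478 landmarks with refine_face_landmarks=True, 468 otherwise.
        print(len(results.face_landmarks.landmark))
```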
@@ -99,18 +99,23 @@ class PoseTest(parameterized.TestCase):
results = holistic.process(image)
self.assertIsNone(results.pose_landmarks)

@parameterized.named_parameters(('static_lite', True, 0, 3),
('static_full', True, 1, 3),
('static_heavy', True, 2, 3),
('video_lite', False, 0, 3),
('video_full', False, 1, 3),
('video_heavy', False, 2, 3))
def test_on_image(self, static_image_mode, model_complexity, num_frames):
@parameterized.named_parameters(('static_lite', True, 0, False, 3),
('static_full', True, 1, False, 3),
('static_heavy', True, 2, False, 3),
('video_lite', False, 0, False, 3),
('video_full', False, 1, False, 3),
('video_heavy', False, 2, False, 3),
('static_full_refine_face', True, 1, True, 3),
('video_full_refine_face', False, 1, True, 3))
def test_on_image(self, static_image_mode, model_complexity,
refine_face_landmarks, num_frames):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/holistic.jpg')
image = cv2.imread(image_path)
with mp_holistic.Holistic(static_image_mode=static_image_mode,
model_complexity=model_complexity) as holistic:
with mp_holistic.Holistic(
static_image_mode=static_image_mode,
model_complexity=model_complexity,
refine_face_landmarks=refine_face_landmarks) as holistic:
for idx in range(num_frames):
results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
@@ -129,7 +134,8 @@ class PoseTest(parameterized.TestCase):
EXPECTED_RIGHT_HAND_LANDMARKS,
HAND_DIFF_THRESHOLD)
# TODO: Verify the correctness of the face landmarks.
self.assertLen(results.face_landmarks.landmark, 468)
self.assertLen(results.face_landmarks.landmark,
478 if refine_face_landmarks else 468)


if __name__ == '__main__':