Create Pose Detector Web API

PiperOrigin-RevId: 526672533
2023-04-24 09:50:19 -07:00 · 2023-04-24 09:50:19 -07:00 · 61854dc6a3
commit 61854dc6a3
parent 6773188e26
9 changed files with 879 additions and 1 deletions
--- a/mediapipe/tasks/web/vision/BUILD
+++ b/mediapipe/tasks/web/vision/BUILD
@ -31,6 +31,7 @@ VISION_LIBS = [
    "//mediapipe/tasks/web/vision/image_segmenter",
    "//mediapipe/tasks/web/vision/interactive_segmenter",
    "//mediapipe/tasks/web/vision/object_detector",
+    "//mediapipe/tasks/web/vision/pose_landmarker",
 ]

 mediapipe_ts_library(
--- a/mediapipe/tasks/web/vision/README.md
+++ b/mediapipe/tasks/web/vision/README.md
@ -160,3 +160,20 @@ const detections = objectDetector.detect(image);

 For more information, refer to the [Object Detector](https://developers.google.com/mediapipe/solutions/vision/object_detector/web_js) documentation.

+
+## Pose Landmark Detection
+
+The MediaPipe Pose Landmarker task lets you detect the landmarks of body poses
+in an image. You can use this Task to localize key points of a pose and render
+visual effects over the body.
+
+```
+const vision = await FilesetResolver.forVisionTasks(
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+);
+const poseLandmarker = await PoseLandmarker.createFromModelPath(vision,
+    "model.task"
+);
+const image = document.getElementById("image") as HTMLImageElement;
+poseLandmarker.detect(image, (result) => {
+  const landmarks = result.landmarks;
+});
+```
--- a/mediapipe/tasks/web/vision/index.ts
+++ b/mediapipe/tasks/web/vision/index.ts
@ -27,6 +27,7 @@ import {ImageEmbedder as ImageEmbedderImpl} from '../../../tasks/web/vision/imag
 import {ImageSegmenter as ImageSegementerImpl} from '../../../tasks/web/vision/image_segmenter/image_segmenter';
 import {InteractiveSegmenter as InteractiveSegmenterImpl} from '../../../tasks/web/vision/interactive_segmenter/interactive_segmenter';
 import {ObjectDetector as ObjectDetectorImpl} from '../../../tasks/web/vision/object_detector/object_detector';
+import {PoseLandmarker as PoseLandmarkerImpl} from '../../../tasks/web/vision/pose_landmarker/pose_landmarker';

 // Declare the variables locally so that Rollup in OSS includes them explicitly
 // as exports.
@ -44,6 +45,7 @@ const ImageEmbedder = ImageEmbedderImpl;
 const ImageSegmenter = ImageSegementerImpl;
 const InteractiveSegmenter = InteractiveSegmenterImpl;
 const ObjectDetector = ObjectDetectorImpl;
+const PoseLandmarker = PoseLandmarkerImpl;

 export {
  DrawingUtils,
@ -59,5 +61,6 @@ export {
  ImageEmbedder,
  ImageSegmenter,
  InteractiveSegmenter,
-  ObjectDetector
+  ObjectDetector,
+  PoseLandmarker
 };
--- a/mediapipe/tasks/web/vision/pose_landmarker/BUILD
+++ b/mediapipe/tasks/web/vision/pose_landmarker/BUILD
@ -0,0 +1,73 @@
+# This contains the MediaPipe Pose Landmarker Task.
+#
+# This task takes video frames and outputs synchronized frames along with
+# the detection results for one or more pose categories, using Pose Landmarker.
+
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_ts_declaration", "mediapipe_ts_library")
+load("@npm//@bazel/jasmine:index.bzl", "jasmine_node_test")
+
+package(default_visibility = ["//mediapipe/tasks:internal"])
+
+licenses(["notice"])
+
+mediapipe_ts_library(
+    name = "pose_landmarker",
+    srcs = ["pose_landmarker.ts"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":pose_landmarker_types",
+        "//mediapipe/framework:calculator_jspb_proto",
+        "//mediapipe/framework:calculator_options_jspb_proto",
+        "//mediapipe/framework/formats:landmark_jspb_proto",
+        "//mediapipe/tasks/cc/core/proto:base_options_jspb_proto",
+        "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_jspb_proto",
+        "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarker_graph_options_jspb_proto",
+        "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarks_detector_graph_options_jspb_proto",
+        "//mediapipe/tasks/web/components/containers:category",
+        "//mediapipe/tasks/web/components/containers:landmark",
+        "//mediapipe/tasks/web/components/processors:landmark_result",
+        "//mediapipe/tasks/web/core",
+        "//mediapipe/tasks/web/vision/core:image_processing_options",
+        "//mediapipe/tasks/web/vision/core:types",
+        "//mediapipe/tasks/web/vision/core:vision_task_runner",
+        "//mediapipe/web/graph_runner:graph_runner_ts",
+    ],
+)
+
+mediapipe_ts_declaration(
+    name = "pose_landmarker_types",
+    srcs = [
+        "pose_landmarker_options.d.ts",
+        "pose_landmarker_result.d.ts",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/tasks/web/components/containers:category",
+        "//mediapipe/tasks/web/components/containers:landmark",
+        "//mediapipe/tasks/web/core",
+        "//mediapipe/tasks/web/vision/core:vision_task_options",
+    ],
+)
+
+mediapipe_ts_library(
+    name = "pose_landmarker_test_lib",
+    testonly = True,
+    srcs = [
+        "pose_landmarker_test.ts",
+    ],
+    deps = [
+        ":pose_landmarker",
+        ":pose_landmarker_types",
+        "//mediapipe/framework:calculator_jspb_proto",
+        "//mediapipe/tasks/web/components/processors:landmark_result",
+        "//mediapipe/tasks/web/core",
+        "//mediapipe/tasks/web/core:task_runner_test_utils",
+        "//mediapipe/tasks/web/vision/core:vision_task_runner",
+    ],
+)
+
+jasmine_node_test(
+    name = "pose_landmarker_test",
+    tags = ["nomsan"],
+    deps = [":pose_landmarker_test_lib"],
+)
--- a/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts
+++ b/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts
@ -0,0 +1,434 @@
+/**
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import {CalculatorGraphConfig} from '../../../../framework/calculator_pb';
+import {CalculatorOptions} from '../../../../framework/calculator_options_pb';
+import {LandmarkList, NormalizedLandmarkList} from '../../../../framework/formats/landmark_pb';
+import {BaseOptions as BaseOptionsProto} from '../../../../tasks/cc/core/proto/base_options_pb';
+import {PoseDetectorGraphOptions} from '../../../../tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb';
+import {PoseLandmarkerGraphOptions} from '../../../../tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb';
+import {PoseLandmarksDetectorGraphOptions} from '../../../../tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb';
+import {convertToLandmarks, convertToWorldLandmarks} from '../../../../tasks/web/components/processors/landmark_result';
+import {WasmFileset} from '../../../../tasks/web/core/wasm_fileset';
+import {ImageProcessingOptions} from '../../../../tasks/web/vision/core/image_processing_options';
+import {Connection} from '../../../../tasks/web/vision/core/types';
+import {VisionGraphRunner, VisionTaskRunner} from '../../../../tasks/web/vision/core/vision_task_runner';
+import {ImageSource, WasmModule} from '../../../../web/graph_runner/graph_runner';
+// Placeholder for internal dependency on trusted resource url
+
+import {PoseLandmarkerOptions} from './pose_landmarker_options';
+import {PoseLandmarkerResult} from './pose_landmarker_result';
+
+export * from './pose_landmarker_options';
+export * from './pose_landmarker_result';
+export {ImageSource};
+
+// The OSS JS API does not support the builder pattern.
+// tslint:disable:jspb-use-builder-pattern
+
+// Names of the streams that connect this task to the MediaPipe graph.
+const IMAGE_STREAM = 'image_in';
+const NORM_RECT_STREAM = 'norm_rect';
+const NORM_LANDMARKS_STREAM = 'normalized_landmarks';
+const WORLD_LANDMARKS_STREAM = 'world_landmarks';
+const AUXILIARY_LANDMARKS_STREAM = 'auxiliary_landmarks';
+const SEGMENTATION_MASK_STREAM = 'segmentation_masks';
+// Name of the calculator that is used as the single node of the task graph.
+const POSE_LANDMARKER_GRAPH =
+    'mediapipe.tasks.vision.pose_landmarker.PoseLandmarkerGraph';
+
+// Values applied at construction time and whenever an option is explicitly
+// reset to `undefined` via `setOptions()`.
+const DEFAULT_NUM_POSES = 1;
+const DEFAULT_SCORE_THRESHOLD = 0.5;
+const DEFAULT_OUTPUT_SEGMANTATION_MASKS = false;
+
+/**
+ * A callback that receives the result from the `PoseLandmarker`. The returned
+ * masks are only valid for the duration of the callback. If asynchronous
+ * processing is needed, the masks need to be copied before the callback
+ * returns.
+ */
+export type PoseLandmarkerCallback = (result: PoseLandmarkerResult) => void;
+
+/** Performs pose landmarks detection on images. */
+export class PoseLandmarker extends VisionTaskRunner {
+  private result: PoseLandmarkerResult = {
+    landmarks: [],
+    worldLandmarks: [],
+    auxilaryLandmarks: []
+  };
+  private outputSegmentationMasks = false;
+  private readonly options: PoseLandmarkerGraphOptions;
+  private readonly poseLandmarksDetectorGraphOptions:
+      PoseLandmarksDetectorGraphOptions;
+  private readonly poseDetectorGraphOptions: PoseDetectorGraphOptions;
+
+  /**
+   * An array containing the pairs of pose landmark indices to be rendered with
+   * connections. Each entry links the landmark at index `start` to the
+   * landmark at index `end`.
+   */
+  static POSE_CONNECTIONS: Connection[] = [
+    {start: 0, end: 1},   {start: 1, end: 2},   {start: 2, end: 3},
+    {start: 3, end: 7},   {start: 0, end: 4},   {start: 4, end: 5},
+    {start: 5, end: 6},   {start: 6, end: 8},   {start: 9, end: 10},
+    {start: 11, end: 12}, {start: 11, end: 13}, {start: 13, end: 15},
+    {start: 15, end: 17}, {start: 15, end: 19}, {start: 15, end: 21},
+    {start: 17, end: 19}, {start: 12, end: 14}, {start: 14, end: 16},
+    {start: 16, end: 18}, {start: 16, end: 20}, {start: 16, end: 22},
+    {start: 18, end: 20}, {start: 11, end: 23}, {start: 12, end: 24},
+    {start: 23, end: 24}, {start: 23, end: 25}, {start: 24, end: 26},
+    {start: 25, end: 27}, {start: 26, end: 28}, {start: 27, end: 29},
+    {start: 28, end: 30}, {start: 29, end: 31}, {start: 30, end: 32},
+    {start: 27, end: 31}, {start: 28, end: 32}
+  ];
+
+  /**
+   * Creates a new `PoseLandmarker` that is configured with the given options.
+   * The instance is created through `VisionTaskRunner.createVisionInstance()`.
+   * @param wasmFileset A configuration object that provides the location of the
+   *     Wasm binary and its loader.
+   * @param poseLandmarkerOptions The options for the PoseLandmarker.
+   *     Note that either a path to the model asset or a model buffer needs to
+   *     be provided (via `baseOptions`).
+   * @return A promise for the newly created `PoseLandmarker`.
+   */
+  static createFromOptions(
+      wasmFileset: WasmFileset,
+      poseLandmarkerOptions: PoseLandmarkerOptions): Promise<PoseLandmarker> {
+    const taskOptions = poseLandmarkerOptions;
+    return VisionTaskRunner.createVisionInstance(
+        PoseLandmarker, wasmFileset, taskOptions);
+  }
+
+  /**
+   * Creates a new `PoseLandmarker` from an in-memory model. The buffer is
+   * passed on as `baseOptions.modelAssetBuffer`; all other options are left
+   * unset.
+   * @param wasmFileset A configuration object that provides the location of the
+   *     Wasm binary and its loader.
+   * @param modelAssetBuffer A binary representation of the model.
+   * @return A promise for the newly created `PoseLandmarker`.
+   */
+  static createFromModelBuffer(
+      wasmFileset: WasmFileset,
+      modelAssetBuffer: Uint8Array): Promise<PoseLandmarker> {
+    const taskOptions: PoseLandmarkerOptions = {
+      baseOptions: {modelAssetBuffer}
+    };
+    return VisionTaskRunner.createVisionInstance(
+        PoseLandmarker, wasmFileset, taskOptions);
+  }
+
+  /**
+   * Creates a new `PoseLandmarker` from a model that is referenced by path.
+   * The path is passed on as `baseOptions.modelAssetPath`; all other options
+   * are left unset.
+   * @param wasmFileset A configuration object that provides the location of the
+   *     Wasm binary and its loader.
+   * @param modelAssetPath The path to the model asset.
+   * @return A promise for the newly created `PoseLandmarker`.
+   */
+  static createFromModelPath(
+      wasmFileset: WasmFileset,
+      modelAssetPath: string): Promise<PoseLandmarker> {
+    const taskOptions: PoseLandmarkerOptions = {
+      baseOptions: {modelAssetPath}
+    };
+    return VisionTaskRunner.createVisionInstance(
+        PoseLandmarker, wasmFileset, taskOptions);
+  }
+
+  /** @hideconstructor */
+  constructor(
+      wasmModule: WasmModule,
+      glCanvas?: HTMLCanvasElement|OffscreenCanvas|null) {
+    super(
+        new VisionGraphRunner(wasmModule, glCanvas), IMAGE_STREAM,
+        NORM_RECT_STREAM, /* roiAllowed= */ false);
+
+    // Create the nested option protos first, then attach them to the
+    // top-level graph options so that later setter calls on the nested
+    // protos are reflected in `this.options`.
+    this.poseLandmarksDetectorGraphOptions =
+        new PoseLandmarksDetectorGraphOptions();
+    this.poseDetectorGraphOptions = new PoseDetectorGraphOptions();
+
+    this.options = new PoseLandmarkerGraphOptions();
+    this.options.setBaseOptions(new BaseOptionsProto());
+    this.options.setPoseLandmarksDetectorGraphOptions(
+        this.poseLandmarksDetectorGraphOptions);
+    this.options.setPoseDetectorGraphOptions(this.poseDetectorGraphOptions);
+
+    this.initDefaults();
+  }
+
+  /**
+   * Returns the base options stored on the graph options proto. The non-null
+   * assertion relies on the constructor having set a `BaseOptionsProto`.
+   */
+  protected override get baseOptions(): BaseOptionsProto {
+    return this.options.getBaseOptions()!;
+  }
+
+  /** Replaces the base options stored on the graph options proto. */
+  protected override set baseOptions(proto: BaseOptionsProto) {
+    this.options.setBaseOptions(proto);
+  }
+
+  /**
+   * Updates the configuration of this `PoseLandmarker`.
+   *
+   * Only the options that are present in `options` are changed; everything
+   * else keeps its current value. An option that is present but set to
+   * `undefined` is reset to its default value.
+   *
+   * @param options The options for the pose landmarker.
+   * @return The promise returned by `applyOptions()`.
+   */
+  override setOptions(options: PoseLandmarkerOptions): Promise<void> {
+    const detectorOptions = this.poseDetectorGraphOptions;
+    const landmarksDetectorOptions = this.poseLandmarksDetectorGraphOptions;
+
+    // Pose detector settings.
+    if ('numPoses' in options) {
+      const numPoses = options.numPoses ?? DEFAULT_NUM_POSES;
+      detectorOptions.setNumPoses(numPoses);
+    }
+    if ('minPoseDetectionConfidence' in options) {
+      const minDetection =
+          options.minPoseDetectionConfidence ?? DEFAULT_SCORE_THRESHOLD;
+      detectorOptions.setMinDetectionConfidence(minDetection);
+    }
+
+    // Pose landmark detector settings.
+    if ('minTrackingConfidence' in options) {
+      const minTracking =
+          options.minTrackingConfidence ?? DEFAULT_SCORE_THRESHOLD;
+      this.options.setMinTrackingConfidence(minTracking);
+    }
+    if ('minPosePresenceConfidence' in options) {
+      const minPresence =
+          options.minPosePresenceConfidence ?? DEFAULT_SCORE_THRESHOLD;
+      landmarksDetectorOptions.setMinDetectionConfidence(minPresence);
+    }
+
+    // Segmentation masks are toggled on the task itself rather than on a
+    // proto; the flag is read when the graph is rebuilt.
+    if ('outputSegmentationMasks' in options) {
+      this.outputSegmentationMasks =
+          options.outputSegmentationMasks ?? DEFAULT_OUTPUT_SEGMANTATION_MASKS;
+    }
+
+    return this.applyOptions(options);
+  }
+
+  /**
+   * Runs pose detection on a single image and synchronously invokes
+   * `callback` with the result. Only use this method when the PoseLandmarker
+   * is created with running mode `image`.
+   *
+   * @param image An image to process.
+   * @param callback The callback that is invoked with the result. The
+   *    lifetime of the returned masks is only guaranteed for the duration of
+   *    the callback.
+   */
+  detect(image: ImageSource, callback: PoseLandmarkerCallback): void;
+  /**
+   * Runs pose detection on a single image and synchronously invokes
+   * `callback` with the result. Only use this method when the PoseLandmarker
+   * is created with running mode `image`.
+   *
+   * @param image An image to process.
+   * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
+   *    to process the input image before running inference.
+   * @param callback The callback that is invoked with the result. The
+   *    lifetime of the returned masks is only guaranteed for the duration of
+   *    the callback.
+   */
+  detect(
+      image: ImageSource, imageProcessingOptions: ImageProcessingOptions,
+      callback: PoseLandmarkerCallback): void;
+  detect(
+      image: ImageSource,
+      imageProcessingOptionsOrCallback: ImageProcessingOptions|
+      PoseLandmarkerCallback,
+      callback?: PoseLandmarkerCallback): void {
+    // Resolve the overload: the second argument is either the callback itself
+    // or the processing options that precede it.
+    let imageProcessingOptions: ImageProcessingOptions;
+    let userCallback: PoseLandmarkerCallback;
+    if (typeof imageProcessingOptionsOrCallback === 'function') {
+      imageProcessingOptions = {};
+      userCallback = imageProcessingOptionsOrCallback;
+    } else {
+      imageProcessingOptions = imageProcessingOptionsOrCallback;
+      userCallback = callback!;
+    }
+
+    this.resetResults();
+    this.processImageData(image, imageProcessingOptions);
+    userCallback(this.result);
+  }
+
+  /**
+   * Runs pose detection on a video frame and synchronously invokes `callback`
+   * with the result. Only use this method when the PoseLandmarker is created
+   * with running mode `video`.
+   *
+   * @param videoFrame A video frame to process.
+   * @param timestamp The timestamp of the current frame, in ms.
+   * @param callback The callback that is invoked with the result. The
+   *    lifetime of the returned masks is only guaranteed for the duration of
+   *    the callback.
+   */
+  detectForVideo(
+      videoFrame: ImageSource, timestamp: number,
+      callback: PoseLandmarkerCallback): void;
+  /**
+   * Runs pose detection on a video frame and synchronously invokes `callback`
+   * with the result. Only use this method when the PoseLandmarker is created
+   * with running mode `video`.
+   *
+   * @param videoFrame A video frame to process.
+   * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
+   *    to process the input image before running inference.
+   * @param timestamp The timestamp of the current frame, in ms.
+   * @param callback The callback that is invoked with the result. The
+   *    lifetime of the returned masks is only guaranteed for the duration of
+   *    the callback.
+   */
+  detectForVideo(
+      videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions,
+      timestamp: number, callback: PoseLandmarkerCallback): void;
+  detectForVideo(
+      videoFrame: ImageSource,
+      timestampOrImageProcessingOptions: number|ImageProcessingOptions,
+      timestampOrCallback: number|PoseLandmarkerCallback,
+      callback?: PoseLandmarkerCallback): void {
+    // Resolve the overload: a numeric second argument is the timestamp,
+    // otherwise it holds the processing options and the timestamp follows.
+    let imageProcessingOptions: ImageProcessingOptions;
+    let timestamp: number;
+    if (typeof timestampOrImageProcessingOptions === 'number') {
+      imageProcessingOptions = {};
+      timestamp = timestampOrImageProcessingOptions;
+    } else {
+      imageProcessingOptions = timestampOrImageProcessingOptions;
+      timestamp = timestampOrCallback as number;
+    }
+
+    let userCallback: PoseLandmarkerCallback;
+    if (typeof timestampOrCallback === 'function') {
+      userCallback = timestampOrCallback;
+    } else {
+      userCallback = callback!;
+    }
+
+    this.resetResults();
+    this.processVideoData(videoFrame, imageProcessingOptions, timestamp);
+    userCallback(this.result);
+  }
+
+  /**
+   * Replaces the current result with an empty one. `segmentationMasks` is
+   * only initialized (to an empty array) when mask output is enabled.
+   */
+  private resetResults(): void {
+    const emptyResult: PoseLandmarkerResult = {
+      landmarks: [],
+      worldLandmarks: [],
+      auxilaryLandmarks: []
+    };
+    if (this.outputSegmentationMasks) {
+      emptyResult.segmentationMasks = [];
+    }
+    this.result = emptyResult;
+  }
+
+  /**
+   * Applies the default pose count and the default score threshold to the
+   * graph option protos.
+   */
+  private initDefaults(): void {
+    const detectorOptions = this.poseDetectorGraphOptions;
+    const landmarksDetectorOptions = this.poseLandmarksDetectorGraphOptions;
+
+    this.options.setMinTrackingConfidence(DEFAULT_SCORE_THRESHOLD);
+    landmarksDetectorOptions.setMinDetectionConfidence(
+        DEFAULT_SCORE_THRESHOLD);
+    detectorOptions.setMinDetectionConfidence(DEFAULT_SCORE_THRESHOLD);
+    detectorOptions.setNumPoses(DEFAULT_NUM_POSES);
+  }
+
+  /**
+   * Deserializes each binary `NormalizedLandmarkList` and stores the converted
+   * landmarks in `result.landmarks`. Every list replaces the previously stored
+   * value, so only the last list in `data` is kept.
+   */
+  private addJsLandmarks(data: Uint8Array[]): void {
+    data.forEach(serializedList => {
+      const landmarkList =
+          NormalizedLandmarkList.deserializeBinary(serializedList);
+      this.result.landmarks = convertToLandmarks(landmarkList);
+    });
+  }
+
+  /**
+   * Deserializes each binary `LandmarkList` and stores the converted world
+   * landmarks in `result.worldLandmarks`. Every list replaces the previously
+   * stored value, so only the last list in `data` is kept.
+   */
+  private adddJsWorldLandmarks(data: Uint8Array[]): void {
+    data.forEach(serializedList => {
+      const worldLandmarkList = LandmarkList.deserializeBinary(serializedList);
+      this.result.worldLandmarks = convertToWorldLandmarks(worldLandmarkList);
+    });
+  }
+
+  /**
+   * Deserializes each binary `NormalizedLandmarkList` and stores the converted
+   * auxiliary landmarks in `result.auxilaryLandmarks`. Every list replaces the
+   * previously stored value, so only the last list in `data` is kept.
+   */
+  private addJsAuxiliaryLandmarks(data: Uint8Array[]): void {
+    data.forEach(serializedList => {
+      const auxiliaryList =
+          NormalizedLandmarkList.deserializeBinary(serializedList);
+      this.result.auxilaryLandmarks = convertToLandmarks(auxiliaryList);
+    });
+  }
+
+  /**
+   * Rebuilds the MediaPipe graph configuration from the current options and
+   * loads it into the graph runner.
+   *
+   * The graph consists of a single `PoseLandmarkerGraph` node that consumes
+   * the image and norm-rect input streams and produces the normalized, world
+   * and auxiliary landmark streams. The segmentation mask stream is only
+   * declared and connected when `outputSegmentationMasks` is enabled.
+   *
+   * For every output stream, one listener stores the received data in
+   * `this.result` and a second listener handles empty packets; both record
+   * the packet timestamp via `setLatestOutputTimestamp()`.
+   */
+  protected override refreshGraph(): void {
+    const graphConfig = new CalculatorGraphConfig();
+    graphConfig.addInputStream(IMAGE_STREAM);
+    graphConfig.addInputStream(NORM_RECT_STREAM);
+    graphConfig.addOutputStream(NORM_LANDMARKS_STREAM);
+    graphConfig.addOutputStream(WORLD_LANDMARKS_STREAM);
+    graphConfig.addOutputStream(AUXILIARY_LANDMARKS_STREAM);
+
+    const calculatorOptions = new CalculatorOptions();
+    calculatorOptions.setExtension(
+        PoseLandmarkerGraphOptions.ext, this.options);
+
+    const landmarkerNode = new CalculatorGraphConfig.Node();
+    landmarkerNode.setCalculator(POSE_LANDMARKER_GRAPH);
+    landmarkerNode.addInputStream('IMAGE:' + IMAGE_STREAM);
+    landmarkerNode.addInputStream('NORM_RECT:' + NORM_RECT_STREAM);
+    landmarkerNode.addOutputStream('NORM_LANDMARKS:' + NORM_LANDMARKS_STREAM);
+    landmarkerNode.addOutputStream('WORLD_LANDMARKS:' + WORLD_LANDMARKS_STREAM);
+    landmarkerNode.addOutputStream(
+        'AUXILIARY_LANDMARKS:' + AUXILIARY_LANDMARKS_STREAM);
+    landmarkerNode.setOptions(calculatorOptions);
+
+    graphConfig.addNode(landmarkerNode);
+
+    this.graphRunner.attachProtoVectorListener(
+        NORM_LANDMARKS_STREAM, (binaryProto, timestamp) => {
+          this.addJsLandmarks(binaryProto);
+          this.setLatestOutputTimestamp(timestamp);
+        });
+    this.graphRunner.attachEmptyPacketListener(
+        NORM_LANDMARKS_STREAM, timestamp => {
+          this.setLatestOutputTimestamp(timestamp);
+        });
+
+    this.graphRunner.attachProtoVectorListener(
+        WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => {
+          this.adddJsWorldLandmarks(binaryProto);
+          this.setLatestOutputTimestamp(timestamp);
+        });
+    this.graphRunner.attachEmptyPacketListener(
+        WORLD_LANDMARKS_STREAM, timestamp => {
+          this.setLatestOutputTimestamp(timestamp);
+        });
+
+    this.graphRunner.attachProtoVectorListener(
+        AUXILIARY_LANDMARKS_STREAM, (binaryProto, timestamp) => {
+          this.addJsAuxiliaryLandmarks(binaryProto);
+          this.setLatestOutputTimestamp(timestamp);
+        });
+    this.graphRunner.attachEmptyPacketListener(
+        AUXILIARY_LANDMARKS_STREAM, timestamp => {
+          this.setLatestOutputTimestamp(timestamp);
+        });
+
+    if (this.outputSegmentationMasks) {
+      // Declare the mask stream as a graph output only when the node is
+      // actually asked to produce it, so the graph never advertises an output
+      // stream without a producer.
+      graphConfig.addOutputStream(SEGMENTATION_MASK_STREAM);
+      landmarkerNode.addOutputStream(
+          'SEGMENTATION_MASK:' + SEGMENTATION_MASK_STREAM);
+      this.graphRunner.attachImageVectorListener(
+          SEGMENTATION_MASK_STREAM, (masks, timestamp) => {
+            // The cast mirrors the declared type of
+            // `PoseLandmarkerResult.segmentationMasks`.
+            this.result.segmentationMasks =
+                masks.map(m => m.data) as Float32Array[] | WebGLTexture[];
+            this.setLatestOutputTimestamp(timestamp);
+          });
+      this.graphRunner.attachEmptyPacketListener(
+          SEGMENTATION_MASK_STREAM, timestamp => {
+            this.setLatestOutputTimestamp(timestamp);
+          });
+    }
+
+    const binaryGraph = graphConfig.serializeBinary();
+    this.setGraph(new Uint8Array(binaryGraph), /* isBinary= */ true);
+  }
+}
+
+
--- a/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker_options.d.ts
+++ b/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker_options.d.ts
@ -0,0 +1,47 @@
+/**
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import {VisionTaskOptions} from '../../../../tasks/web/vision/core/vision_task_options';
+
+/** Options to configure the MediaPipe PoseLandmarker Task. */
+export declare interface PoseLandmarkerOptions extends VisionTaskOptions {
+  /**
+   * The maximum number of poses that can be detected by the PoseLandmarker.
+   * Defaults to 1.
+   */
+  numPoses?: number|undefined;
+
+  /**
+   * The minimum confidence score for the pose detection to be considered
+   * successful. Defaults to 0.5.
+   */
+  minPoseDetectionConfidence?: number|undefined;
+
+  /**
+   * The minimum confidence score for the pose presence in the pose landmark
+   * detection. Defaults to 0.5.
+   */
+  minPosePresenceConfidence?: number|undefined;
+
+  /**
+   * The minimum confidence score for the pose tracking to be considered
+   * successful. Defaults to 0.5.
+   */
+  minTrackingConfidence?: number|undefined;
+
+  /** Whether to output segmentation masks. Defaults to false. */
+  outputSegmentationMasks?: boolean|undefined;
+}
--- a/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker_result.d.ts
+++ b/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker_result.d.ts
@ -0,0 +1,38 @@
+/**
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import {Category} from '../../../../tasks/web/components/containers/category';
+import {Landmark, NormalizedLandmark} from '../../../../tasks/web/components/containers/landmark';
+
+export {Category, Landmark, NormalizedLandmark};
+
+/**
+ * Represents the pose landmarks detection results generated by
+ * `PoseLandmarker`. Each vector element represents a single pose detected in
+ * the image.
+ *
+ * NOTE(review): the landmark fields below are typed as flat landmark arrays
+ * rather than one array per pose - confirm how results for more than one pose
+ * are meant to be represented.
+ */
+export declare interface PoseLandmarkerResult {
+  /** Pose landmarks of detected poses. */
+  landmarks: NormalizedLandmark[];
+
+  /** Pose landmarks in world coordinates of detected poses. */
+  worldLandmarks: Landmark[];
+
+  /** Detected auxiliary landmarks, used for deriving ROI for next frame. */
+  auxilaryLandmarks: NormalizedLandmark[];
+
+  /**
+   * Segmentation mask for the detected pose. Only set when segmentation mask
+   * output is enabled.
+   */
+  segmentationMasks?: Float32Array[]|WebGLTexture[];
+}
--- a/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker_test.ts
+++ b/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker_test.ts
@ -0,0 +1,264 @@
+/**
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import 'jasmine';
+
+import {CalculatorGraphConfig} from '../../../../framework/calculator_pb';
+import {createLandmarks, createWorldLandmarks} from '../../../../tasks/web/components/processors/landmark_result_test_lib';
+import {addJasmineCustomFloatEqualityTester, createSpyWasmModule, MediapipeTasksFake, SpyWasmModule, verifyGraph} from '../../../../tasks/web/core/task_runner_test_utils';
+import {VisionGraphRunner} from '../../../../tasks/web/vision/core/vision_task_runner';
+
+import {PoseLandmarker} from './pose_landmarker';
+import {PoseLandmarkerOptions} from './pose_landmarker_options';
+import {PoseLandmarkerResult} from './pose_landmarker_result';
+
+// The OSS JS API does not support the builder pattern.
+// tslint:disable:jspb-use-builder-pattern
+
+/**
+ * Common signature under which the fake stores every listener it captures.
+ * The payload is typed as `unknown` so that listeners for different stream
+ * types can share a single map.
+ */
+type PacketListener = (data: unknown, timestamp: number) => void;
+
+/**
+ * Test double for `PoseLandmarker`. It is constructed with a spy WASM module
+ * and replaces the graph runner's listener, graph and input methods with
+ * Jasmine spies, so that specs can inspect the configured graph and feed fake
+ * packets to the listeners the task registers.
+ */
+class PoseLandmarkerFake extends PoseLandmarker implements MediapipeTasksFake {
+  calculatorName = 'mediapipe.tasks.vision.pose_landmarker.PoseLandmarkerGraph';
+  attachListenerSpies: jasmine.Spy[] = [];
+  /** Graph config captured from the most recent `setGraph()` call. */
+  graph: CalculatorGraphConfig|undefined;
+  fakeWasmModule: SpyWasmModule;
+  /** Captured listeners, keyed by the stream name they were attached to. */
+  listeners = new Map<string, PacketListener>();
+
+  constructor() {
+    super(createSpyWasmModule(), /* glCanvas= */ null);
+    const runner = this.graphRunner;
+    this.fakeWasmModule = runner.wasmModule as unknown as SpyWasmModule;
+
+    // Capture the landmark listeners instead of attaching them to a graph.
+    const protoVectorSpy = spyOn(runner, 'attachProtoVectorListener');
+    protoVectorSpy.and.callFake((stream, listener) => {
+      expect(stream).toMatch(
+          /(normalized_landmarks|world_landmarks|auxiliary_landmarks)/);
+      this.listeners.set(stream, listener as PacketListener);
+    });
+    this.attachListenerSpies.push(protoVectorSpy);
+
+    // Same for the segmentation mask listener.
+    const imageVectorSpy = spyOn(runner, 'attachImageVectorListener');
+    imageVectorSpy.and.callFake((stream, listener) => {
+      expect(stream).toEqual('segmentation_masks');
+      this.listeners.set(stream, listener as PacketListener);
+    });
+    this.attachListenerSpies.push(imageVectorSpy);
+
+    // Keep a deserialized copy of every graph that gets set.
+    spyOn(runner, 'setGraph').and.callFake(binaryGraph => {
+      this.graph = CalculatorGraphConfig.deserializeBinary(binaryGraph);
+    });
+
+    // Stub out the input methods; specs only check how often they are called.
+    spyOn(runner, 'addGpuBufferAsImageToStream');
+    spyOn(runner, 'addProtoToStream');
+  }
+
+  /** Returns the graph runner so that specs can inspect its spies. */
+  getGraphRunner(): VisionGraphRunner {
+    return this.graphRunner;
+  }
+}
+
+describe('PoseLandmarker', () => {
+  let poseLandmarker: PoseLandmarkerFake;
+
+  beforeEach(async () => {
+    addJasmineCustomFloatEqualityTester();
+    poseLandmarker = new PoseLandmarkerFake();  // fresh fake for every spec
+    await poseLandmarker.setOptions(
+        {baseOptions: {modelAssetBuffer: new Uint8Array([])}});  // empty model
+  });
+
+  it('initializes graph', async () => {
+    verifyGraph(poseLandmarker);
+    expect(poseLandmarker.listeners).toHaveSize(3);  // no mask listener yet
+  });
+
+  it('reloads graph when settings are changed', async () => {
+    await poseLandmarker.setOptions({numPoses: 1});
+    verifyGraph(poseLandmarker, [['poseDetectorGraphOptions', 'numPoses'], 1]);
+    expect(poseLandmarker.listeners).toHaveSize(3);
+
+    await poseLandmarker.setOptions({numPoses: 5});  // second reload
+    verifyGraph(poseLandmarker, [['poseDetectorGraphOptions', 'numPoses'], 5]);
+    expect(poseLandmarker.listeners).toHaveSize(3);  // keyed by stream name
+  });
+
+  it('registers listener for segmentation masks', async () => {
+    expect(poseLandmarker.listeners).toHaveSize(3);  // landmark streams only
+    await poseLandmarker.setOptions({outputSegmentationMasks: true});
+    expect(poseLandmarker.listeners).toHaveSize(4);  // + 'segmentation_masks'
+  });
+
+  it('merges options', async () => {
+    await poseLandmarker.setOptions({numPoses: 2});
+    await poseLandmarker.setOptions({minPoseDetectionConfidence: 0.1});
+    verifyGraph(poseLandmarker, [
+      'poseDetectorGraphOptions', {
+        numPoses: 2,  // retained from the first setOptions() call
+        baseOptions: undefined,
+        minDetectionConfidence: 0.1,  // from the second setOptions() call
+        minSuppressionThreshold: 0.5
+      }
+    ]);
+  });
+
+  describe('setOptions()', () => {
+    /**
+     * Pairs a public option (`optionPath`) with the graph option field it is
+     * verified against (`fieldPath`), together with the value a spec sets and
+     * the value expected when the option is left unset.
+     */
+    interface TestCase {
+      optionPath: [keyof PoseLandmarkerOptions, ...string[]];
+      fieldPath: string[];
+      customValue: unknown;
+      defaultValue: unknown;
+    }
+
+    const testCases: TestCase[] = [
+      {
+        optionPath: ['numPoses'],
+        fieldPath: ['poseDetectorGraphOptions', 'numPoses'],
+        customValue: 5,
+        defaultValue: 1
+      },
+      {
+        optionPath: ['minPoseDetectionConfidence'],
+        fieldPath: ['poseDetectorGraphOptions', 'minDetectionConfidence'],
+        customValue: 0.1,
+        defaultValue: 0.5
+      },
+      {
+        optionPath: ['minPosePresenceConfidence'],
+        fieldPath:
+            ['poseLandmarksDetectorGraphOptions', 'minDetectionConfidence'],
+        customValue: 0.2,
+        defaultValue: 0.5
+      },
+      {
+        optionPath: ['minTrackingConfidence'],
+        fieldPath: ['minTrackingConfidence'],
+        customValue: 0.3,
+        defaultValue: 0.5
+      },
+    ];
+
+    /**
+     * Builds an options object for setOptions() in which `value` is stored
+     * under the (possibly nested) property named by `path`.
+     */
+    function createOptions(
+        path: string[], value: unknown): PoseLandmarkerOptions {
+      const options: Record<string, unknown> = {};
+      const parentKeys = path.slice(0, -1);
+      const leafKey = path[path.length - 1];
+
+      // Walk down the path, creating one nested object per parent key.
+      let cursor = options;
+      for (const key of parentKeys) {
+        const child: Record<string, unknown> = {};
+        cursor[key] = child;
+        cursor = child;
+      }
+      cursor[leafKey] = value;
+      return options;
+    }
+
+    // Generate the same three specs (default, set, clear) for every option.
+    for (const {optionPath, fieldPath, customValue, defaultValue} of
+             testCases) {
+      it(`uses default value for ${optionPath[0]}`, async () => {
+        verifyGraph(poseLandmarker, [fieldPath, defaultValue]);
+      });
+
+      it(`can set ${optionPath[0]}`, async () => {
+        const options = createOptions(optionPath, customValue);
+        await poseLandmarker.setOptions(options);
+        verifyGraph(poseLandmarker, [fieldPath, customValue]);
+      });
+
+      it(`can clear ${optionPath[0]}`, async () => {
+        const customOptions = createOptions(optionPath, customValue);
+        await poseLandmarker.setOptions(customOptions);
+        verifyGraph(poseLandmarker, [fieldPath, customValue]);
+
+        // Passing `undefined` is expected to restore the default value.
+        const clearedOptions = createOptions(optionPath, undefined);
+        await poseLandmarker.setOptions(clearedOptions);
+        verifyGraph(poseLandmarker, [fieldPath, defaultValue]);
+      });
+    }
+  });
+
+  it('doesn\'t support region of interest', () => {
+    expect(() => {  // detect() is expected to throw
+      poseLandmarker.detect(
+          {} as HTMLImageElement,
+          {regionOfInterest: {left: 0, right: 0, top: 0, bottom: 0}}, () => {});
+    }).toThrowError('This task doesn\'t support region-of-interest.');
+  });
+
+  it('transforms results', async () => {
+    const landmarksProto = [createLandmarks().serializeBinary()];
+    const worldLandmarksProto = [createWorldLandmarks().serializeBinary()];
+    const masks = [
+      {data: new Float32Array([0, 1, 2, 3]), width: 2, height: 2},
+    ];
+
+    // setOptions() returns a promise. Await it (as every other spec does) so
+    // that the segmentation mask listener has been registered before the
+    // fake below looks it up.
+    await poseLandmarker.setOptions({outputSegmentationMasks: true});
+
+    // Pass the test data to our listener
+    poseLandmarker.fakeWasmModule._waitUntilIdle.and.callFake(() => {
+      poseLandmarker.listeners.get('normalized_landmarks')!
+          (landmarksProto, 1337);
+      poseLandmarker.listeners.get('world_landmarks')!
+          (worldLandmarksProto, 1337);
+      poseLandmarker.listeners.get('auxiliary_landmarks')!
+          (landmarksProto, 1337);
+      poseLandmarker.listeners.get('segmentation_masks')!(masks, 1337);
+    });
+
+    // Invoke the pose landmarker. The promise only resolves from within the
+    // result callback, so a callback that is never invoked makes the spec
+    // time out instead of passing without running any expectation.
+    await new Promise<void>(resolve => {
+      poseLandmarker.detect({} as HTMLImageElement, result => {
+        expect(poseLandmarker.getGraphRunner().addProtoToStream)
+            .toHaveBeenCalledTimes(1);
+        expect(poseLandmarker.getGraphRunner().addGpuBufferAsImageToStream)
+            .toHaveBeenCalledTimes(1);
+        expect(poseLandmarker.fakeWasmModule._waitUntilIdle).toHaveBeenCalled();
+
+        expect(result).toEqual({
+          'landmarks': [{'x': 0, 'y': 0, 'z': 0}],
+          'worldLandmarks': [{'x': 0, 'y': 0, 'z': 0}],
+          'auxilaryLandmarks': [{'x': 0, 'y': 0, 'z': 0}],
+          'segmentationMasks': [new Float32Array([0, 1, 2, 3])],
+        });
+        resolve();
+      });
+    });
+  });
+
+  it('clears results between invocations', async () => {
+    const landmarksProto = [createLandmarks().serializeBinary()];
+    const worldLandmarksProto = [createWorldLandmarks().serializeBinary()];
+
+    // Pass the test data to our listener
+    poseLandmarker.fakeWasmModule._waitUntilIdle.and.callFake(() => {
+      poseLandmarker.listeners.get('normalized_landmarks')!
+          (landmarksProto, 1337);
+      poseLandmarker.listeners.get('world_landmarks')!
+          (worldLandmarksProto, 1337);
+      poseLandmarker.listeners.get('auxiliary_landmarks')!
+          (landmarksProto, 1337);
+    });
+
+    // Invoke the pose landmarker twice with identical fake output.
+    let landmarks1: PoseLandmarkerResult|undefined;
+    poseLandmarker.detect({} as HTMLImageElement, result => {
+      landmarks1 = result;
+    });
+
+    let landmarks2: PoseLandmarkerResult|undefined;
+    poseLandmarker.detect({} as HTMLImageElement, result => {
+      landmarks2 = result;
+    });
+
+    // Verify that landmarks2 is not a concatenation of all previously
+    // returned landmarks: both invocations must yield the same result.
+    expect(landmarks1).toBeDefined();
+    expect(landmarks1).toEqual(landmarks2);
+  });
+});
--- a/mediapipe/tasks/web/vision/types.ts
+++ b/mediapipe/tasks/web/vision/types.ts
@ -27,3 +27,4 @@ export * from '../../../tasks/web/vision/image_embedder/image_embedder';
 export * from '../../../tasks/web/vision/image_segmenter/image_segmenter';
 export * from '../../../tasks/web/vision/interactive_segmenter/interactive_segmenter';
 export * from '../../../tasks/web/vision/object_detector/object_detector';
+export * from '../../../tasks/web/vision/pose_landmarker/pose_landmarker';