From 0ea7b220f401ccef47869cc28dc1f0a62e3b4c57 Mon Sep 17 00:00:00 2001
From: MediaPipe Team <mediapipe-team@google.com>
Date: Tue, 28 Mar 2023 02:34:22 -0700
Subject: [PATCH] Add a function to convert CoreAudio buffers into a MediaPipe
 time series matrix

PiperOrigin-RevId: 519968274
---
 mediapipe/objc/BUILD                    |  17 ++
 mediapipe/objc/DrishtiAudioUtil.h       |  36 +++
 mediapipe/objc/DrishtiAudioUtil.mm      | 101 +++++++
 mediapipe/objc/DrishtiAudioUtilTests.mm | 363 ++++++++++++++++++++++++
 4 files changed, 517 insertions(+)
 create mode 100644 mediapipe/objc/DrishtiAudioUtil.h
 create mode 100644 mediapipe/objc/DrishtiAudioUtil.mm
 create mode 100644 mediapipe/objc/DrishtiAudioUtilTests.mm
diff --git a/mediapipe/objc/BUILD b/mediapipe/objc/BUILD
index c71c02b6d..a21677608 100644
--- a/mediapipe/objc/BUILD
+++ b/mediapipe/objc/BUILD
@@ -193,6 +193,20 @@ objc_library(
     ],
 )
 
+objc_library(
+    name = "mediapipe_audio_util",  # Converts CoreAudio buffers into a MediaPipe matrix.
+    srcs = ["MediaPipeAudioUtil.mm"],  # NOTE(review): this change adds DrishtiAudioUtil.{h,mm}.
+    hdrs = ["MediaPipeAudioUtil.h"],  # NOTE(review): confirm the file names match the sources.
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework/formats:matrix",
+        "//mediapipe/framework/port:statusor",
+        "//third_party/apple_frameworks:AVFoundation",
+        "//third_party/apple_frameworks:CoreAudio",
+        "//third_party/apple_frameworks:CoreMedia",
+    ],
+)
+
 objc_library(
     name = "MPPGraphTestBase",
     testonly = 1,
@@ -230,6 +244,7 @@ objc_library(
         "CFHolderTests.mm",
         "MPPDisplayLinkWeakTargetTests.mm",
         "MPPGraphTests.mm",
+        "MediaPipeAudioUtilTests.mm",
     ],
     copts = [
         "-Wno-shorten-64-to-32",
@@ -242,11 +257,13 @@ objc_library(
         ":CGImageRefUtils",
         ":MPPGraphTestBase",
         ":Weakify",
+        ":mediapipe_audio_util",
         ":mediapipe_framework_ios",
         ":mediapipe_input_sources_ios",
         "//mediapipe/calculators/core:pass_through_calculator",
         "//third_party/apple_frameworks:AVFoundation",
         "//third_party/apple_frameworks:Accelerate",
+        "//third_party/apple_frameworks:CoreAudio",
         "//third_party/apple_frameworks:CoreGraphics",
         "//third_party/apple_frameworks:CoreMedia",
         "//third_party/apple_frameworks:CoreVideo",
diff --git a/mediapipe/objc/DrishtiAudioUtil.h b/mediapipe/objc/DrishtiAudioUtil.h
new file mode 100644
index 000000000..40e6ded0d
--- /dev/null
+++ b/mediapipe/objc/DrishtiAudioUtil.h
@@ -0,0 +1,36 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef MEDIAPIPE_OBJC_AUDIO_UTIL_H_
+#define MEDIAPIPE_OBJC_AUDIO_UTIL_H_
+
+#import <CoreAudio/CoreAudioTypes.h>
+#import <CoreMedia/CoreMedia.h>
+
+#include <memory>
+
+#include "absl/status/statusor.h"
+#include "mediapipe/framework/formats/matrix.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+// Converts audio buffers (32-bit float or 16-bit signed int samples) into a
+// channels x frames `mediapipe::Matrix`, or returns an error status on failure.
+absl::StatusOr<std::unique_ptr<mediapipe::Matrix>>
+MediaPipeConvertAudioBufferListToAudioMatrix(
+    const AudioBufferList* audioBufferList,
+    const AudioStreamBasicDescription* streamHeader, CMItemCount numFrames);
+
+NS_ASSUME_NONNULL_END
+
+#endif  // MEDIAPIPE_OBJC_AUDIO_UTIL_H_
diff --git a/mediapipe/objc/DrishtiAudioUtil.mm b/mediapipe/objc/DrishtiAudioUtil.mm
new file mode 100644
index 000000000..83c8bedab
--- /dev/null
+++ b/mediapipe/objc/DrishtiAudioUtil.mm
@@ -0,0 +1,101 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/objc/MediaPipeAudioUtil.h"
+
+#include <limits>
+
+namespace {
+// The float sample path relies on `float` being a 32-bit IEEE 754 type.
+static_assert(std::numeric_limits<float>::is_iec559 && sizeof(float) == 4);
+using float32_t = float;
+
+// Returns sample `index` of `data`, interpreted as an array of `SampleDataType`, as a float.
+template <typename SampleDataType>
+float GetSample(const void* data, int index);
+
+template <>
+float GetSample<float32_t>(const void* data, int index) {
+  return static_cast<const float32_t*>(data)[index];
+}
+
+template <>
+float GetSample<SInt16>(const void* data, int index) {
+  // Dividing by the largest positive value maps samples to (approximately) the [-1, 1] range.
+  constexpr float kScale = std::numeric_limits<SInt16>::max();
+  return static_cast<float>(static_cast<const SInt16*>(data)[index]) / kScale;
+}
+
+// Builds a `channels x frames` matrix from `buffers`, reading samples as `SampleDataType`.
+template <typename SampleDataType>
+std::unique_ptr<mediapipe::Matrix> MakeMatrix(const AudioBuffer* buffers, int channels,
+                                            CMItemCount frames, bool interleaved) {
+  auto matrix = std::make_unique<mediapipe::Matrix>(channels, frames);
+  // The interleaved and non-interleaved layouts are handled separately (see
+  // https://developer.apple.com/documentation/coremedia/1489723-cmsamplebuffercreate#discussion)
+  // - however, both branches read the same elements when `channels == 1`.
+  if (interleaved) {
+    // One buffer holds all channels {L, R, L, R, ...}; iterate by frame to read it sequentially.
+    const void* samples = buffers[0].mData;
+    for (int frame = 0; frame < frames; ++frame) {
+      for (int channel = 0; channel < channels; ++channel) {
+        (*matrix)(channel, frame) = GetSample<SampleDataType>(samples, channels * frame + channel);
+      }
+    }
+  } else {
+    // One buffer per channel: {{L, L, L, L, ...}, {R, R, R, R, ...}}.
+    for (int channel = 0; channel < channels; ++channel) {
+      const void* samples = buffers[channel].mData;
+      for (int frame = 0; frame < frames; ++frame) {
+        (*matrix)(channel, frame) = GetSample<SampleDataType>(samples, frame);
+      }
+    }
+  }
+  return matrix;
+}
+}  // namespace
+
+// Converts float32 or int16 audio buffers into a `channels x frames` `mediapipe::Matrix`.
+// Returns an error for an empty buffer list, a negative frame count or an unsupported format.
+absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> MediaPipeConvertAudioBufferListToAudioMatrix(
+    const AudioBufferList* audioBufferList, const AudioStreamBasicDescription* streamHeader,
+    CMItemCount numFrames) {
+  // `mBuffers[0]` is read unconditionally below and `numFrames` becomes a matrix dimension.
+  if (audioBufferList->mNumberBuffers == 0 || numFrames < 0) {
+    return absl::InternalError("Empty audio buffer list or negative number of frames");
+  }
+  // Channel count and layout. Note that "interleaved" mono audio is treated as non-interleaved.
+  const auto formatFlags = streamHeader->mFormatFlags;
+  const bool isMultiChannel = streamHeader->mChannelsPerFrame > 1;
+  const bool isAudioInterleaved =
+      isMultiChannel && !(formatFlags & kAudioFormatFlagIsNonInterleaved);
+  CMItemCount numChannels = 1;
+  if (isMultiChannel) {
+    numChannels = isAudioInterleaved ? audioBufferList->mBuffers[0].mNumberChannels
+                                     : audioBufferList->mNumberBuffers;
+    if (numChannels <= 1) {
+      return absl::InternalError("AudioStreamBasicDescription indicates more than 1 channel, "
+                                 "but the buffer data declares an incompatible number of channels");
+    }
+  }
+
+  if ((formatFlags & kAudioFormatFlagIsFloat) && streamHeader->mBitsPerChannel == 32) {
+    return MakeMatrix<float32_t>(audioBufferList->mBuffers, numChannels, numFrames,
+                                 isAudioInterleaved);
+  }
+  if ((formatFlags & kAudioFormatFlagIsSignedInteger) && streamHeader->mBitsPerChannel == 16) {
+    return MakeMatrix<SInt16>(audioBufferList->mBuffers, numChannels, numFrames,
+                              isAudioInterleaved);
+  }
+  return absl::InternalError("Incompatible audio sample storage format");
+}
diff --git a/mediapipe/objc/DrishtiAudioUtilTests.mm b/mediapipe/objc/DrishtiAudioUtilTests.mm
new file mode 100644
index 000000000..7663a70d6
--- /dev/null
+++ b/mediapipe/objc/DrishtiAudioUtilTests.mm
@@ -0,0 +1,363 @@
+#import "mediapipe/objc/MediaPipeAudioUtil.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <limits>
+#include <memory>
+#include <vector>
+
+#import <XCTest/XCTest.h>
+
+static const float kMatrixComparisonPrecisionFloat = 1e-9;  // `isApprox` precision, float tests.
+static const float kMatrixComparisonPrecisionInt16 = 1e-4;  // `isApprox` precision, int16 tests.
+
+@interface MediaPipeAudioUtilTest : XCTestCase  // Tests the buffer list -> matrix conversion.
+@end
+
+// Test helper that owns an `AudioBufferList` plus the sample storage its buffers point to.
+template <typename DataType>
+class AudioBufferListWrapper {
+ public:
+  AudioBufferListWrapper(int num_frames, int num_channels, bool interleaved)
+      : num_frames_(num_frames), num_channels_(num_channels), interleaved_(interleaved) {
+    const int num_buffers = interleaved_ ? 1 : num_channels_;
+    const int channels_per_buffer = interleaved_ ? num_channels_ : 1;
+    const int buffer_size_samples = num_frames_ * channels_per_buffer;
+    const int buffer_size_bytes = buffer_size_samples * static_cast<int>(BytesPerSample());
+
+    // `AudioBufferList` ends with a variable length array, so its size is computed by hand.
+    buffer_list_.reset(static_cast<AudioBufferList*>(
+        calloc(1, offsetof(AudioBufferList, mBuffers) + (sizeof(AudioBuffer) * num_buffers))));
+    assert(buffer_list_ != nullptr);
+
+    buffer_list_->mNumberBuffers = static_cast<UInt32>(num_buffers);
+    for (int buffer_index = 0; buffer_index < num_buffers; ++buffer_index) {
+      auto buffer_data = std::make_unique<DataType[]>(buffer_size_samples);
+      AudioBuffer& buffer = GetBuffer(buffer_index);
+      buffer.mData = buffer_data.get();
+      buffer.mDataByteSize = buffer_size_bytes;
+      buffer.mNumberChannels = channels_per_buffer;
+      buffers_.push_back(std::move(buffer_data));
+    }
+  }
+
+  UInt32 BytesPerSample() const { return static_cast<UInt32>(sizeof(DataType)); }
+  UInt32 BytesPerPacket() const {
+    return static_cast<UInt32>(BytesPerSample() * num_frames_ * num_channels_);
+  }
+
+  AudioBufferList* GetBufferList() { return buffer_list_.get(); }
+  const AudioBufferList* GetBufferList() const { return buffer_list_.get(); }
+
+  AudioBuffer& GetBuffer(int index) { return GetBufferList()->mBuffers[index]; }
+  DataType* GetBufferData(int index) { return static_cast<DataType*>(GetBuffer(index).mData); }
+
+  // Returns a reference to the stored sample of `channel` at `frame`.
+  DataType& At(int channel, int frame) {
+    assert(frame >= 0 && frame < num_frames_);
+    assert(channel >= 0 && channel < num_channels_);
+    // Interleaved: [[L, R, L, R, ...]]; otherwise: [[L, L, ...], [R, R, ...]].
+    return interleaved_ ? GetBufferData(0)[frame * num_channels_ + channel]
+                        : GetBufferData(channel)[frame];
+  }
+
+  // Converts a float sample into the representation stored in the buffers.
+  DataType ToDataType(float value) const;
+
+  // Fills the buffers from a `channels x frames` matrix.
+  void InitFromMatrix(const mediapipe::Matrix& matrix) {
+    assert(matrix.rows() == num_channels_);
+    assert(matrix.cols() == num_frames_);
+    for (int channel = 0; channel < num_channels_; ++channel) {
+      for (int frame = 0; frame < num_frames_; ++frame) {
+        At(channel, frame) = ToDataType(matrix(channel, frame));
+      }
+    }
+  }
+
+ private:
+  // The list is allocated with `calloc()`, so it must be released with `free()`, not `delete`.
+  struct FreeDeleter {
+    void operator()(AudioBufferList* list) const { free(list); }
+  };
+
+  int num_frames_;
+  int num_channels_;
+  bool interleaved_;
+  std::unique_ptr<AudioBufferList, FreeDeleter> buffer_list_;
+  std::vector<std::unique_ptr<DataType[]>> buffers_;
+};
+
+template <>
+float AudioBufferListWrapper<float>::ToDataType(float value) const {
+  return value;
+}
+
+template <>
+int16_t AudioBufferListWrapper<int16_t>::ToDataType(float value) const {
+  return static_cast<int16_t>(value * std::numeric_limits<int16_t>::max());
+}
+
+@implementation MediaPipeAudioUtilTest
+
+- (void)testBufferListToMatrixStereoNonInterleavedFloat {
+  // Stereo float samples stored in one buffer per channel.
+  constexpr int kChannels = 2;
+  constexpr int kFrames = 5;
+  mediapipe::Matrix inputMatrix(kChannels, kFrames);
+  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9;
+  AudioBufferListWrapper<float> bufferList(/*num_frames=*/kFrames, /*num_channels=*/kChannels,
+                                           /*interleaved=*/false);
+  bufferList.InitFromMatrix(inputMatrix);
+
+  static const AudioStreamBasicDescription kStreamDescription = {
+      .mSampleRate = 44100,
+      .mFormatID = kAudioFormatLinearPCM,
+      .mFormatFlags =
+          kAudioFormatFlagIsFloat | kAudioFormatFlagIsPacked | kAudioFormatFlagIsNonInterleaved,
+      .mBytesPerPacket = bufferList.BytesPerPacket(),
+      .mFramesPerPacket = kFrames,
+      .mBytesPerFrame = bufferList.BytesPerSample() * kChannels,
+      .mChannelsPerFrame = kChannels,
+      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
+  };
+
+  absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> matrix =
+      MediaPipeConvertAudioBufferListToAudioMatrix(bufferList.GetBufferList(), &kStreamDescription,
+                                                 static_cast<CMItemCount>(kFrames));
+  if (!matrix.ok()) {
+    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
+            matrix.status().ToString().c_str());
+    return;  // `**matrix` below is only valid when the conversion succeeded.
+  }
+  XCTAssertTrue(inputMatrix.isApprox(**matrix, kMatrixComparisonPrecisionFloat));
+}
+
+- (void)testBufferListToMatrixStereoInterleavedFloat {
+  // Stereo float samples interleaved in a single buffer.
+  constexpr int kChannels = 2;
+  constexpr int kFrames = 5;
+  mediapipe::Matrix inputMatrix(kChannels, kFrames);
+  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9;
+  AudioBufferListWrapper<float> bufferList(/*num_frames=*/kFrames, /*num_channels=*/kChannels,
+                                           /*interleaved=*/true);
+  bufferList.InitFromMatrix(inputMatrix);
+
+  static const AudioStreamBasicDescription kStreamDescription = {
+      .mSampleRate = 44100,
+      .mFormatID = kAudioFormatLinearPCM,
+      .mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagIsPacked,
+      .mBytesPerPacket = bufferList.BytesPerPacket(),
+      .mFramesPerPacket = kFrames,
+      .mBytesPerFrame = bufferList.BytesPerSample() * kChannels,
+      .mChannelsPerFrame = kChannels,
+      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
+  };
+
+  absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> matrix =
+      MediaPipeConvertAudioBufferListToAudioMatrix(bufferList.GetBufferList(), &kStreamDescription,
+                                                 static_cast<CMItemCount>(kFrames));
+  if (!matrix.ok()) {
+    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
+            matrix.status().ToString().c_str());
+    return;  // `**matrix` below is only valid when the conversion succeeded.
+  }
+  XCTAssertTrue(inputMatrix.isApprox(**matrix, kMatrixComparisonPrecisionFloat));
+}
+
+- (void)testBufferListToMatrixMonoNonInterleavedFloat {
+  // Mono float samples with the non-interleaved format flag set.
+  constexpr int kChannels = 1;
+  constexpr int kFrames = 5;
+  mediapipe::Matrix inputMatrix(kChannels, kFrames);
+  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4;
+  AudioBufferListWrapper<float> bufferList(/*num_frames=*/kFrames, /*num_channels=*/kChannels,
+                                           /*interleaved=*/false);
+  bufferList.InitFromMatrix(inputMatrix);
+
+  static const AudioStreamBasicDescription kStreamDescription = {
+      .mSampleRate = 44100,
+      .mFormatID = kAudioFormatLinearPCM,
+      .mFormatFlags =
+          kAudioFormatFlagIsFloat | kAudioFormatFlagIsPacked | kAudioFormatFlagIsNonInterleaved,
+      .mBytesPerPacket = bufferList.BytesPerPacket(),
+      .mFramesPerPacket = kFrames,
+      .mBytesPerFrame = bufferList.BytesPerSample() * kChannels,
+      .mChannelsPerFrame = kChannels,
+      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
+  };
+
+  absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> matrix =
+      MediaPipeConvertAudioBufferListToAudioMatrix(bufferList.GetBufferList(), &kStreamDescription,
+                                                 static_cast<CMItemCount>(kFrames));
+  if (!matrix.ok()) {
+    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
+            matrix.status().ToString().c_str());
+    return;  // `**matrix` below is only valid when the conversion succeeded.
+  }
+  XCTAssertTrue(inputMatrix.isApprox(**matrix, kMatrixComparisonPrecisionFloat));
+}
+
+- (void)testBufferListToMatrixMonoInterleavedFloat {
+  // Mono float samples without the non-interleaved format flag.
+  constexpr int kChannels = 1;
+  constexpr int kFrames = 5;
+  mediapipe::Matrix inputMatrix(kChannels, kFrames);
+  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4;
+  AudioBufferListWrapper<float> bufferList(/*num_frames=*/kFrames, /*num_channels=*/kChannels,
+                                           /*interleaved=*/true);
+  bufferList.InitFromMatrix(inputMatrix);
+
+  static const AudioStreamBasicDescription kStreamDescription = {
+      .mSampleRate = 44100,
+      .mFormatID = kAudioFormatLinearPCM,
+      .mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagIsPacked,
+      .mBytesPerPacket = bufferList.BytesPerPacket(),
+      .mFramesPerPacket = kFrames,
+      .mBytesPerFrame = bufferList.BytesPerSample() * kChannels,
+      .mChannelsPerFrame = kChannels,
+      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
+  };
+
+  absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> matrix =
+      MediaPipeConvertAudioBufferListToAudioMatrix(bufferList.GetBufferList(), &kStreamDescription,
+                                                 static_cast<CMItemCount>(kFrames));
+  if (!matrix.ok()) {
+    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
+            matrix.status().ToString().c_str());
+    return;  // `**matrix` below is only valid when the conversion succeeded.
+  }
+  XCTAssertTrue(inputMatrix.isApprox(**matrix, kMatrixComparisonPrecisionFloat));
+}
+
+- (void)testBufferListToMatrixStereoNonInterleavedInt16 {
+  // Stereo 16-bit integer samples stored in one buffer per channel.
+  constexpr int kChannels = 2;
+  constexpr int kFrames = 5;
+  mediapipe::Matrix inputMatrix(kChannels, kFrames);
+  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9;
+  AudioBufferListWrapper<int16_t> bufferList(/*num_frames=*/kFrames, /*num_channels=*/kChannels,
+                                             /*interleaved=*/false);
+  bufferList.InitFromMatrix(inputMatrix);
+
+  static const AudioStreamBasicDescription kStreamDescription = {
+      .mSampleRate = 44100,
+      .mFormatID = kAudioFormatLinearPCM,
+      .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked |
+                      kAudioFormatFlagIsNonInterleaved,
+      .mBytesPerPacket = bufferList.BytesPerPacket(),
+      .mFramesPerPacket = kFrames,
+      .mBytesPerFrame = bufferList.BytesPerSample() * kChannels,
+      .mChannelsPerFrame = kChannels,
+      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
+  };
+
+  absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> matrix =
+      MediaPipeConvertAudioBufferListToAudioMatrix(bufferList.GetBufferList(), &kStreamDescription,
+                                                 static_cast<CMItemCount>(kFrames));
+  if (!matrix.ok()) {
+    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
+            matrix.status().ToString().c_str());
+    return;  // `**matrix` below is only valid when the conversion succeeded.
+  }
+  XCTAssertTrue(inputMatrix.isApprox(**matrix, kMatrixComparisonPrecisionInt16));
+}
+
+- (void)testBufferListToMatrixStereoInterleavedInt16 {
+  // Stereo 16-bit integer samples interleaved in a single buffer.
+  constexpr int kChannels = 2;
+  constexpr int kFrames = 5;
+  mediapipe::Matrix inputMatrix(kChannels, kFrames);
+  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9;
+  AudioBufferListWrapper<int16_t> bufferList(/*num_frames=*/kFrames, /*num_channels=*/kChannels,
+                                             /*interleaved=*/true);
+  bufferList.InitFromMatrix(inputMatrix);
+
+  static const AudioStreamBasicDescription kStreamDescription = {
+      .mSampleRate = 44100,
+      .mFormatID = kAudioFormatLinearPCM,
+      .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
+      .mBytesPerPacket = bufferList.BytesPerPacket(),
+      .mFramesPerPacket = kFrames,
+      .mBytesPerFrame = bufferList.BytesPerSample() * kChannels,
+      .mChannelsPerFrame = kChannels,
+      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
+  };
+
+  absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> matrix =
+      MediaPipeConvertAudioBufferListToAudioMatrix(bufferList.GetBufferList(), &kStreamDescription,
+                                                 static_cast<CMItemCount>(kFrames));
+  if (!matrix.ok()) {
+    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
+            matrix.status().ToString().c_str());
+    return;  // `**matrix` below is only valid when the conversion succeeded.
+  }
+  XCTAssertTrue(inputMatrix.isApprox(**matrix, kMatrixComparisonPrecisionInt16));
+}
+
+- (void)testBufferListToMatrixMonoNonInterleavedInt16 {
+  // Mono 16-bit integer samples with the non-interleaved format flag set.
+  constexpr int kChannels = 1;
+  constexpr int kFrames = 5;
+  mediapipe::Matrix inputMatrix(kChannels, kFrames);
+  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4;
+  AudioBufferListWrapper<int16_t> bufferList(/*num_frames=*/kFrames, /*num_channels=*/kChannels,
+                                             /*interleaved=*/false);
+  bufferList.InitFromMatrix(inputMatrix);
+
+  static const AudioStreamBasicDescription kStreamDescription = {
+      .mSampleRate = 44100,
+      .mFormatID = kAudioFormatLinearPCM,
+      .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked |
+                      kAudioFormatFlagIsNonInterleaved,
+      .mBytesPerPacket = bufferList.BytesPerPacket(),
+      .mFramesPerPacket = kFrames,
+      .mBytesPerFrame = bufferList.BytesPerSample() * kChannels,
+      .mChannelsPerFrame = kChannels,
+      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
+  };
+
+  absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> matrix =
+      MediaPipeConvertAudioBufferListToAudioMatrix(bufferList.GetBufferList(), &kStreamDescription,
+                                                 static_cast<CMItemCount>(kFrames));
+  if (!matrix.ok()) {
+    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
+            matrix.status().ToString().c_str());
+    return;  // `**matrix` below is only valid when the conversion succeeded.
+  }
+  XCTAssertTrue(inputMatrix.isApprox(**matrix, kMatrixComparisonPrecisionInt16));
+}
+
+- (void)testBufferListToMatrixMonoInterleavedInt16 {
+  // Mono 16-bit integer samples without the non-interleaved format flag.
+  constexpr int kChannels = 1;
+  constexpr int kFrames = 5;
+  mediapipe::Matrix inputMatrix(kChannels, kFrames);
+  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4;
+  AudioBufferListWrapper<int16_t> bufferList(/*num_frames=*/kFrames, /*num_channels=*/kChannels,
+                                             /*interleaved=*/true);
+  bufferList.InitFromMatrix(inputMatrix);
+
+  static const AudioStreamBasicDescription kStreamDescription = {
+      .mSampleRate = 44100,
+      .mFormatID = kAudioFormatLinearPCM,
+      .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
+      .mBytesPerPacket = bufferList.BytesPerPacket(),
+      .mFramesPerPacket = kFrames,
+      .mBytesPerFrame = bufferList.BytesPerSample() * kChannels,
+      .mChannelsPerFrame = kChannels,
+      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
+  };
+
+  absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> matrix =
+      MediaPipeConvertAudioBufferListToAudioMatrix(bufferList.GetBufferList(), &kStreamDescription,
+                                                 static_cast<CMItemCount>(kFrames));
+  if (!matrix.ok()) {
+    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
+            matrix.status().ToString().c_str());
+    return;  // `**matrix` below is only valid when the conversion succeeded.
+  }
+  XCTAssertTrue(inputMatrix.isApprox(**matrix, kMatrixComparisonPrecisionInt16));
+}
+
+@end