Add a function to convert CoreAudio buffers into a MediaPipe time series matrix

PiperOrigin-RevId: 519968274
This commit is contained in:
MediaPipe Team 2023-03-28 02:34:22 -07:00 committed by Copybara-Service
parent a18a62ef04
commit 0ea7b220f4
4 changed files with 517 additions and 0 deletions

View File

@@ -193,6 +193,20 @@ objc_library(
],
)
# Utilities for converting CoreAudio buffer lists into MediaPipe time series
# matrices (see MediaPipeAudioUtil.h).
objc_library(
name = "mediapipe_audio_util",
srcs = ["MediaPipeAudioUtil.mm"],
hdrs = ["MediaPipeAudioUtil.h"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/port:statusor",
"//third_party/apple_frameworks:AVFoundation",
"//third_party/apple_frameworks:CoreAudio",
"//third_party/apple_frameworks:CoreMedia",
],
)
objc_library(
name = "MPPGraphTestBase",
testonly = 1,
@@ -230,6 +244,7 @@ objc_library(
"CFHolderTests.mm",
"MPPDisplayLinkWeakTargetTests.mm",
"MPPGraphTests.mm",
"MediaPipeAudioUtilTests.mm",
],
copts = [
"-Wno-shorten-64-to-32",
@@ -242,11 +257,13 @@ objc_library(
":CGImageRefUtils",
":MPPGraphTestBase",
":Weakify",
":mediapipe_audio_util",
":mediapipe_framework_ios",
":mediapipe_input_sources_ios",
"//mediapipe/calculators/core:pass_through_calculator",
"//third_party/apple_frameworks:AVFoundation",
"//third_party/apple_frameworks:Accelerate",
"//third_party/apple_frameworks:CoreAudio",
"//third_party/apple_frameworks:CoreGraphics",
"//third_party/apple_frameworks:CoreMedia",
"//third_party/apple_frameworks:CoreVideo",

View File

@@ -0,0 +1,36 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_OBJC_AUDIO_UTIL_H_
#define MEDIAPIPE_OBJC_AUDIO_UTIL_H_
#import <CoreAudio/CoreAudioTypes.h>
#import <CoreMedia/CoreMedia.h>
#include <memory>
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/matrix.h"
NS_ASSUME_NONNULL_BEGIN
// Converts an audio sample buffer list into a `mediapipe::Matrix` with one
// row per channel and one column per frame.
//
// `audioBufferList`: the sample data; may hold interleaved samples (a single
//     buffer with channels alternating per frame) or non-interleaved samples
//     (one buffer per channel), as described by `streamHeader`.
// `streamHeader`: the stream format description. Only 32-bit float and 16-bit
//     signed-integer linear PCM sample formats are supported; 16-bit samples
//     are rescaled into the [-1, 1] range.
// `numFrames`: the number of frames (samples per channel) to convert.
//
// Returns an error status on failure.
absl::StatusOr<std::unique_ptr<mediapipe::Matrix>>
MediaPipeConvertAudioBufferListToAudioMatrix(
const AudioBufferList* audioBufferList,
const AudioStreamBasicDescription* streamHeader, CMItemCount numFrames);
NS_ASSUME_NONNULL_END
#endif // MEDIAPIPE_OBJC_AUDIO_UTIL_H_

View File

@@ -0,0 +1,101 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/objc/MediaPipeAudioUtil.h"
#include <limits>
namespace {

// `float` must be 32-bit IEEE 754: GetSample<float32_t> reinterprets raw
// sample bytes as `float`.
static_assert(std::numeric_limits<float>::is_iec559);
using float32_t = float;

// Reads the sample at `index` out of the raw buffer `data` and converts it to
// a float. Specialized for each supported sample storage type.
template <typename SampleDataType>
float GetSample(const void* data, int index);

template <>
float GetSample<float32_t>(const void* data, int index) {
  return reinterpret_cast<const float32_t*>(data)[index];
}

template <>
float GetSample<SInt16>(const void* data, int index) {
  // Convert to the [-1, 1] range.
  return static_cast<float>(reinterpret_cast<const SInt16*>(data)[index]) /
         static_cast<float>(std::numeric_limits<SInt16>::max());
}

// Copies `frames` frames of `channels`-channel audio out of `buffers` into a
// newly allocated matrix of dimensions `channels x frames`.
template <typename SampleDataType>
std::unique_ptr<mediapipe::Matrix> MakeMatrix(const AudioBuffer* buffers, int channels,
                                              CMItemCount frames, bool interleaved) {
  auto matrix = std::make_unique<mediapipe::Matrix>(channels, frames);
  // Split the case of interleaved and non-interleaved samples (see
  // https://developer.apple.com/documentation/coremedia/1489723-cmsamplebuffercreate#discussion)
  // - however, the resulting operations coincide when `channels == 1`.
  if (interleaved) {
    // A single buffer contains interleaved samples for all the channels
    // {L, R, L, R, L, R, ...}. Iterate frames in the outer loop so the raw
    // buffer is read sequentially.
    const void* samples = buffers[0].mData;
    for (int frame = 0; frame < frames; ++frame) {
      for (int channel = 0; channel < channels; ++channel) {
        (*matrix)(channel, frame) = GetSample<SampleDataType>(samples, channels * frame + channel);
      }
    }
  } else {
    // Non-interleaved audio: each channel's samples are stored in a separate buffer:
    // {{L, L, L, L, ...}, {R, R, R, R, ...}}.
    for (int channel = 0; channel < channels; ++channel) {
      const void* samples = buffers[channel].mData;
      for (int frame = 0; frame < frames; ++frame) {
        (*matrix)(channel, frame) = GetSample<SampleDataType>(samples, frame);
      }
    }
  }
  return matrix;
}

}  // namespace
absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> MediaPipeConvertAudioBufferListToAudioMatrix(
    const AudioBufferList* audioBufferList, const AudioStreamBasicDescription* streamHeader,
    CMItemCount numFrames) {
  // Determine the channel count and sample layout. Mono audio is always
  // routed through the non-interleaved path, since both layouts coincide for
  // a single channel.
  CMItemCount channelCount = 1;
  bool interleavedSamples = false;
  if (streamHeader->mChannelsPerFrame > 1) {
    if ((streamHeader->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0) {
      // One buffer per channel.
      channelCount = audioBufferList->mNumberBuffers;
    } else {
      // A single buffer with samples alternating across channels.
      channelCount = audioBufferList->mBuffers[0].mNumberChannels;
      interleavedSamples = true;
    }
    if (channelCount <= 1) {
      return absl::InternalError(
          "AudioStreamBasicDescription indicates more than 1 channel, "
          "but the buffer data declares an incompatible number of channels");
    }
  }

  // Dispatch on the sample storage format.
  const bool isFloat32 = (streamHeader->mFormatFlags & kAudioFormatFlagIsFloat) &&
                         streamHeader->mBitsPerChannel == 32;
  if (isFloat32) {
    return MakeMatrix<float32_t>(audioBufferList->mBuffers, channelCount, numFrames,
                                 interleavedSamples);
  }
  const bool isSignedInt16 = (streamHeader->mFormatFlags & kAudioFormatFlagIsSignedInteger) &&
                             streamHeader->mBitsPerChannel == 16;
  if (isSignedInt16) {
    return MakeMatrix<SInt16>(audioBufferList->mBuffers, channelCount, numFrames,
                              interleavedSamples);
  }
  return absl::InternalError("Incompatible audio sample storage format");
}

View File

@@ -0,0 +1,363 @@
#import "mediapipe/objc/MediaPipeAudioUtil.h"
#include <cassert>
#include <cstdlib>
#include <limits>
#include <memory>
#include <vector>
#import <XCTest/XCTest.h>
// Float samples round-trip exactly up to FP rounding, so a tight tolerance works.
static const float kMatrixComparisonPrecisionFloat = 1e-9;
// Int16 samples quantize the [-1, 1] range with a step of 1/32767 (~3.1e-5),
// so a correspondingly looser tolerance is needed.
static const float kMatrixComparisonPrecisionInt16 = 1e-4;
// Tests for MediaPipeConvertAudioBufferListToAudioMatrix.
@interface MediaPipeAudioUtilTest : XCTestCase
@end
template <typename DataType>
class AudioBufferListWrapper {
public:
AudioBufferListWrapper(int num_frames, int num_channels, bool interleaved)
: num_frames_(num_frames), num_channels_(num_channels), interleaved_(interleaved) {
int num_buffers = interleaved_ ? 1 : num_channels_;
int channels_per_buffer = interleaved_ ? num_channels_ : 1;
int buffer_size_samples = num_frames_ * channels_per_buffer;
int buffer_size_bytes = buffer_size_samples * static_cast<int>(BytesPerSample());
buffer_list_.reset(reinterpret_cast<AudioBufferList*>(
calloc(1, offsetof(AudioBufferList, mBuffers) +
(sizeof(AudioBuffer) * num_buffers)))); // Var. length array.
assert(buffer_list_.get() != nullptr);
buffer_list_->mNumberBuffers = static_cast<CMItemCount>(num_buffers);
for (int buffer_index = 0; buffer_index < num_buffers; ++buffer_index) {
AudioBuffer& buffer = GetBuffer(buffer_index);
auto buffer_data = std::make_unique<DataType[]>(buffer_size_samples);
assert(buffer_data != nullptr);
buffer.mData = static_cast<void*>(buffer_data.get());
buffer.mDataByteSize = buffer_size_bytes;
buffer.mNumberChannels = channels_per_buffer;
buffers_.push_back(std::move(buffer_data));
}
}
UInt32 BytesPerSample() const { return static_cast<UInt32>(sizeof(DataType)); }
UInt32 BytesPerPacket() const {
return static_cast<UInt32>(BytesPerSample() * num_frames_ * num_channels_);
}
AudioBufferList* GetBufferList() { return buffer_list_.get(); };
const AudioBufferList* GetBufferList() const { return buffer_list_.get(); };
AudioBuffer& GetBuffer(int index) { return GetBufferList()->mBuffers[index]; }
DataType* GetBufferData(int index) { return reinterpret_cast<DataType*>(GetBuffer(index).mData); }
DataType& At(int channel, int frame) {
assert(frame >= 0 && frame < num_frames_);
assert(channel >= 0 && channel < num_channels_);
if (interleaved_) {
// [[L, R, L, R, ...]]
return GetBufferData(0)[frame * num_channels_ + channel];
} else {
// [[L, L, ...], [R, R, ...]]
return GetBufferData(channel)[frame];
}
}
DataType ToDataType(float value) const;
void InitFromMatrix(const mediapipe::Matrix& matrix) {
assert(matrix.rows() == num_channels_);
assert(matrix.cols() == num_frames_);
for (int channel = 0; channel < num_channels_; ++channel) {
for (int frame = 0; frame < num_frames_; ++frame) {
this->At(channel, frame) = ToDataType(matrix(channel, frame));
;
}
}
}
private:
int num_frames_;
int num_channels_;
bool interleaved_;
std::unique_ptr<AudioBufferList> buffer_list_;
std::vector<std::unique_ptr<DataType[]>> buffers_;
};
// Float samples pass through unchanged.
template <>
float AudioBufferListWrapper<float>::ToDataType(float sample) const {
  return sample;
}

// Scales a [-1, 1] float sample into the signed 16-bit sample range.
template <>
int16_t AudioBufferListWrapper<int16_t>::ToDataType(float sample) const {
  constexpr float kScale = std::numeric_limits<int16_t>::max();
  return static_cast<int16_t>(sample * kScale);
}
namespace {

// Packs `input` (channels x frames) into an `AudioBufferList` with the given
// sample type and layout, builds the matching stream description, and feeds
// both through `MediaPipeConvertAudioBufferListToAudioMatrix`.
template <typename DataType>
absl::StatusOr<std::unique_ptr<mediapipe::Matrix>> ConvertMatrixThroughBufferList(
    const mediapipe::Matrix& input, bool interleaved, AudioFormatFlags sampleFormatFlags) {
  const int channels = static_cast<int>(input.rows());
  const int frames = static_cast<int>(input.cols());
  AudioBufferListWrapper<DataType> bufferList(/*num_frames=*/frames,
                                              /*num_channels=*/channels,
                                              /*interleaved=*/interleaved);
  bufferList.InitFromMatrix(input);

  AudioFormatFlags formatFlags = sampleFormatFlags | kAudioFormatFlagIsPacked;
  if (!interleaved) {
    formatFlags |= kAudioFormatFlagIsNonInterleaved;
  }
  const AudioStreamBasicDescription streamDescription = {
      .mSampleRate = 44100,
      .mFormatID = kAudioFormatLinearPCM,
      .mFormatFlags = formatFlags,
      .mBytesPerPacket = bufferList.BytesPerPacket(),
      .mFramesPerPacket = static_cast<UInt32>(frames),
      .mBytesPerFrame = bufferList.BytesPerSample() * channels,
      .mChannelsPerFrame = static_cast<UInt32>(channels),
      .mBitsPerChannel = bufferList.BytesPerSample() * 8,
  };
  return MediaPipeConvertAudioBufferListToAudioMatrix(
      bufferList.GetBufferList(), &streamDescription, static_cast<CMItemCount>(frames));
}

}  // namespace

@implementation MediaPipeAudioUtilTest

// Fails the test if `result` holds an error status or if its matrix differs
// from `expected` by more than `precision`.
- (void)assertConversion:(const absl::StatusOr<std::unique_ptr<mediapipe::Matrix>>&)result
           matchesMatrix:(const mediapipe::Matrix&)expected
               precision:(float)precision {
  if (!result.ok()) {
    XCTFail(@"Unable to convert a sample buffer list to a matrix: %s",
            result.status().ToString().c_str());
    // `*result` must not be dereferenced when the status is an error.
    return;
  }
  XCTAssertTrue(expected.isApprox(**result, precision));
}

- (void)testBufferListToMatrixStereoNonInterleavedFloat {
  mediapipe::Matrix inputMatrix(2, 5);
  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9;
  [self assertConversion:ConvertMatrixThroughBufferList<float>(
                             inputMatrix, /*interleaved=*/false, kAudioFormatFlagIsFloat)
           matchesMatrix:inputMatrix
               precision:kMatrixComparisonPrecisionFloat];
}

- (void)testBufferListToMatrixStereoInterleavedFloat {
  mediapipe::Matrix inputMatrix(2, 5);
  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9;
  [self assertConversion:ConvertMatrixThroughBufferList<float>(
                             inputMatrix, /*interleaved=*/true, kAudioFormatFlagIsFloat)
           matchesMatrix:inputMatrix
               precision:kMatrixComparisonPrecisionFloat];
}

- (void)testBufferListToMatrixMonoNonInterleavedFloat {
  mediapipe::Matrix inputMatrix(1, 5);
  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4;
  [self assertConversion:ConvertMatrixThroughBufferList<float>(
                             inputMatrix, /*interleaved=*/false, kAudioFormatFlagIsFloat)
           matchesMatrix:inputMatrix
               precision:kMatrixComparisonPrecisionFloat];
}

- (void)testBufferListToMatrixMonoInterleavedFloat {
  mediapipe::Matrix inputMatrix(1, 5);
  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4;
  [self assertConversion:ConvertMatrixThroughBufferList<float>(
                             inputMatrix, /*interleaved=*/true, kAudioFormatFlagIsFloat)
           matchesMatrix:inputMatrix
               precision:kMatrixComparisonPrecisionFloat];
}

- (void)testBufferListToMatrixStereoNonInterleavedInt16 {
  mediapipe::Matrix inputMatrix(2, 5);
  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9;
  [self assertConversion:ConvertMatrixThroughBufferList<int16_t>(
                             inputMatrix, /*interleaved=*/false, kAudioFormatFlagIsSignedInteger)
           matchesMatrix:inputMatrix
               precision:kMatrixComparisonPrecisionInt16];
}

- (void)testBufferListToMatrixStereoInterleavedInt16 {
  mediapipe::Matrix inputMatrix(2, 5);
  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9;
  [self assertConversion:ConvertMatrixThroughBufferList<int16_t>(
                             inputMatrix, /*interleaved=*/true, kAudioFormatFlagIsSignedInteger)
           matchesMatrix:inputMatrix
               precision:kMatrixComparisonPrecisionInt16];
}

- (void)testBufferListToMatrixMonoNonInterleavedInt16 {
  mediapipe::Matrix inputMatrix(1, 5);
  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4;
  [self assertConversion:ConvertMatrixThroughBufferList<int16_t>(
                             inputMatrix, /*interleaved=*/false, kAudioFormatFlagIsSignedInteger)
           matchesMatrix:inputMatrix
               precision:kMatrixComparisonPrecisionInt16];
}

- (void)testBufferListToMatrixMonoInterleavedInt16 {
  mediapipe::Matrix inputMatrix(1, 5);
  inputMatrix << 0, 0.1, 0.2, 0.3, 0.4;
  [self assertConversion:ConvertMatrixThroughBufferList<int16_t>(
                             inputMatrix, /*interleaved=*/true, kAudioFormatFlagIsSignedInteger)
           matchesMatrix:inputMatrix
               precision:kMatrixComparisonPrecisionInt16];
}

@end