diff --git a/mediapipe/java/com/google/mediapipe/components/MicrophoneHelper.java b/mediapipe/java/com/google/mediapipe/components/MicrophoneHelper.java index 4775bd7ee..2d6b311fd 100644 --- a/mediapipe/java/com/google/mediapipe/components/MicrophoneHelper.java +++ b/mediapipe/java/com/google/mediapipe/components/MicrophoneHelper.java @@ -24,6 +24,7 @@ import android.util.Log; import com.google.common.base.Preconditions; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.ByteOrder; /** Provides access to audio data from a microphone. */ public class MicrophoneHelper implements AudioDataProducer { @@ -181,6 +182,7 @@ public class MicrophoneHelper implements AudioDataProducer { // TODO: Fix audio data cloning. ByteBuffer audioData = ByteBuffer.allocateDirect(audioPacketBufferSize); + audioData.order(ByteOrder.nativeOrder()); try { readAudioPacket(audioData); } catch (IOException ioException) { diff --git a/mediapipe/java/com/google/mediapipe/framework/PacketCreator.java b/mediapipe/java/com/google/mediapipe/framework/PacketCreator.java index f9ea6760c..5e20f1f71 100644 --- a/mediapipe/java/com/google/mediapipe/framework/PacketCreator.java +++ b/mediapipe/java/com/google/mediapipe/framework/PacketCreator.java @@ -17,6 +17,7 @@ package com.google.mediapipe.framework; import com.google.mediapipe.framework.ProtoUtil.SerializedMessage; import com.google.protobuf.MessageLite; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.FloatBuffer; // TODO: use Preconditions in this file. @@ -65,14 +66,15 @@ public class PacketCreator { * Create a MediaPipe audio packet that is used by most of the audio calculators. * * @param data the raw audio data, bytes per sample is 2. + * @param isLittleEndian whether the raw audio data is in little-endian byte order. * @param numChannels number of channels in the raw data. * @param numSamples number of samples in the data. 
*/ - public Packet createAudioPacket(byte[] data, int numChannels, int numSamples) { + public Packet createAudioPacket(byte[] data, boolean isLittleEndian, int numChannels, int numSamples) { checkAudioDataSize(data.length, numChannels, numSamples); return Packet.create( nativeCreateAudioPacket( - mediapipeGraph.getNativeHandle(), data, /*offset=*/ 0, numChannels, numSamples)); + mediapipeGraph.getNativeHandle(), data, /*offset=*/ 0, isLittleEndian, numChannels, numSamples)); } /** @@ -88,13 +90,14 @@ public class PacketCreator { if (data.isDirect()) { return Packet.create( nativeCreateAudioPacketDirect( - mediapipeGraph.getNativeHandle(), data.slice(), numChannels, numSamples)); + mediapipeGraph.getNativeHandle(), data.slice(), ByteOrder.LITTLE_ENDIAN.equals(data.order()), numChannels, numSamples)); } else if (data.hasArray()) { return Packet.create( nativeCreateAudioPacket( mediapipeGraph.getNativeHandle(), data.array(), data.arrayOffset() + data.position(), + ByteOrder.LITTLE_ENDIAN.equals(data.order()), numChannels, numSamples)); } else { @@ -381,10 +384,10 @@ public class PacketCreator { private native long nativeCreateReferencePacket(long context, long packet); private native long nativeCreateAudioPacket( - long context, byte[] data, int offset, int numChannels, int numSamples); + long context, byte[] data, int offset, boolean isLittleEndian, int numChannels, int numSamples); private native long nativeCreateAudioPacketDirect( - long context, ByteBuffer data, int numChannels, int numSamples); + long context, ByteBuffer data, boolean isLittleEndian, int numChannels, int numSamples); private native long nativeCreateRgbImageFromRgba( long context, ByteBuffer buffer, int width, int height); diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.cc b/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.cc index 2701c7a5e..02ae39c62 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.cc +++ 
b/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.cc @@ -250,17 +250,17 @@ JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateRgbaImageFrame)( return CreatePacketWithContext(context, packet); } -static mediapipe::Packet createAudioPacket(const uint8_t* audio_sample, +static mediapipe::Packet createAudioPacket(const uint8_t* audio_sample, bool is_little_endian, int num_samples, int num_channels) { std::unique_ptr<mediapipe::Matrix> matrix( new mediapipe::Matrix(num_channels, num_samples)); // Preparing and normalize the audio data. // kMultiplier is same as what used in av_sync_media_decoder.cc. static const float kMultiplier = 1.f / (1 << 15); - // We try to not assume the Endian order of the data. + // Assemble each 16-bit sample using the caller-specified byte order. for (int sample = 0; sample < num_samples; ++sample) { for (int channel = 0; channel < num_channels; ++channel) { - int16_t value = (audio_sample[1] & 0xff) << 8 | audio_sample[0]; + int16_t value = (audio_sample[is_little_endian ? 1 : 0] & 0xff) << 8 | audio_sample[is_little_endian ? 0 : 1]; (*matrix)(channel, sample) = kMultiplier * value; audio_sample += 2; } @@ -270,25 +270,25 @@ static mediapipe::Packet createAudioPacket(const uint8_t* audio_sample, JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateAudioPacket)( JNIEnv* env, jobject thiz, jlong context, jbyteArray data, jint offset, - jint num_channels, jint num_samples) { + jboolean is_little_endian, jint num_channels, jint num_samples) { // Note, audio_data_ref is really a const jbyte* but this clashes with the // the expectation of ReleaseByteArrayElements below. 
jbyte* audio_data_ref = env->GetByteArrayElements(data, nullptr); const uint8_t* audio_sample = reinterpret_cast<const uint8_t*>(audio_data_ref) + offset; mediapipe::Packet packet = - createAudioPacket(audio_sample, num_samples, num_channels); + createAudioPacket(audio_sample, is_little_endian, num_samples, num_channels); env->ReleaseByteArrayElements(data, audio_data_ref, JNI_ABORT); return CreatePacketWithContext(context, packet); } JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateAudioPacketDirect)( - JNIEnv* env, jobject thiz, jlong context, jobject data, jint num_channels, - jint num_samples) { + JNIEnv* env, jobject thiz, jlong context, jobject data, + jboolean is_little_endian, jint num_channels, jint num_samples) { const uint8_t* audio_sample = reinterpret_cast<const uint8_t*>(env->GetDirectBufferAddress(data)); mediapipe::Packet packet = - createAudioPacket(audio_sample, num_samples, num_channels); + createAudioPacket(audio_sample, is_little_endian, num_samples, num_channels); return CreatePacketWithContext(context, packet); } diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.h b/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.h index d6f44b0a3..cc6603d28 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.h +++ b/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.h @@ -48,12 +48,12 @@ JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateGrayscaleImage)( jint height); JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateAudioPacketDirect)( - JNIEnv* env, jobject thiz, jlong context, jobject data, jint num_channels, - jint num_samples); + JNIEnv* env, jobject thiz, jlong context, jobject data, + jboolean is_little_endian, jint num_channels, jint num_samples); JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateAudioPacket)( JNIEnv* env, jobject thiz, jlong context, jbyteArray data, jint offset, - jboolean is_little_endian, 
jint num_channels, jint num_samples); JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateInt16)(JNIEnv* env, jobject thiz,