From b0d359529160775bdc3b213b799f86f59bc6bd97 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 7 Apr 2023 00:28:23 -0700 Subject: [PATCH] Internal change PiperOrigin-RevId: 522541374 --- mediapipe/util/audio_decoder.cc | 59 ++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/mediapipe/util/audio_decoder.cc b/mediapipe/util/audio_decoder.cc index 7f964a63d..569e8015a 100644 --- a/mediapipe/util/audio_decoder.cc +++ b/mediapipe/util/audio_decoder.cc @@ -53,22 +53,22 @@ ABSL_FLAG(int64_t, media_decoder_allowed_audio_gap_merge, 5, namespace mediapipe { // MPEG PTS max value + 1, used to correct for PTS rollover. Unit is PTS ticks. -const int64 kMpegPtsEpoch = 1LL << 33; +const int64_t kMpegPtsEpoch = 1LL << 33; // Maximum PTS change between frames. Larger changes are considered to indicate // the MPEG PTS has rolled over. Unit is PTS ticks. -const int64 kMpegPtsMaxDelta = kMpegPtsEpoch / 2; +const int64_t kMpegPtsMaxDelta = kMpegPtsEpoch / 2; // BasePacketProcessor namespace { -inline std::string TimestampToString(int64 timestamp) { +inline std::string TimestampToString(int64_t timestamp) { if (timestamp == AV_NOPTS_VALUE) { return "NOPTS"; } return absl::StrCat(timestamp); } -float Uint32ToFloat(uint32 raw_value) { +float Uint32ToFloat(uint32_t raw_value) { float value; memcpy(&value, &raw_value, 4); return value; @@ -236,7 +236,7 @@ absl::Status BasePacketProcessor::GetData(Packet* packet) { } absl::Status BasePacketProcessor::Flush() { - int64 last_num_frames_processed; + int64_t last_num_frames_processed; do { std::unique_ptr av_packet(new AVPacket()); av_init_packet(av_packet.get()); @@ -291,8 +291,8 @@ absl::Status BasePacketProcessor::Decode(const AVPacket& packet, return absl::OkStatus(); } -int64 BasePacketProcessor::CorrectPtsForRollover(int64 media_pts) { - const int64 rollover_pts_media_bits = kMpegPtsEpoch - 1; +int64_t BasePacketProcessor::CorrectPtsForRollover(int64_t media_pts) { + const int64_t rollover_pts_media_bits = kMpegPtsEpoch - 1; // Ensure PTS in range 0 ... kMpegPtsEpoch. This avoids errors from post // decode PTS corrections that overflow the epoch range (while still yielding // the correct result as long as the corrections do not exceed @@ -302,16 +302,16 @@ int64 BasePacketProcessor::CorrectPtsForRollover(int64 media_pts) { // First seen PTS. rollover_corrected_last_pts_ = media_pts; } else { - int64 prev_media_pts = + int64_t prev_media_pts = rollover_corrected_last_pts_ & rollover_pts_media_bits; - int64 pts_step = media_pts - prev_media_pts; + int64_t pts_step = media_pts - prev_media_pts; if (pts_step > kMpegPtsMaxDelta) { pts_step = pts_step - kMpegPtsEpoch; } else if (pts_step < -kMpegPtsMaxDelta) { pts_step = kMpegPtsEpoch + pts_step; } rollover_corrected_last_pts_ = - std::max((int64)0, rollover_corrected_last_pts_ + pts_step); + std::max((int64_t)0, rollover_corrected_last_pts_ + pts_step); } return rollover_corrected_last_pts_; } @@ -392,20 +392,21 @@ absl::Status AudioPacketProcessor::ValidateSampleFormat() { } } -int64 AudioPacketProcessor::SampleNumberToTimestamp(const int64 sample_number) { +int64_t AudioPacketProcessor::SampleNumberToTimestamp( + const int64_t sample_number) { return av_rescale_q(sample_number, sample_time_base_, source_time_base_); } -int64 AudioPacketProcessor::TimestampToSampleNumber(const int64 timestamp) { +int64_t AudioPacketProcessor::TimestampToSampleNumber(const int64_t timestamp) { return av_rescale_q(timestamp, source_time_base_, sample_time_base_); } -int64 AudioPacketProcessor::TimestampToMicroseconds(const int64 timestamp) { +int64_t AudioPacketProcessor::TimestampToMicroseconds(const int64_t timestamp) { return av_rescale_q(timestamp, source_time_base_, {1, 1000000}); } -int64 AudioPacketProcessor::SampleNumberToMicroseconds( - const int64 sample_number) { +int64_t AudioPacketProcessor::SampleNumberToMicroseconds( + const int64_t sample_number) { return av_rescale_q(sample_number, sample_time_base_, {1, 1000000}); } @@ -433,19 +434,19 @@ absl::Status AudioPacketProcessor::ProcessDecodedFrame(const AVPacket& packet) { << " pkt_dts:" << TimestampToString(decoded_frame_->pkt_dts) << " dts:" << TimestampToString(packet.dts) << " size:" << packet.size << " decoded:" << buf_size_bytes; - uint8* const* data_ptr = decoded_frame_->data; + uint8_t* const* data_ptr = decoded_frame_->data; if (!data_ptr[0]) { return UnknownError("No data in audio frame."); } if (decoded_frame_->pts != AV_NOPTS_VALUE) { - int64 pts = MaybeCorrectPtsForRollover(decoded_frame_->pts); + int64_t pts = MaybeCorrectPtsForRollover(decoded_frame_->pts); if (num_frames_processed_ == 0) { expected_sample_number_ = TimestampToSampleNumber(pts); } - const int64 expected_us = + const int64_t expected_us = SampleNumberToMicroseconds(expected_sample_number_); - const int64 actual_us = TimestampToMicroseconds(pts); + const int64_t actual_us = TimestampToMicroseconds(pts); if (absl::Microseconds(std::abs(expected_us - actual_us)) > absl::Seconds( absl::GetFlag(FLAGS_media_decoder_allowed_audio_gap_merge))) { @@ -474,7 +475,7 @@ absl::Status AudioPacketProcessor::ProcessDecodedFrame(const AVPacket& packet) { } absl::Status AudioPacketProcessor::AddAudioDataToBuffer( - const Timestamp output_timestamp, uint8* const* raw_audio, + const Timestamp output_timestamp, uint8_t* const* raw_audio, int buf_size_bytes) { if (buf_size_bytes == 0) { return absl::OkStatus(); @@ -484,7 +485,8 @@ absl::Status AudioPacketProcessor::AddAudioDataToBuffer( return UnknownError("Buffer is not an integral number of samples."); } - const int64 num_samples = buf_size_bytes / bytes_per_sample_ / num_channels_; + const int64_t num_samples = + buf_size_bytes / bytes_per_sample_ / num_channels_; VLOG(3) << "Adding " << num_samples << " audio samples in " << num_channels_ << " channels to output."; auto current_frame = absl::make_unique(num_channels_, num_samples); @@ -493,7 +495,8 @@ absl::Status AudioPacketProcessor::AddAudioDataToBuffer( switch (avcodec_ctx_->sample_fmt) { case AV_SAMPLE_FMT_S16: sample_ptr = reinterpret_cast(raw_audio[0]); - for (int64 sample_index = 0; sample_index < num_samples; ++sample_index) { + for (int64_t sample_index = 0; sample_index < num_samples; + ++sample_index) { for (int channel = 0; channel < num_channels_; ++channel) { (*current_frame)(channel, sample_index) = PcmEncodedSampleToFloat(sample_ptr); @@ -503,7 +506,8 @@ absl::Status AudioPacketProcessor::AddAudioDataToBuffer( break; case AV_SAMPLE_FMT_S32: sample_ptr = reinterpret_cast(raw_audio[0]); - for (int64 sample_index = 0; sample_index < num_samples; ++sample_index) { + for (int64_t sample_index = 0; sample_index < num_samples; + ++sample_index) { for (int channel = 0; channel < num_channels_; ++channel) { (*current_frame)(channel, sample_index) = PcmEncodedSampleInt32ToFloat(sample_ptr); @@ -513,7 +517,8 @@ absl::Status AudioPacketProcessor::AddAudioDataToBuffer( break; case AV_SAMPLE_FMT_FLT: sample_ptr = reinterpret_cast(raw_audio[0]); - for (int64 sample_index = 0; sample_index < num_samples; ++sample_index) { + for (int64_t sample_index = 0; sample_index < num_samples; + ++sample_index) { for (int channel = 0; channel < num_channels_; ++channel) { (*current_frame)(channel, sample_index) = Uint32ToFloat(absl::little_endian::Load32(sample_ptr)); @@ -524,7 +529,7 @@ absl::Status AudioPacketProcessor::AddAudioDataToBuffer( case AV_SAMPLE_FMT_S16P: for (int channel = 0; channel < num_channels_; ++channel) { sample_ptr = reinterpret_cast(raw_audio[channel]); - for (int64 sample_index = 0; sample_index < num_samples; + for (int64_t sample_index = 0; sample_index < num_samples; ++sample_index) { (*current_frame)(channel, sample_index) = PcmEncodedSampleToFloat(sample_ptr); @@ -535,7 +540,7 @@ absl::Status AudioPacketProcessor::AddAudioDataToBuffer( case AV_SAMPLE_FMT_FLTP: for (int channel = 0; channel < num_channels_; ++channel) { sample_ptr = reinterpret_cast(raw_audio[channel]); - for (int64 sample_index = 0; sample_index < num_samples; + for (int64_t sample_index = 0; sample_index < num_samples; ++sample_index) { (*current_frame)(channel, sample_index) = Uint32ToFloat(absl::little_endian::Load32(sample_ptr)); @@ -576,7 +581,7 @@ absl::Status AudioPacketProcessor::FillHeader(TimeSeriesHeader* header) const { return absl::OkStatus(); } -int64 AudioPacketProcessor::MaybeCorrectPtsForRollover(int64 media_pts) { +int64_t AudioPacketProcessor::MaybeCorrectPtsForRollover(int64_t media_pts) { return options_.correct_pts_for_rollover() ? CorrectPtsForRollover(media_pts) : media_pts; }