Chrome can't use Absl's CHECK because of collisions with its own version. PiperOrigin-RevId: 561740965
		
			
				
	
	
		
			406 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			406 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2019 The MediaPipe Authors.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //      http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| //
 | |
| // Defines TimeSeriesFramerCalculator.
 | |
| #include <math.h>
 | |
| 
 | |
| #include <vector>
 | |
| 
 | |
| #include "Eigen/Core"
 | |
| #include "absl/log/absl_check.h"
 | |
| #include "audio/dsp/window_functions.h"
 | |
| #include "mediapipe/calculators/audio/time_series_framer_calculator.pb.h"
 | |
| #include "mediapipe/framework/calculator_framework.h"
 | |
| #include "mediapipe/framework/formats/matrix.h"
 | |
| #include "mediapipe/framework/formats/time_series_header.pb.h"
 | |
| #include "mediapipe/framework/port/ret_check.h"
 | |
| #include "mediapipe/framework/timestamp.h"
 | |
| #include "mediapipe/util/time_series_util.h"
 | |
| 
 | |
| namespace mediapipe {
 | |
| 
 | |
| // MediaPipe Calculator for framing a (vector-valued) input time series,
 | |
| // i.e. for breaking an input time series into fixed-size, possibly
 | |
| // overlapping, frames.  The output stream's frame duration is
 | |
| // specified by frame_duration_seconds in the
 | |
| // TimeSeriesFramerCalculatorOptions, and the output's overlap is
 | |
| // specified by frame_overlap_seconds.
 | |
| //
 | |
| // This calculator assumes that the input timestamps refer to the
 | |
| // first sample in each Matrix.  The output timestamps follow this
 | |
| // same convention.
 | |
| //
 | |
| // All output frames will have exactly the same number of samples: the number of
 | |
| // samples that approximates frame_duration_seconds most closely.
 | |
| //
 | |
| // Similarly, frame overlap is by default the (fixed) number of samples
 | |
| // approximating frame_overlap_seconds most closely.  But if
 | |
| // emulate_fractional_frame_overlap is set to true, frame overlap is a variable
 | |
| // number of samples instead, such that the long-term average step between
 | |
| // frames is the difference between the (nominal) frame_duration_seconds and
 | |
| // frame_overlap_seconds.
 | |
| //
 | |
| // If pad_final_packet is true, all input samples will be emitted and the final
 | |
| // packet will be zero padded as necessary.  If pad_final_packet is false, some
 | |
| // samples may be dropped at the end of the stream.
 | |
| //
 | |
| // If use_local_timestamp is true, the output packet's timestamp is based on the
 | |
| // last sample of the packet. The timestamp of this sample is inferred by
 | |
| // input_packet_timestamp + local_sample_index / sample_rate_. If false, the
 | |
| // output packet's timestamp is based on the cumulative timestamping, which is
 | |
| // done by adopting the timestamp of the first sample of the packet and this
 | |
| // sample's timestamp is inferred by initial_input_timestamp_ +
 | |
| // cumulative_completed_samples / sample_rate_.
 | |
| class TimeSeriesFramerCalculator : public CalculatorBase {
 | |
|  public:
 | |
|   static absl::Status GetContract(CalculatorContract* cc) {
 | |
|     cc->Inputs().Index(0).Set<Matrix>(
 | |
|         // Input stream with TimeSeriesHeader.
 | |
|     );
 | |
|     cc->Outputs().Index(0).Set<Matrix>(
 | |
|         // Fixed length time series Packets with TimeSeriesHeader.
 | |
|     );
 | |
|     return absl::OkStatus();
 | |
|   }
 | |
| 
 | |
|   // Returns FAIL if the input stream header is invalid.
 | |
|   absl::Status Open(CalculatorContext* cc) override;
 | |
| 
 | |
|   // Outputs as many framed packets as possible given the accumulated
 | |
|   // input.  Always returns OK.
 | |
|   absl::Status Process(CalculatorContext* cc) override;
 | |
| 
 | |
|   // Flushes any remaining samples in a zero-padded packet.  Always
 | |
|   // returns OK.
 | |
|   absl::Status Close(CalculatorContext* cc) override;
 | |
| 
 | |
|  private:
 | |
|   Timestamp CurrentOutputTimestamp() {
 | |
|     if (use_local_timestamp_) {
 | |
|       return current_timestamp_;
 | |
|     }
 | |
|     return CumulativeOutputTimestamp();
 | |
|   }
 | |
| 
 | |
|   Timestamp CumulativeOutputTimestamp() {
 | |
|     return initial_input_timestamp_ +
 | |
|            round(cumulative_completed_samples_ / sample_rate_ *
 | |
|                  Timestamp::kTimestampUnitsPerSecond);
 | |
|   }
 | |
| 
 | |
|   // The number of input samples to advance after the current output frame is
 | |
|   // emitted.
 | |
|   int next_frame_step_samples() const {
 | |
|     // All numbers are in input samples.
 | |
|     const int64_t current_output_frame_start = static_cast<int64_t>(
 | |
|         round(cumulative_output_frames_ * average_frame_step_samples_));
 | |
|     ABSL_CHECK_EQ(current_output_frame_start, cumulative_completed_samples_);
 | |
|     const int64_t next_output_frame_start = static_cast<int64_t>(
 | |
|         round((cumulative_output_frames_ + 1) * average_frame_step_samples_));
 | |
|     return next_output_frame_start - current_output_frame_start;
 | |
|   }
 | |
| 
 | |
|   double sample_rate_;
 | |
|   bool pad_final_packet_;
 | |
|   int frame_duration_samples_;
 | |
|   // The advance, in input samples, between the start of successive output
 | |
|   // frames. This may be a non-integer average value if
 | |
|   // emulate_fractional_frame_overlap is true.
 | |
|   double average_frame_step_samples_;
 | |
|   int samples_still_to_drop_;
 | |
|   int64_t cumulative_output_frames_;
 | |
|   // "Completed" samples are samples that are no longer needed because
 | |
|   // the framer has completely stepped past them (taking into account
 | |
|   // any overlap).
 | |
|   int64_t cumulative_completed_samples_;
 | |
|   Timestamp initial_input_timestamp_;
 | |
|   // The current timestamp is updated along with the incoming packets.
 | |
|   Timestamp current_timestamp_;
 | |
| 
 | |
|   // Samples are buffered in a vector of sample blocks.
 | |
|   class SampleBlockBuffer {
 | |
|    public:
 | |
|     // Initializes the buffer.
 | |
|     void Init(double sample_rate, int num_channels) {
 | |
|       ts_units_per_sample_ = Timestamp::kTimestampUnitsPerSecond / sample_rate;
 | |
|       num_channels_ = num_channels;
 | |
|       num_samples_ = 0;
 | |
|       first_block_offset_ = 0;
 | |
|     }
 | |
| 
 | |
|     // Number of channels, equal to the number of rows in each Matrix.
 | |
|     int num_channels() const { return num_channels_; }
 | |
|     // Total number of available samples over all blocks.
 | |
|     int num_samples() const { return num_samples_; }
 | |
| 
 | |
|     // Pushes a new block of samples on the back of the buffer with `timestamp`
 | |
|     // being the input timestamp of the packet containing the Matrix.
 | |
|     void Push(const Matrix& samples, Timestamp timestamp);
 | |
|     // Copies `count` samples from the front of the buffer. If there are fewer
 | |
|     // samples than this, the result is zero padded to have `count` samples.
 | |
|     // The timestamp of the last copied sample is written to *last_timestamp.
 | |
|     // This output is used below to update `current_timestamp_`, which is only
 | |
|     // used when `use_local_timestamp` is true.
 | |
|     Matrix CopySamples(int count, Timestamp* last_timestamp) const;
 | |
|     // Drops `count` samples from the front of the buffer. If `count` exceeds
 | |
|     // `num_samples()`, the buffer is emptied.  Returns how many samples were
 | |
|     // dropped.
 | |
|     int DropSamples(int count);
 | |
| 
 | |
|    private:
 | |
|     struct Block {
 | |
|       // Matrix of num_channels rows by num_samples columns, a block of possibly
 | |
|       // multiple samples.
 | |
|       Matrix samples;
 | |
|       // Timestamp of the first sample in the Block. This comes from the input
 | |
|       // packet's timestamp that contains this Matrix.
 | |
|       Timestamp timestamp;
 | |
| 
 | |
|       Block() : timestamp(Timestamp::Unstarted()) {}
 | |
|       Block(const Matrix& samples, Timestamp timestamp)
 | |
|           : samples(samples), timestamp(timestamp) {}
 | |
|       int num_samples() const { return samples.cols(); }
 | |
|     };
 | |
|     std::vector<Block> blocks_;
 | |
|     // Number of timestamp units per sample. Used to compute timestamps as
 | |
|     // nth sample timestamp = base_timestamp + round(ts_units_per_sample_ * n).
 | |
|     double ts_units_per_sample_;
 | |
|     // Number of rows in each Matrix.
 | |
|     int num_channels_;
 | |
|     // The total number of samples over all blocks, equal to
 | |
|     // (sum_i blocks_[i].num_samples()) - first_block_offset_.
 | |
|     int num_samples_;
 | |
|     // The number of samples in the first block that have been discarded. This
 | |
|     // way we can cheaply represent "partially discarding" a block.
 | |
|     int first_block_offset_;
 | |
|   } sample_buffer_;
 | |
| 
 | |
|   bool use_window_;
 | |
|   Eigen::RowVectorXf window_;
 | |
| 
 | |
|   bool use_local_timestamp_;
 | |
| };
 | |
| REGISTER_CALCULATOR(TimeSeriesFramerCalculator);
 | |
| 
 | |
| void TimeSeriesFramerCalculator::SampleBlockBuffer::Push(const Matrix& samples,
 | |
|                                                          Timestamp timestamp) {
 | |
|   num_samples_ += samples.cols();
 | |
|   blocks_.emplace_back(samples, timestamp);
 | |
| }
 | |
| 
 | |
| Matrix TimeSeriesFramerCalculator::SampleBlockBuffer::CopySamples(
 | |
|     int count, Timestamp* last_timestamp) const {
 | |
|   Matrix copied(num_channels_, count);
 | |
| 
 | |
|   if (!blocks_.empty()) {
 | |
|     int num_copied = 0;
 | |
|     // First block has an offset for samples that have been discarded.
 | |
|     int offset = first_block_offset_;
 | |
|     int n;
 | |
|     Timestamp last_block_ts;
 | |
|     int last_sample_index;
 | |
| 
 | |
|     for (auto it = blocks_.begin(); it != blocks_.end() && count > 0; ++it) {
 | |
|       n = std::min(it->num_samples() - offset, count);
 | |
|       // Copy `n` samples from the next block.
 | |
|       copied.middleCols(num_copied, n) = it->samples.middleCols(offset, n);
 | |
|       count -= n;
 | |
|       num_copied += n;
 | |
|       last_block_ts = it->timestamp;
 | |
|       last_sample_index = offset + n - 1;
 | |
|       offset = 0;  // No samples have been discarded in subsequent blocks.
 | |
|     }
 | |
| 
 | |
|     // Compute the timestamp of the last copied sample.
 | |
|     *last_timestamp =
 | |
|         last_block_ts + std::round(ts_units_per_sample_ * last_sample_index);
 | |
|   }
 | |
| 
 | |
|   if (count > 0) {
 | |
|     copied.rightCols(count).setZero();  // Zero pad if needed.
 | |
|   }
 | |
| 
 | |
|   return copied;
 | |
| }
 | |
| 
 | |
| int TimeSeriesFramerCalculator::SampleBlockBuffer::DropSamples(int count) {
 | |
|   if (blocks_.empty()) {
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
|   auto block_it = blocks_.begin();
 | |
|   if (first_block_offset_ + count < block_it->num_samples()) {
 | |
|     // `count` is less than the remaining samples in the first block.
 | |
|     first_block_offset_ += count;
 | |
|     num_samples_ -= count;
 | |
|     return count;
 | |
|   }
 | |
| 
 | |
|   int num_samples_dropped = block_it->num_samples() - first_block_offset_;
 | |
|   count -= num_samples_dropped;
 | |
|   first_block_offset_ = 0;
 | |
| 
 | |
|   for (++block_it; block_it != blocks_.end(); ++block_it) {
 | |
|     if (block_it->num_samples() > count) {
 | |
|       break;
 | |
|     }
 | |
|     num_samples_dropped += block_it->num_samples();
 | |
|     count -= block_it->num_samples();
 | |
|   }
 | |
| 
 | |
|   blocks_.erase(blocks_.begin(), block_it);  // Drop whole blocks.
 | |
|   if (!blocks_.empty()) {
 | |
|     first_block_offset_ = count;  // Drop part of the next block.
 | |
|     num_samples_dropped += count;
 | |
|   }
 | |
| 
 | |
|   num_samples_ -= num_samples_dropped;
 | |
|   return num_samples_dropped;
 | |
| }
 | |
| 
 | |
| absl::Status TimeSeriesFramerCalculator::Process(CalculatorContext* cc) {
 | |
|   if (initial_input_timestamp_ == Timestamp::Unstarted()) {
 | |
|     initial_input_timestamp_ = cc->InputTimestamp();
 | |
|     current_timestamp_ = initial_input_timestamp_;
 | |
|   }
 | |
| 
 | |
|   // Add input data to the internal buffer.
 | |
|   sample_buffer_.Push(cc->Inputs().Index(0).Get<Matrix>(),
 | |
|                       cc->InputTimestamp());
 | |
| 
 | |
|   // Construct and emit framed output packets.
 | |
|   while (sample_buffer_.num_samples() >=
 | |
|          frame_duration_samples_ + samples_still_to_drop_) {
 | |
|     sample_buffer_.DropSamples(samples_still_to_drop_);
 | |
|     Matrix output_frame = sample_buffer_.CopySamples(frame_duration_samples_,
 | |
|                                                      ¤t_timestamp_);
 | |
|     const int frame_step_samples = next_frame_step_samples();
 | |
|     samples_still_to_drop_ = frame_step_samples;
 | |
| 
 | |
|     if (use_window_) {
 | |
|       // Apply the window to each row of output_frame.
 | |
|       output_frame.array().rowwise() *= window_.array();
 | |
|     }
 | |
| 
 | |
|     cc->Outputs().Index(0).AddPacket(MakePacket<Matrix>(std::move(output_frame))
 | |
|                                          .At(CurrentOutputTimestamp()));
 | |
|     ++cumulative_output_frames_;
 | |
|     cumulative_completed_samples_ += frame_step_samples;
 | |
|   }
 | |
|   if (!use_local_timestamp_) {
 | |
|     // In non-local timestamp mode the timestamp of the next packet will be
 | |
|     // equal to CumulativeOutputTimestamp(). Inform the framework about this
 | |
|     // fact to enable packet queueing optimizations.
 | |
|     cc->Outputs().Index(0).SetNextTimestampBound(CumulativeOutputTimestamp());
 | |
|   }
 | |
| 
 | |
|   return absl::OkStatus();
 | |
| }
 | |
| 
 | |
| absl::Status TimeSeriesFramerCalculator::Close(CalculatorContext* cc) {
 | |
|   sample_buffer_.DropSamples(samples_still_to_drop_);
 | |
| 
 | |
|   if (sample_buffer_.num_samples() > 0 && pad_final_packet_) {
 | |
|     Matrix output_frame = sample_buffer_.CopySamples(frame_duration_samples_,
 | |
|                                                      ¤t_timestamp_);
 | |
|     cc->Outputs().Index(0).AddPacket(MakePacket<Matrix>(std::move(output_frame))
 | |
|                                          .At(CurrentOutputTimestamp()));
 | |
|   }
 | |
| 
 | |
|   return absl::OkStatus();
 | |
| }
 | |
| 
 | |
| absl::Status TimeSeriesFramerCalculator::Open(CalculatorContext* cc) {
 | |
|   TimeSeriesFramerCalculatorOptions framer_options =
 | |
|       cc->Options<TimeSeriesFramerCalculatorOptions>();
 | |
| 
 | |
|   RET_CHECK_GT(framer_options.frame_duration_seconds(), 0.0)
 | |
|       << "Invalid or missing frame_duration_seconds. "
 | |
|       << "framer_duration_seconds: \n"
 | |
|       << framer_options.frame_duration_seconds();
 | |
|   RET_CHECK_LT(framer_options.frame_overlap_seconds(),
 | |
|                framer_options.frame_duration_seconds())
 | |
|       << "Invalid frame_overlap_seconds. framer_overlap_seconds: \n"
 | |
|       << framer_options.frame_overlap_seconds();
 | |
| 
 | |
|   TimeSeriesHeader input_header;
 | |
|   MP_RETURN_IF_ERROR(time_series_util::FillTimeSeriesHeaderIfValid(
 | |
|       cc->Inputs().Index(0).Header(), &input_header));
 | |
| 
 | |
|   sample_rate_ = input_header.sample_rate();
 | |
|   sample_buffer_.Init(sample_rate_, input_header.num_channels());
 | |
|   frame_duration_samples_ = time_series_util::SecondsToSamples(
 | |
|       framer_options.frame_duration_seconds(), sample_rate_);
 | |
|   RET_CHECK_GT(frame_duration_samples_, 0)
 | |
|       << "Frame duration of " << framer_options.frame_duration_seconds()
 | |
|       << "s too small to cover a single sample at " << sample_rate_ << " Hz ";
 | |
|   if (framer_options.emulate_fractional_frame_overlap()) {
 | |
|     // Frame step may be fractional.
 | |
|     average_frame_step_samples_ = (framer_options.frame_duration_seconds() -
 | |
|                                    framer_options.frame_overlap_seconds()) *
 | |
|                                   sample_rate_;
 | |
|   } else {
 | |
|     // Frame step is an integer (stored in a double).
 | |
|     average_frame_step_samples_ =
 | |
|         frame_duration_samples_ -
 | |
|         time_series_util::SecondsToSamples(
 | |
|             framer_options.frame_overlap_seconds(), sample_rate_);
 | |
|   }
 | |
|   RET_CHECK_GE(average_frame_step_samples_, 1)
 | |
|       << "Frame step too small to cover a single sample at " << sample_rate_
 | |
|       << " Hz.";
 | |
|   pad_final_packet_ = framer_options.pad_final_packet();
 | |
| 
 | |
|   auto output_header = new TimeSeriesHeader(input_header);
 | |
|   output_header->set_num_samples(frame_duration_samples_);
 | |
|   if (round(average_frame_step_samples_) == average_frame_step_samples_) {
 | |
|     // Only set output packet rate if it is fixed.
 | |
|     output_header->set_packet_rate(sample_rate_ / average_frame_step_samples_);
 | |
|   }
 | |
|   cc->Outputs().Index(0).SetHeader(Adopt(output_header));
 | |
|   cumulative_completed_samples_ = 0;
 | |
|   cumulative_output_frames_ = 0;
 | |
|   samples_still_to_drop_ = 0;
 | |
|   initial_input_timestamp_ = Timestamp::Unstarted();
 | |
|   current_timestamp_ = Timestamp::Unstarted();
 | |
| 
 | |
|   std::vector<double> window_vector;
 | |
|   use_window_ = false;
 | |
|   switch (framer_options.window_function()) {
 | |
|     case TimeSeriesFramerCalculatorOptions::HAMMING:
 | |
|       audio_dsp::HammingWindow().GetPeriodicSamples(frame_duration_samples_,
 | |
|                                                     &window_vector);
 | |
|       use_window_ = true;
 | |
|       break;
 | |
|     case TimeSeriesFramerCalculatorOptions::HANN:
 | |
|       audio_dsp::HannWindow().GetPeriodicSamples(frame_duration_samples_,
 | |
|                                                  &window_vector);
 | |
|       use_window_ = true;
 | |
|       break;
 | |
|     case TimeSeriesFramerCalculatorOptions::NONE:
 | |
|       break;
 | |
|   }
 | |
| 
 | |
|   if (use_window_) {
 | |
|     window_ = Eigen::Map<Eigen::RowVectorXd>(window_vector.data(),
 | |
|                                              frame_duration_samples_)
 | |
|                   .cast<float>();
 | |
|   }
 | |
|   use_local_timestamp_ = framer_options.use_local_timestamp();
 | |
| 
 | |
|   return absl::OkStatus();
 | |
| }
 | |
| 
 | |
| }  // namespace mediapipe
 |