X-Git-Url: https://main.carlh.net/gitweb/?p=dcpomatic.git;a=blobdiff_plain;f=src%2Flib%2Faudio_decoder.cc;h=2ab527f59bc4bf535d5160f79db8be1288b04983;hp=3d5698dfe52a63b21fac0166224d82ec3cc0c71c;hb=HEAD;hpb=3620abae2f1aee79e80c2d12bf3fa97b74cf77f8

diff --git a/src/lib/audio_decoder.cc b/src/lib/audio_decoder.cc
index 3d5698dfe..61ff5d265 100644
--- a/src/lib/audio_decoder.cc
+++ b/src/lib/audio_decoder.cc
@@ -1,227 +1,213 @@
 /*
-    Copyright (C) 2012-2014 Carl Hetherington
+    Copyright (C) 2012-2021 Carl Hetherington
 
-    This program is free software; you can redistribute it and/or modify
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
     (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
+    DCP-o-matic is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.
 
     You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 
+
 #include "audio_decoder.h"
 #include "audio_buffers.h"
-#include "audio_processor.h"
+#include "audio_content.h"
+#include "dcpomatic_log.h"
+#include "log.h"
 #include "resampler.h"
-#include "util.h"
+#include "compose.hpp"
+#include <iostream>
 
 #include "i18n.h"
 
-using std::stringstream;
-using std::list;
-using std::pair;
+
 using std::cout;
-using std::min;
-using std::max;
+using std::shared_ptr;
+using std::make_shared;
 using boost::optional;
-using boost::shared_ptr;
+using namespace dcpomatic;
 
-AudioDecoder::AudioDecoder (shared_ptr<const AudioContent> content)
-	: _audio_content (content)
-{
-	if (content->resampled_audio_frame_rate() != content->audio_frame_rate() && content->audio_channels ()) {
-		_resampler.reset (new Resampler (content->audio_frame_rate(), content->resampled_audio_frame_rate(), content->audio_channels ()));
-	}
 
-	if (content->audio_processor ()) {
-		_processor = content->audio_processor()->clone (content->resampled_audio_frame_rate ());
+AudioDecoder::AudioDecoder (Decoder* parent, shared_ptr<const AudioContent> content, bool fast)
	: DecoderPart (parent)
	, _content (content)
	, _fast (fast)
+{
+	/* Set up _positions so that we have one for each stream */
+	for (auto i: content->streams ()) {
+		_positions[i] = 0;
 	}
-
-	reset_decoded_audio ();
 }
 
-void
-AudioDecoder::reset_decoded_audio ()
-{
-	_decoded_audio = ContentAudio (shared_ptr<AudioBuffers> (new AudioBuffers (_audio_content->processed_audio_channels(), 0)), 0);
-}
 
-shared_ptr<ContentAudio>
-AudioDecoder::get_audio (AudioFrame frame, AudioFrame length, bool accurate)
+/** @param flushing true if we are flushing data out of the decoder, in which case the content delay is not added to time */
+void
+AudioDecoder::emit(shared_ptr<const Film> film, AudioStreamPtr stream, shared_ptr<const AudioBuffers> data, ContentTime time, bool flushing)
 {
-	shared_ptr<ContentAudio> dec;
+	if (ignore ()) {
+		return;
+	}
 
-	AudioFrame const end = frame + length - 1;
-
-	if (frame < _decoded_audio.frame || end > (_decoded_audio.frame + length * 4)) {
-		/* Either we have no decoded data, or what we do have is a long way from what we want: seek */
-		seek (ContentTime::from_frames (frame, _audio_content->audio_frame_rate()), accurate);
+	int const resampled_rate = _content->resampled_frame_rate(film);
+	if (!flushing) {
+		time += ContentTime::from_seconds (_content->delay() / 1000.0);
 	}
 
-	/* Offset of the data that we want from the start of _decoded_audio.audio
-	   (to be set up shortly)
-	*/
-	AudioFrame decoded_offset = 0;
-
-	/* Now enough pass() calls will either:
-	 * (a) give us what we want, or
-	 * (b) hit the end of the decoder.
-	 *
-	 * If we are being accurate, we want the right frames,
-	 * otherwise any frames will do.
+	/* Amount of error we will tolerate on audio timestamps; see comment below.
+	 * We'll use 1 24fps video frame as this seems to be roughly how ffplay does it.
 	 */
-	if (accurate) {
-		/* Keep stuffing data into _decoded_audio until we have enough data, or the subclass does not want to give us any more */
-		while (!pass() && (_decoded_audio.frame > frame || (_decoded_audio.frame + _decoded_audio.audio->frames()) < end)) {}
-		decoded_offset = frame - _decoded_audio.frame;
-	} else {
-		while (!pass() && _decoded_audio.audio->frames() < length) {}
-		/* Use decoded_offset of 0, as we don't really care what frames we return */
+	Frame const slack_frames = resampled_rate / 24;
+
+	/* first_since_seek is set to true if this is the first data we have
+	   received since initialisation or seek.  We'll set the position based
+	   on the ContentTime that was given.  After this first time we just
+	   count samples unless the timestamp is more than slack_frames away
+	   from where we think it should be.  This is because ContentTimes seem
+	   to be slightly unreliable from FFmpegDecoder (i.e. not sample
+	   accurate), but we still need to obey them sometimes otherwise we get
+	   sync problems such as #1833.
+	*/
+
+	auto const first_since_seek = _positions[stream] == 0;
+	auto const need_reset = !first_since_seek && (std::abs(_positions[stream] - time.frames_round(resampled_rate)) > slack_frames);
+
+	if (need_reset) {
+		LOG_GENERAL (
+			"Reset audio position: was %1, new data at %2, slack: %3 frames",
+			_positions[stream],
+			time.frames_round(resampled_rate),
+			std::abs(_positions[stream] - time.frames_round(resampled_rate))
+			);
 	}
 
-	/* The amount of data available in _decoded_audio.audio starting from `frame'.  This could be -ve
-	   if pass() returned true before we got enough data.
-	*/
-	AudioFrame const available = _decoded_audio.audio->frames() - decoded_offset;
+	if (first_since_seek || need_reset) {
+		_positions[stream] = time.frames_round (resampled_rate);
+	}
 
-	/* We will return either that, or the requested amount, whichever is smaller */
-	AudioFrame const to_return = max ((AudioFrame) 0, min (available, length));
+	if (first_since_seek && _content->delay() > 0) {
+		silence (stream, _content->delay());
+	}
 
-	/* Copy our data to the output */
-	shared_ptr<AudioBuffers> out (new AudioBuffers (_decoded_audio.audio->channels(), to_return));
-	out->copy_from (_decoded_audio.audio.get(), to_return, decoded_offset, 0);
+	shared_ptr<Resampler> resampler;
+	auto i = _resamplers.find(stream);
+	if (i != _resamplers.end()) {
+		resampler = i->second;
+	} else {
+		if (stream->frame_rate() != resampled_rate) {
+			LOG_GENERAL (
+				"Creating new resampler from %1 to %2 with %3 channels",
+				stream->frame_rate(),
+				resampled_rate,
+				stream->channels()
+				);
+
+			resampler = make_shared<Resampler>(stream->frame_rate(), resampled_rate, stream->channels());
+			if (_fast) {
+				resampler->set_fast ();
+			}
+			_resamplers[stream] = resampler;
+		}
+	}
 
-	AudioFrame const remaining = max ((AudioFrame) 0, available - to_return);
+	if (resampler && !flushing) {
+		/* It can be that the data here has a different number of channels than the stream
+		 * it comes from (e.g. the files decoded by FFmpegDecoder sometimes have a random
+		 * frame, often at the end, with more channels).  Insert silence or discard channels
+		 * here.
+		 */
+		if (resampler->channels() != data->channels()) {
+			LOG_WARNING("Received audio data with an unexpected channel count of %1 instead of %2", data->channels(), resampler->channels());
+			auto data_copy = data->clone();
+			data_copy->set_channels(resampler->channels());
+			data = resampler->run(data_copy);
+		} else {
+			data = resampler->run(data);
+		}
 
-	/* Clean up decoded; first, move the data after what we just returned to the start of the buffer */
-	_decoded_audio.audio->move (decoded_offset + to_return, 0, remaining);
-	/* And set up the number of frames we have left */
-	_decoded_audio.audio->set_frames (remaining);
-	/* Also bump where those frames are in terms of the content */
-	_decoded_audio.frame += decoded_offset + to_return;
+		if (data->frames() == 0) {
+			return;
+		}
+	}
 
-	return shared_ptr<ContentAudio> (new ContentAudio (out, frame));
+	Data(stream, ContentAudio (data, _positions[stream]));
+	_positions[stream] += data->frames();
 }
 
-/** Called by subclasses when audio data is ready.
- *
- *  Audio timestamping is made hard by many factors, but perhaps the most entertaining is resampling.
- *  We have to assume that we are feeding continuous data into the resampler, and so we get continuous
- *  data out.  Hence we do the timestamping here, post-resampler, just by counting samples.
- *
- *  The time is passed in here so that after a seek we can set up our _audio_position.  The
- *  time is ignored once this has been done.
- */
-void
-AudioDecoder::audio (shared_ptr<const AudioBuffers> data, ContentTime time)
+
+/** @return Time just after the last thing that was emitted from a given stream */
+ContentTime
+AudioDecoder::stream_position (shared_ptr<const Film> film, AudioStreamPtr stream) const
 {
-	if (_resampler) {
-		data = _resampler->run (data);
-	}
+	auto i = _positions.find (stream);
+	DCPOMATIC_ASSERT (i != _positions.end ());
+	return ContentTime::from_frames (i->second, _content->resampled_frame_rate(film));
+}
 
-	if (_processor) {
-		data = _processor->run (data);
-	}
-	AudioFrame const frame_rate = _audio_content->resampled_audio_frame_rate ();
-
-	if (_seek_reference) {
-		/* We've had an accurate seek and now we're seeing some data */
-		ContentTime const delta = time - _seek_reference.get ();
-		AudioFrame const delta_frames = delta.frames (frame_rate);
-		if (delta_frames > 0) {
-			/* This data comes after the seek time.  Pad the data with some silence. */
-			shared_ptr<AudioBuffers> padded (new AudioBuffers (data->channels(), data->frames() + delta_frames));
-			padded->make_silent ();
-			padded->copy_from (data.get(), data->frames(), 0, delta_frames);
-			data = padded;
-			time -= delta;
-		} else if (delta_frames < 0) {
-			/* This data comes before the seek time.  Throw some data away */
-			AudioFrame const to_discard = min (-delta_frames, static_cast<AudioFrame> (data->frames()));
-			AudioFrame const to_keep = data->frames() - to_discard;
-			if (to_keep == 0) {
-				/* We have to throw all this data away, so keep _seek_reference and
-				   try again next time some data arrives.
-				*/
-				return;
-			}
-			shared_ptr<AudioBuffers> trimmed (new AudioBuffers (data->channels(), to_keep));
-			trimmed->copy_from (data.get(), to_keep, to_discard, 0);
-			data = trimmed;
-			time += ContentTime::from_frames (to_discard, frame_rate);
+boost::optional<ContentTime>
+AudioDecoder::position (shared_ptr<const Film> film) const
+{
+	optional<ContentTime> p;
+	for (auto i: _positions) {
+		auto const ct = stream_position (film, i.first);
+		if (!p || ct < *p) {
+			p = ct;
 		}
-		_seek_reference = optional<ContentTime> ();
 	}
 
-	if (!_audio_position) {
-		_audio_position = time.frames (frame_rate);
-	}
+	return p;
+}
 
-	assert (_audio_position.get() >= (_decoded_audio.frame + _decoded_audio.audio->frames()));
-
-	/* Resize _decoded_audio to fit the new data */
-	int new_size = 0;
-	if (_decoded_audio.audio->frames() == 0) {
-		/* There's nothing in there, so just store the new data */
-		new_size = data->frames ();
-		_decoded_audio.frame = _audio_position.get ();
-	} else {
-		/* Otherwise we need to extend _decoded_audio to include the new stuff */
-		new_size = _audio_position.get() + data->frames() - _decoded_audio.frame;
+void
+AudioDecoder::seek ()
+{
+	for (auto i: _resamplers) {
+		i.second->flush ();
+		i.second->reset ();
 	}
-
-	_decoded_audio.audio->ensure_size (new_size);
-	_decoded_audio.audio->set_frames (new_size);
-
-	/* Copy new data in */
-	_decoded_audio.audio->copy_from (data.get(), data->frames(), 0, _audio_position.get() - _decoded_audio.frame);
-	_audio_position = _audio_position.get() + data->frames ();
-
-	/* Limit the amount of data we keep in case nobody is asking for it */
-	int const max_frames = _audio_content->resampled_audio_frame_rate () * 10;
-	if (_decoded_audio.audio->frames() > max_frames) {
-		int const to_remove = _decoded_audio.audio->frames() - max_frames;
-		_decoded_audio.frame += to_remove;
-		_decoded_audio.audio->move (to_remove, 0, max_frames);
-		_decoded_audio.audio->set_frames (max_frames);
+
+	for (auto& i: _positions) {
+		i.second = 0;
 	}
 }
 
-/* XXX: called? */
+
 void
 AudioDecoder::flush ()
 {
-	if (!_resampler) {
-		return;
+	for (auto const& i: _resamplers) {
+		auto ro = i.second->flush ();
+		if (ro->frames() > 0) {
+			Data (i.first, ContentAudio (ro, _positions[i.first]));
+			_positions[i.first] += ro->frames();
+		}
 	}
 
-	/*
-	shared_ptr<const AudioBuffers> b = _resampler->flush ();
-	if (b) {
-		_pending.push_back (shared_ptr<DecodedAudio> (new DecodedAudio (b, _audio_position.get ())));
-		_audio_position = _audio_position.get() + b->frames ();
+	if (_content->delay() < 0) {
+		/* Finish off with the gap caused by the delay */
+		for (auto stream: _content->streams()) {
+			silence (stream, -_content->delay());
+		}
 	}
-	*/
 }
 
+
 void
-AudioDecoder::seek (ContentTime t, bool accurate)
+AudioDecoder::silence (AudioStreamPtr stream, int milliseconds)
 {
-	_audio_position.reset ();
-	reset_decoded_audio ();
-	if (accurate) {
-		_seek_reference = t;
-	}
-	if (_processor) {
-		_processor->flush ();
-	}
+	int const samples = ContentTime::from_seconds(milliseconds / 1000.0).frames_round(stream->frame_rate());
+	auto silence = make_shared<AudioBuffers>(stream->channels(), samples);
+	silence->make_silent ();
+	Data (stream, ContentAudio(silence, _positions[stream]));
 }