X-Git-Url: https://main.carlh.net/gitweb/?p=dcpomatic.git;a=blobdiff_plain;f=src%2Flib%2Faudio_decoder.cc;h=2ab527f59bc4bf535d5160f79db8be1288b04983;hp=3d5698dfe52a63b21fac0166224d82ec3cc0c71c;hb=HEAD;hpb=3620abae2f1aee79e80c2d12bf3fa97b74cf77f8

diff --git a/src/lib/audio_decoder.cc b/src/lib/audio_decoder.cc
index 3d5698dfe..61ff5d265 100644
--- a/src/lib/audio_decoder.cc
+++ b/src/lib/audio_decoder.cc
@@ -1,227 +1,213 @@
 /*
-    Copyright (C) 2012-2014 Carl Hetherington
+    Copyright (C) 2012-2021 Carl Hetherington
 
-    This program is free software; you can redistribute it and/or modify
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
     (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
+    DCP-o-matic is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.
 
     You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 
+
 #include "audio_decoder.h"
 #include "audio_buffers.h"
-#include "audio_processor.h"
+#include "audio_content.h"
+#include "dcpomatic_log.h"
+#include "log.h"
 #include "resampler.h"
-#include "util.h"
+#include "compose.hpp"
+#include <iostream>
 
 #include "i18n.h"
 
-using std::stringstream;
-using std::list;
-using std::pair;
+
 using std::cout;
-using std::min;
-using std::max;
+using std::shared_ptr;
+using std::make_shared;
 using boost::optional;
-using boost::shared_ptr;
+using namespace dcpomatic;
 
-AudioDecoder::AudioDecoder (shared_ptr<const AudioContent> content)
-	: _audio_content (content)
-{
-	if (content->resampled_audio_frame_rate() != content->audio_frame_rate() && content->audio_channels ()) {
-		_resampler.reset (new Resampler (content->audio_frame_rate(), content->resampled_audio_frame_rate(), content->audio_channels ()));
-	}
 
-	if (content->audio_processor ()) {
-		_processor = content->audio_processor()->clone (content->resampled_audio_frame_rate ());
+AudioDecoder::AudioDecoder (Decoder* parent, shared_ptr<const AudioContent> content, bool fast)
	: DecoderPart (parent)
	, _content (content)
	, _fast (fast)
+{
+	/* Set up _positions so that we have one for each stream */
+	for (auto i: content->streams ()) {
+		_positions[i] = 0;
 	}
-
-	reset_decoded_audio ();
 }
 
-void
-AudioDecoder::reset_decoded_audio ()
-{
-	_decoded_audio = ContentAudio (shared_ptr<AudioBuffers> (new AudioBuffers (_audio_content->processed_audio_channels(), 0)), 0);
-}
 
-shared_ptr<ContentAudio>
-AudioDecoder::get_audio (AudioFrame frame, AudioFrame length, bool accurate)
+/** @param flushing true if we are flushing data out of the decoder, in which case the content delay is not added to time */
+void
+AudioDecoder::emit(shared_ptr<const Film> film, AudioStreamPtr stream, shared_ptr<const AudioBuffers> data, ContentTime time, bool flushing)
 {
-	shared_ptr<ContentAudio> dec;
+	if (ignore ()) {
+		return;
+	}
 
-	AudioFrame const end = frame + length - 1;
-
-	if (frame < _decoded_audio.frame || end > (_decoded_audio.frame + length * 4)) {
-		/* Either we have no decoded data, or what we do have is a long way from what we want: seek */
-		seek (ContentTime::from_frames (frame, _audio_content->audio_frame_rate()), accurate);
+	int const resampled_rate = _content->resampled_frame_rate(film);
+	if (!flushing) {
+		time += ContentTime::from_seconds (_content->delay() / 1000.0);
 	}
 
-	/* Offset of the data that we want from the start of _decoded_audio.audio
-	   (to be set up shortly)
-	*/
-	AudioFrame decoded_offset = 0;
-
-	/* Now enough pass() calls will either:
-	 * (a) give us what we want, or
-	 * (b) hit the end of the decoder.
-	 *
-	 * If we are being accurate, we want the right frames,
-	 * otherwise any frames will do.
+	/* Amount of error we will tolerate on audio timestamps; see comment below.
+	 * We'll use 1 24fps video frame as this seems to be roughly how ffplay does it.
 	 */
-	if (accurate) {
-		/* Keep stuffing data into _decoded_audio until we have enough data, or the subclass does not want to give us any more */
-		while (!pass() && (_decoded_audio.frame > frame || (_decoded_audio.frame + _decoded_audio.audio->frames()) < end)) {}
-		decoded_offset = frame - _decoded_audio.frame;
-	} else {
-		while (!pass() && _decoded_audio.audio->frames() < length) {}
-		/* Use decoded_offset of 0, as we don't really care what frames we return */
+	Frame const slack_frames = resampled_rate / 24;
+
+	/* first_since_seek is set to true if this is the first data we have
+	   received since initialisation or seek.  We'll set the position based
+	   on the ContentTime that was given.  After this first time we just
+	   count samples unless the timestamp is more than slack_frames away
+	   from where we think it should be.  This is because ContentTimes seem
+	   to be slightly unreliable from FFmpegDecoder (i.e. not sample
+	   accurate), but we still need to obey them sometimes otherwise we get
+	   sync problems such as #1833.
+	*/
+
+	auto const first_since_seek = _positions[stream] == 0;
+	auto const need_reset = !first_since_seek && (std::abs(_positions[stream] - time.frames_round(resampled_rate)) > slack_frames);
+
+	if (need_reset) {
+		LOG_GENERAL (
+			"Reset audio position: was %1, new data at %2, slack: %3 frames",
+			_positions[stream],
+			time.frames_round(resampled_rate),
+			std::abs(_positions[stream] - time.frames_round(resampled_rate))
+			);
 	}
 
-	/* The amount of data available in _decoded_audio.audio starting from `frame'.  This could be -ve
-	   if pass() returned true before we got enough data.
-	*/
-	AudioFrame const available = _decoded_audio.audio->frames() - decoded_offset;
+	if (first_since_seek || need_reset) {
+		_positions[stream] = time.frames_round (resampled_rate);
+	}
 
-	/* We will return either that, or the requested amount, whichever is smaller */
-	AudioFrame const to_return = max ((AudioFrame) 0, min (available, length));
+	if (first_since_seek && _content->delay() > 0) {
+		silence (stream, _content->delay());
+	}
 
-	/* Copy our data to the output */
-	shared_ptr<AudioBuffers> out (new AudioBuffers (_decoded_audio.audio->channels(), to_return));
-	out->copy_from (_decoded_audio.audio.get(), to_return, decoded_offset, 0);
+	shared_ptr<Resampler> resampler;
+	auto i = _resamplers.find(stream);
+	if (i != _resamplers.end()) {
+		resampler = i->second;
+	} else {
+		if (stream->frame_rate() != resampled_rate) {
+			LOG_GENERAL (
+				"Creating new resampler from %1 to %2 with %3 channels",
+				stream->frame_rate(),
+				resampled_rate,
+				stream->channels()
+				);
+
+			resampler = make_shared<Resampler>(stream->frame_rate(), resampled_rate, stream->channels());
+			if (_fast) {
+				resampler->set_fast ();
+			}
+			_resamplers[stream] = resampler;
+		}
+	}
 
-	AudioFrame const remaining = max ((AudioFrame) 0, available - to_return);
+	if (resampler && !flushing) {
+		/* It can be that the data here has a different number of channels than the stream
+		 * it comes from (e.g. the files decoded by FFmpegDecoder sometimes have a random
+		 * frame, often at the end, with more channels).  Insert silence or discard channels
+		 * here.
+		 */
+		if (resampler->channels() != data->channels()) {
+			LOG_WARNING("Received audio data with an unexpected channel count of %1 instead of %2", data->channels(), resampler->channels());
+			auto data_copy = data->clone();
+			data_copy->set_channels(resampler->channels());
+			data = resampler->run(data_copy);
+		} else {
+			data = resampler->run(data);
+		}
 
-	/* Clean up decoded; first, move the data after what we just returned to the start of the buffer */
-	_decoded_audio.audio->move (decoded_offset + to_return, 0, remaining);
-	/* And set up the number of frames we have left */
-	_decoded_audio.audio->set_frames (remaining);
-	/* Also bump where those frames are in terms of the content */
-	_decoded_audio.frame += decoded_offset + to_return;
+		if (data->frames() == 0) {
+			return;
+		}
+	}
 
-	return shared_ptr<ContentAudio> (new ContentAudio (out, frame));
+	Data(stream, ContentAudio (data, _positions[stream]));
+	_positions[stream] += data->frames();
 }
 
-/** Called by subclasses when audio data is ready.
- *
- *  Audio timestamping is made hard by many factors, but perhaps the most entertaining is resampling.
- *  We have to assume that we are feeding continuous data into the resampler, and so we get continuous
- *  data out.  Hence we do the timestamping here, post-resampler, just by counting samples.
- *
- *  The time is passed in here so that after a seek we can set up our _audio_position.  The
- *  time is ignored once this has been done.
- */
-void
-AudioDecoder::audio (shared_ptr<const AudioBuffers> data, ContentTime time)
+
+/** @return Time just after the last thing that was emitted from a given stream */
+ContentTime
+AudioDecoder::stream_position (shared_ptr<const Film> film, AudioStreamPtr stream) const
 {
-	if (_resampler) {
-		data = _resampler->run (data);
-	}
+	auto i = _positions.find (stream);
+	DCPOMATIC_ASSERT (i != _positions.end ());
+	return ContentTime::from_frames (i->second, _content->resampled_frame_rate(film));
+}
 
-	if (_processor) {
-		data = _processor->run (data);
-	}
-	AudioFrame const frame_rate = _audio_content->resampled_audio_frame_rate ();
-
-	if (_seek_reference) {
-		/* We've had an accurate seek and now we're seeing some data */
-		ContentTime const delta = time - _seek_reference.get ();
-		AudioFrame const delta_frames = delta.frames (frame_rate);
-		if (delta_frames > 0) {
-			/* This data comes after the seek time.  Pad the data with some silence. */
-			shared_ptr<AudioBuffers> padded (new AudioBuffers (data->channels(), data->frames() + delta_frames));
-			padded->make_silent ();
-			padded->copy_from (data.get(), data->frames(), 0, delta_frames);
-			data = padded;
-			time -= delta;
-		} else if (delta_frames < 0) {
-			/* This data comes before the seek time.  Throw some data away */
-			AudioFrame const to_discard = min (-delta_frames, static_cast<AudioFrame> (data->frames()));
-			AudioFrame const to_keep = data->frames() - to_discard;
-			if (to_keep == 0) {
-				/* We have to throw all this data away, so keep _seek_reference and
-				   try again next time some data arrives.
-				*/
-				return;
-			}
-			shared_ptr<AudioBuffers> trimmed (new AudioBuffers (data->channels(), to_keep));
-			trimmed->copy_from (data.get(), to_keep, to_discard, 0);
-			data = trimmed;
-			time += ContentTime::from_frames (to_discard, frame_rate);
+boost::optional<ContentTime>
+AudioDecoder::position (shared_ptr<const Film> film) const
+{
+	optional<ContentTime> p;
+	for (auto i: _positions) {
+		auto const ct = stream_position (film, i.first);
+		if (!p || ct < *p) {
+			p = ct;
 		}
-		_seek_reference = optional<ContentTime> ();
 	}
 
-	if (!_audio_position) {
-		_audio_position = time.frames (frame_rate);
-	}
+	return p;
+}
 
-	assert (_audio_position.get() >= (_decoded_audio.frame + _decoded_audio.audio->frames()));
-
-	/* Resize _decoded_audio to fit the new data */
-	int new_size = 0;
-	if (_decoded_audio.audio->frames() == 0) {
-		/* There's nothing in there, so just store the new data */
-		new_size = data->frames ();
-		_decoded_audio.frame = _audio_position.get ();
-	} else {
-		/* Otherwise we need to extend _decoded_audio to include the new stuff */
-		new_size = _audio_position.get() + data->frames() - _decoded_audio.frame;
+void
+AudioDecoder::seek ()
+{
+	for (auto i: _resamplers) {
+		i.second->flush ();
+		i.second->reset ();
 	}
-
-	_decoded_audio.audio->ensure_size (new_size);
-	_decoded_audio.audio->set_frames (new_size);
-
-	/* Copy new data in */
-	_decoded_audio.audio->copy_from (data.get(), data->frames(), 0, _audio_position.get() - _decoded_audio.frame);
-	_audio_position = _audio_position.get() + data->frames ();
-
-	/* Limit the amount of data we keep in case nobody is asking for it */
-	int const max_frames = _audio_content->resampled_audio_frame_rate () * 10;
-	if (_decoded_audio.audio->frames() > max_frames) {
-		int const to_remove = _decoded_audio.audio->frames() - max_frames;
-		_decoded_audio.frame += to_remove;
-		_decoded_audio.audio->move (to_remove, 0, max_frames);
-		_decoded_audio.audio->set_frames (max_frames);
+
+	for (auto& i: _positions) {
+		i.second = 0;
 	}
 }
 
-/* XXX: called? */
+
 void
 AudioDecoder::flush ()
 {
-	if (!_resampler) {
-		return;
+	for (auto const& i: _resamplers) {
+		auto ro = i.second->flush ();
+		if (ro->frames() > 0) {
+			Data (i.first, ContentAudio (ro, _positions[i.first]));
+			_positions[i.first] += ro->frames();
+		}
 	}
 
-	/*
-	shared_ptr<const AudioBuffers> b = _resampler->flush ();
-	if (b) {
-		_pending.push_back (shared_ptr<DecodedAudio> (new DecodedAudio (b, _audio_position.get ())));
-		_audio_position = _audio_position.get() + b->frames ();
+	if (_content->delay() < 0) {
+		/* Finish off with the gap caused by the delay */
+		for (auto stream: _content->streams()) {
+			silence (stream, -_content->delay());
+		}
 	}
-	*/
 }
 
+
 void
-AudioDecoder::seek (ContentTime t, bool accurate)
+AudioDecoder::silence (AudioStreamPtr stream, int milliseconds)
 {
-	_audio_position.reset ();
-	reset_decoded_audio ();
-	if (accurate) {
-		_seek_reference = t;
-	}
-	if (_processor) {
-		_processor->flush ();
-	}
+	int const samples = ContentTime::from_seconds(milliseconds / 1000.0).frames_round(stream->frame_rate());
+	auto silence = make_shared<AudioBuffers>(stream->channels(), samples);
+	silence->make_silent ();
+	Data (stream, ContentAudio(silence, _positions[stream]));
 }