From 71b7808830e17423453502ed5112b70bf90fff1b Mon Sep 17 00:00:00 2001
From: Carl Hetherington <cth@carlh.net>
Date: Mon, 22 Oct 2012 14:35:00 +0100
Subject: [PATCH] Do audio/video pts sync in a hopefully much more sensible
 way.

---
 .../values                                    | 17 +++++
 .../values                                    | 17 +++++
 src/lib/decoder.cc                            |  8 ++-
 src/lib/decoder.h                             |  8 +--
 src/lib/examine_content_job.cc                |  8 ---
 src/lib/examine_content_job.h                 |  1 -
 src/lib/ffmpeg_decoder.cc                     | 62 ++++++++++++++-----
 src/lib/ffmpeg_decoder.h                      |  2 -
 src/lib/film.cc                               |  1 -
 src/lib/film_state.cc                         | 16 -----
 src/lib/film_state.h                          | 13 ----
 11 files changed, 87 insertions(+), 66 deletions(-)
 create mode 100644 .be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values
 create mode 100644 .be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values

diff --git a/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values
new file mode 100644
index 000000000..0fd81c0cd
--- /dev/null
+++ b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values
@@ -0,0 +1,17 @@
+creator: Carl Hetherington <cth@carlh.net>
+
+
+reporter: Carl Hetherington <cth@carlh.net>
+
+
+severity: minor
+
+
+status: open
+
+
+summary: Do the delay line in deinterleaved floats to simplify code.
+
+
+time: Mon, 22 Oct 2012 13:27:50 +0000
+
diff --git a/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values
new file mode 100644
index 000000000..7b9e04496
--- /dev/null
+++ b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values
@@ -0,0 +1,17 @@
+creator: Carl Hetherington <cth@carlh.net>
+
+
+reporter: Carl Hetherington <cth@carlh.net>
+
+
+severity: minor
+
+
+status: open
+
+
+summary: Decoder should use its own methods more than FilmState
+
+
+time: Mon, 22 Oct 2012 13:28:57 +0000
+
diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc
index c9235f8e1..65e5ff722 100644
--- a/src/lib/decoder.cc
+++ b/src/lib/decoder.cc
@@ -90,12 +90,10 @@ Decoder::~Decoder ()
 void
 Decoder::process_begin ()
 {
-	_delay_in_bytes = _fs->total_audio_delay() * _fs->audio_sample_rate() * _fs->audio_channels() * bytes_per_audio_sample() / 1000;
+	_delay_in_bytes = _fs->audio_delay() * _fs->audio_sample_rate() * _fs->audio_channels() * bytes_per_audio_sample() / 1000;
 	delete _delay_line;
 	_delay_line = new DelayLine (_delay_in_bytes);
 
-	_log->log (String::compose ("Decoding audio with total delay of %1", _fs->total_audio_delay()));
-
 	_audio_frames_processed = 0;
 }
 
@@ -129,6 +127,10 @@ Decoder::process_end ()
 		_log->log (String::compose ("DCP length is %1; %2 frames of audio processed.", _fs->dcp_length(), _audio_frames_processed));
 		_log->log (String::compose ("Adding %1 frames of silence to the end.", audio_short_by_frames));
 
+		/* XXX: this is slightly questionable; does memset () give silence with all
+		   sample formats?
+		*/
+
 		int64_t bytes = audio_short_by_frames * _fs->audio_channels() * bytes_per_audio_sample();
 		
 		int64_t const silence_size = 16 * 1024 * _fs->audio_channels() * bytes_per_audio_sample();
diff --git a/src/lib/decoder.h b/src/lib/decoder.h
index 7559217eb..e81fbb24e 100644
--- a/src/lib/decoder.h
+++ b/src/lib/decoder.h
@@ -71,11 +71,6 @@ public:
 	virtual int64_t audio_channel_layout () const = 0;
 	virtual bool has_subtitles () const = 0;
 
-	/** @return amount of extra unwanted audio at the start (or -ve for unwanted video) in milliseconds */
-	virtual int audio_to_discard () const {
-		return 0;
-	}
-
 	void process_begin ();
 	bool pass ();
 	void process_end ();
@@ -117,6 +112,8 @@ protected:
 	void process_audio (uint8_t *, int);
 	void process_subtitle (boost::shared_ptr<TimedSubtitle>);
 
+	int bytes_per_audio_sample () const;
+	
 	/** our FilmState */
 	boost::shared_ptr<const FilmState> _fs;
 	/** our options */
@@ -137,7 +134,6 @@ protected:
 private:
 	void setup_video_filters ();
 	void emit_audio (uint8_t* data, int size);
-	int bytes_per_audio_sample () const;
 	
 	/** last video frame to be processed */
 	int _video_frame;
diff --git a/src/lib/examine_content_job.cc b/src/lib/examine_content_job.cc
index d12e06069..f0fb29812 100644
--- a/src/lib/examine_content_job.cc
+++ b/src/lib/examine_content_job.cc
@@ -72,7 +72,6 @@ ExamineContentJob::run ()
 	fs->set_length (_decoder->last_video_frame ());
 
 	_log->log (String::compose ("Video length is %1 frames", _decoder->last_video_frame()));
-	_log->log (String::compose ("%1ms of audio to discard", _decoder->audio_to_discard()));
 
 	ascend ();
 
@@ -111,10 +110,3 @@ ExamineContentJob::last_video_frame () const
 {
 	return _decoder->last_video_frame ();
 }
-
-int
-ExamineContentJob::audio_to_discard () const
-{
-	return _decoder->audio_to_discard ();
-}
-
diff --git a/src/lib/examine_content_job.h b/src/lib/examine_content_job.h
index d8e94f1ec..3bbd673a8 100644
--- a/src/lib/examine_content_job.h
+++ b/src/lib/examine_content_job.h
@@ -38,7 +38,6 @@ public:
 	void run ();
 
 	int last_video_frame () const;
-	int audio_to_discard () const;
 
 private:
 	boost::shared_ptr<Decoder> _decoder;
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index d8a541be3..c74fee008 100644
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -67,6 +67,7 @@ FFmpegDecoder::FFmpegDecoder (boost::shared_ptr<const FilmState> s, boost::share
 	, _subtitle_codec_context (0)
 	, _subtitle_codec (0)
 	, _first_video_pts (-1)
+	, _first_audio_pts (-1)
 {
 	setup_general ();
 	setup_video ();
@@ -246,16 +247,56 @@ FFmpegDecoder::do_pass ()
 
 	if (_packet.stream_index == _video_stream) {
 
+		if (_first_video_pts == -1) {
+			_first_video_pts = _packet.pts;
+		}
+		
 		int frame_finished;
 		if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
-			if (_first_video_pts == -1) {
-				_first_video_pts = _packet.pts;
-			}
 			process_video (_frame);
 		}
 
-	} else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio) {
-		
+	} else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && (_first_video_pts != -1 && _packet.pts > _first_video_pts)) {
+
+		/* Note: We only decode audio if we've had our first video packet through, and if this
+		   packet comes after it.  Until then it is thrown away.
+		*/
+
+		if (_first_audio_pts == -1) {
+			_first_audio_pts = _packet.pts;
+
+			/* This is our first audio packet, and if we've arrived here we must have had our
+			   first video packet.  Push some silence to make up the gap between our first
+			   video packet and our first audio.
+			*/
+			
+			AVStream* v = _format_context->streams[_video_stream];
+			AVStream* a = _format_context->streams[_audio_stream];
+			
+			assert (v->time_base.num == a->time_base.num);
+			assert (v->time_base.den == a->time_base.den);
+
+			/* samples of silence that we must push */
+			int const s = rint (av_q2d (v->time_base) * (_first_audio_pts - _first_video_pts) * audio_sample_rate ());
+
+			_log->log (
+				String::compose (
+					"First video at %1, first audio at %2, pushing %3 samples of silence",
+					_first_video_pts, _first_audio_pts, s
+					)
+				);
+
+			/* hence bytes */
+			int const b = s * audio_channels() * bytes_per_audio_sample();
+
+			/* XXX: b is not bounded here but silence[] is a variable-length array on the stack, so a large
+			   audio/video gap could overflow it.  Zeroed bytes are also assumed to be silence in every sample format.
+			*/
+			uint8_t silence[b];
+			memset (silence, 0, b);
+			process_audio (silence, b);
+		}
+
 		avcodec_get_frame_defaults (_frame);
 		
 		int frame_finished;
@@ -424,14 +465,3 @@ FFmpegDecoder::stream_name (AVStream* s) const
 	return n.str ();
 }
 
-int
-FFmpegDecoder::audio_to_discard () const
-{
-	AVStream* v = _format_context->streams[_video_stream];
-	AVStream* a = _format_context->streams[_audio_stream];
-
-	assert (v->time_base.num == a->time_base.num);
-	assert (v->time_base.den == a->time_base.den);
-
-	return rint (av_q2d (v->time_base) * 1000 * (_first_video_pts - _first_audio_pts));
-}
diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h
index dc10635a5..ac4cd6fcf 100644
--- a/src/lib/ffmpeg_decoder.h
+++ b/src/lib/ffmpeg_decoder.h
@@ -65,8 +65,6 @@ public:
 	AVSampleFormat audio_sample_format () const;
 	int64_t audio_channel_layout () const;
 	bool has_subtitles () const;
-	int bytes_per_audio_sample () const;
-	int audio_to_discard () const;
 
 	std::vector<AudioStream> audio_streams () const;
 	std::vector<SubtitleStream> subtitle_streams () const;
diff --git a/src/lib/film.cc b/src/lib/film.cc
index 3a8b29f86..aa0bfa211 100644
--- a/src/lib/film.cc
+++ b/src/lib/film.cc
@@ -256,7 +256,6 @@ void
 Film::examine_content_post_gui ()
 {
 	set_length (_examine_content_job->last_video_frame ());
-	set_audio_to_discard (_examine_content_job->audio_to_discard ());
 	_examine_content_job.reset ();
 
 	string const tdir = dir ("thumbs");
diff --git a/src/lib/film_state.cc b/src/lib/film_state.cc
index d59d7ce7b..283830f59 100644
--- a/src/lib/film_state.cc
+++ b/src/lib/film_state.cc
@@ -126,7 +126,6 @@ FilmState::write_metadata () const
 	}
 
 	f << "frames_per_second " << _frames_per_second << "\n";
-	f << "audio_to_discard " << _audio_to_discard << "\n";
 	
 	_dirty = false;
 }
@@ -231,8 +230,6 @@ FilmState::read_metadata ()
 			_subtitle_streams.push_back (SubtitleStream (v));
 		} else if (k == "frames_per_second") {
 			_frames_per_second = atof (v.c_str ());
-		} else if (k == "audio_to_discard") {
-			_audio_to_discard = atoi (v.c_str ());
 		}
 	}
 		
@@ -812,13 +809,6 @@ FilmState::set_frames_per_second (float f)
 	_frames_per_second = f;
 	signal_changed (FRAMES_PER_SECOND);
 }
-
-void
-FilmState::set_audio_to_discard (int a)
-{
-	_audio_to_discard = a;
-	signal_changed (AUDIO_TO_DISCARD);
-}
 	
 void
 FilmState::signal_changed (Property p)
@@ -842,9 +832,3 @@ FilmState::audio_channels () const
 
 	return _audio_streams[_audio_stream].channels ();
 }
-
-int
-FilmState::total_audio_delay () const
-{
-	return _audio_delay - _audio_to_discard;
-}
diff --git a/src/lib/film_state.h b/src/lib/film_state.h
index d1d7489f6..8940d0e5d 100644
--- a/src/lib/film_state.h
+++ b/src/lib/film_state.h
@@ -73,7 +73,6 @@ public:
 		, _audio_sample_rate (0)
 		, _has_subtitles (false)
 		, _frames_per_second (0)
-		, _audio_to_discard (0)
 		, _dirty (false)
 	{}
 
@@ -114,7 +113,6 @@ public:
 		, _audio_streams     (o._audio_streams)
 		, _subtitle_streams  (o._subtitle_streams)
 		, _frames_per_second (o._frames_per_second)
-		, _audio_to_discard  (o._audio_to_discard)
 		, _dirty             (o._dirty)
 	{}
 
@@ -150,7 +148,6 @@ public:
 	}
 
 	int audio_channels () const;
-	int total_audio_delay () const;
 
 	enum Property {
 		NONE,
@@ -182,7 +179,6 @@ public:
 		AUDIO_STREAMS,
 		SUBTITLE_STREAMS,
 		FRAMES_PER_SECOND,
-		AUDIO_TO_DISCARD
 	};
 
 
@@ -342,11 +338,7 @@ public:
 		return _frames_per_second;
 	}
 
-	int audio_to_discard () const {
-		return _audio_to_discard;
-	}
 
-	
 	/* SET */
 
 	void set_directory (std::string);
@@ -390,7 +382,6 @@ public:
 	void set_audio_streams (std::vector<AudioStream>);
 	void set_subtitle_streams (std::vector<SubtitleStream>);
 	void set_frames_per_second (float);
-	void set_audio_to_discard (int);
 
 	/** Emitted when some property has changed */
 	mutable sigc::signal1<void, Property> Changed;
@@ -480,10 +471,6 @@ private:
 	std::vector<SubtitleStream> _subtitle_streams;
 	/** Frames per second of the source */
 	float _frames_per_second;
-	/** Number of milliseconds of audio to discard at the start of this film
-	    in order to sync audio with video.  Can be negative.
-	*/
-	int _audio_to_discard;
 
 	mutable bool _dirty;
 
-- 
2.30.2