From f3fdae3f8f4ae54b17f925f81a5e9d4b3589269b Mon Sep 17 00:00:00 2001
From: Carl Hetherington <cth@carlh.net>
Date: Sat, 29 Sep 2012 23:41:25 +0100
Subject: [PATCH] Entirely untested resampling to fix 24fps drop-frame.

---
 src/lib/ab_transcoder.cc    |   2 +-
 src/lib/decoder.cc          | 105 +--------------------------
 src/lib/decoder.h           |   9 ---
 src/lib/encoder.h           |   5 +-
 src/lib/j2k_still_encoder.h |   2 +-
 src/lib/j2k_wav_encoder.cc  | 137 ++++++++++++++++++++++++++++++++++--
 src/lib/j2k_wav_encoder.h   |  16 ++++-
 src/lib/tiff_encoder.h      |   2 +-
 src/lib/transcoder.cc       |   2 +-
 9 files changed, 156 insertions(+), 124 deletions(-)
diff --git a/src/lib/ab_transcoder.cc b/src/lib/ab_transcoder.cc
index aabaf2d03..95492a9d8 100644
--- a/src/lib/ab_transcoder.cc
+++ b/src/lib/ab_transcoder.cc
@@ -103,7 +103,7 @@ ABTranscoder::process_video (shared_ptr<Image> yuv, int frame, int index)
 void
 ABTranscoder::go ()
 {
-	_encoder->process_begin ();
+	_encoder->process_begin (_da->audio_channel_layout(), _da->audio_sample_format());
 	_da->process_begin ();
 	_db->process_begin ();
 	
diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc
index 973582ca4..b7aca764d 100644
--- a/src/lib/decoder.cc
+++ b/src/lib/decoder.cc
@@ -69,9 +69,6 @@ Decoder::Decoder (boost::shared_ptr<const FilmState> s, boost::shared_ptr<const
 	, _video_frame (0)
 	, _buffer_src_context (0)
 	, _buffer_sink_context (0)
-#if HAVE_SWRESAMPLE	  
-	, _swr_context (0)
-#endif	  
 	, _have_setup_video_filters (false)
 	, _delay_line (0)
 	, _delay_in_bytes (0)
@@ -91,29 +88,6 @@ Decoder::~Decoder ()
 void
 Decoder::process_begin ()
 {
-	if (_fs->audio_sample_rate != dcp_audio_sample_rate (_fs->audio_sample_rate)) {
-#if HAVE_SWRESAMPLE		
-		_swr_context = swr_alloc_set_opts (
-			0,
-			audio_channel_layout(),
-			audio_sample_format(),
-			dcp_audio_sample_rate (_fs->audio_sample_rate),
-			audio_channel_layout(),
-			audio_sample_format(),
-			_fs->audio_sample_rate,
-			0, 0
-			);
-		
-		swr_init (_swr_context);
-#else
-		throw DecodeError ("Cannot resample audio as libswresample is not present");
-#endif		
-	} else {
-#if HAVE_SWRESAMPLE		
-		_swr_context = 0;
-#endif		
-	}
-
 	_delay_in_bytes = _fs->audio_delay * _fs->audio_sample_rate * _fs->audio_channels * _fs->bytes_per_sample() / 1000;
 	delete _delay_line;
 	_delay_line = new DelayLine (_delay_in_bytes);
@@ -125,35 +99,6 @@ Decoder::process_begin ()
 void
 Decoder::process_end ()
 {
-#if HAVE_SWRESAMPLE	
-	if (_swr_context) {
-
-		int mop = 0;
-		while (1) {
-			uint8_t buffer[256 * _fs->bytes_per_sample() * _fs->audio_channels];
-			uint8_t* out[1] = {
-				buffer
-			};
-
-			int const frames = swr_convert (_swr_context, out, 256, 0, 0);
-
-			if (frames < 0) {
-				throw DecodeError ("could not run sample-rate converter");
-			}
-
-			if (frames == 0) {
-				break;
-			}
-
-			mop += frames;
-			int available = _delay_line->feed (buffer, frames * _fs->audio_channels * _fs->bytes_per_sample());
-			Audio (buffer, available);
-		}
-
-		swr_free (&_swr_context);
-	}
-#endif	
-	
 	if (_delay_in_bytes < 0) {
 		uint8_t remainder[-_delay_in_bytes];
 		_delay_line->get_remaining (remainder);
@@ -166,7 +111,7 @@ Decoder::process_end ()
 	*/
 
 	int64_t const audio_short_by_frames =
-		((int64_t) decoding_frames() * dcp_audio_sample_rate (_fs->audio_sample_rate) / _fs->frames_per_second)
+		((int64_t) decoding_frames() * _fs->audio_sample_rate / _fs->frames_per_second)
 		- _audio_frames_processed;
 
 	if (audio_short_by_frames >= 0) {
@@ -240,16 +185,9 @@ Decoder::pass ()
 void
 Decoder::process_audio (uint8_t* data, int size)
 {
-	/* Here's samples per channel */
+	/* Samples per channel */
 	int const samples = size / _fs->bytes_per_sample();
 
-#if HAVE_SWRESAMPLE	
-	/* And here's frames (where 1 frame is a collection of samples, 1 for each channel,
-	   so for 5.1 a frame would be 6 samples)
-	*/
-	int const frames = samples / _fs->audio_channels;
-#endif	
-
 	/* Maybe apply gain */
 	if (_fs->audio_gain != 0) {
 		float const linear_gain = pow (10, _fs->audio_gain / 20);
@@ -282,51 +220,12 @@ Decoder::process_audio (uint8_t* data, int size)
 		}
 	}
 
-	/* This is a buffer we might use if we are sample-rate converting;
-	   it will need freeing if so.
-	*/
-	uint8_t* out_buffer = 0;
-
-	/* Maybe sample-rate convert */
-#if HAVE_SWRESAMPLE	
-	if (_swr_context) {
-
-		uint8_t const * in[2] = {
-			data,
-			0
-		};
-
-		/* Compute the resampled frame count and add 32 for luck */
-		int const out_buffer_size_frames = ceil (frames * float (dcp_audio_sample_rate (_fs->audio_sample_rate)) / _fs->audio_sample_rate) + 32;
-		int const out_buffer_size_bytes = out_buffer_size_frames * _fs->audio_channels * _fs->bytes_per_sample();
-		out_buffer = new uint8_t[out_buffer_size_bytes];
-
-		uint8_t* out[2] = {
-			out_buffer, 
-			0
-		};
-
-		/* Resample audio */
-		int out_frames = swr_convert (_swr_context, out, out_buffer_size_frames, in, frames);
-		if (out_frames < 0) {
-			throw DecodeError ("could not run sample-rate converter");
-		}
-
-		/* And point our variables at the resampled audio */
-		data = out_buffer;
-		size = out_frames * _fs->audio_channels * _fs->bytes_per_sample();
-	}
-#endif	
-		
 	/* Update the number of audio frames we've pushed to the encoder */
 	_audio_frames_processed += size / (_fs->audio_channels * _fs->bytes_per_sample ());
 
 	/* Push into the delay line and then tell the world what we've got */
 	int available = _delay_line->feed (data, size);
 	Audio (data, available);
-
-	/* Delete the sample-rate conversion buffer, if it exists */
-	delete[] out_buffer;
 }
 
 /** Called by subclasses to tell the world that some video data is ready.
diff --git a/src/lib/decoder.h b/src/lib/decoder.h
index 14b25c7b0..19ef25ede 100644
--- a/src/lib/decoder.h
+++ b/src/lib/decoder.h
@@ -29,11 +29,6 @@
 #include <stdint.h>
 #include <boost/shared_ptr.hpp>
 #include <sigc++/sigc++.h>
-#ifdef HAVE_SWRESAMPLE
-extern "C" {
-#include <libswresample/swresample.h>
-}
-#endif
 #include "util.h"
 
 class Job;
@@ -134,10 +129,6 @@ private:
 	AVFilterContext* _buffer_src_context;
 	AVFilterContext* _buffer_sink_context;
 
-#if HAVE_SWRESAMPLE	
-	SwrContext* _swr_context;
-#endif	
-
 	bool _have_setup_video_filters;
 	DelayLine* _delay_line;
 	int _delay_in_bytes;
diff --git a/src/lib/encoder.h b/src/lib/encoder.h
index 539b2912c..ea356cec4 100644
--- a/src/lib/encoder.h
+++ b/src/lib/encoder.h
@@ -28,6 +28,9 @@
 #include <boost/thread/mutex.hpp>
 #include <list>
 #include <stdint.h>
+extern "C" {
+#include <libavutil/samplefmt.h>
+}
 
 class FilmState;
 class Options;
@@ -50,7 +53,7 @@ public:
 	Encoder (boost::shared_ptr<const FilmState> s, boost::shared_ptr<const Options> o, Log* l);
 
 	/** Called to indicate that a processing run is about to begin */
-	virtual void process_begin () = 0;
+	virtual void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) = 0;
 
 	/** Called with a frame of video.
 	 *  @param i Video frame image.
diff --git a/src/lib/j2k_still_encoder.h b/src/lib/j2k_still_encoder.h
index d4d68724e..755c68877 100644
--- a/src/lib/j2k_still_encoder.h
+++ b/src/lib/j2k_still_encoder.h
@@ -36,7 +36,7 @@ class J2KStillEncoder : public Encoder
 public:
 	J2KStillEncoder (boost::shared_ptr<const FilmState>, boost::shared_ptr<const Options>, Log *);
 
-	void process_begin () {}
+	void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) {}
 	void process_video (boost::shared_ptr<Image>, int);
 	void process_audio (uint8_t *, int) {}
 	void process_end () {}
diff --git a/src/lib/j2k_wav_encoder.cc b/src/lib/j2k_wav_encoder.cc
index 9ae01c774..86c3ae13f 100644
--- a/src/lib/j2k_wav_encoder.cc
+++ b/src/lib/j2k_wav_encoder.cc
@@ -46,6 +46,9 @@ using namespace boost;
 
 J2KWAVEncoder::J2KWAVEncoder (shared_ptr<const FilmState> s, shared_ptr<const Options> o, Log* l)
 	: Encoder (s, o, l)
+#ifdef HAVE_SWRESAMPLE	  
+	, _swr_context (0)
+#endif	  
 	, _deinterleave_buffer_size (8192)
 	, _deinterleave_buffer (0)
 	, _process_end (false)
@@ -210,8 +213,31 @@ J2KWAVEncoder::encoder_thread (ServerDescription* server)
 }
 
 void
-J2KWAVEncoder::process_begin ()
+J2KWAVEncoder::process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format)
 {
+	/* Resample when dcp_audio_sample_rate() would change the source rate, or the video frame rate is fractional */
+	if ((_fs->audio_sample_rate != dcp_audio_sample_rate (_fs->audio_sample_rate)) || (rint (_fs->frames_per_second) != _fs->frames_per_second)) {
+#if HAVE_SWRESAMPLE
+		_swr_context = swr_alloc_set_opts (
+			0,
+			audio_channel_layout, audio_sample_format, target_sample_rate(),
+			audio_channel_layout, audio_sample_format, _fs->audio_sample_rate,
+			0, 0
+			);
+		/* Fail now: with a null context process_audio() would silently skip resampling */
+		if (_swr_context == 0 || swr_init (_swr_context) < 0) {
+			swr_free (&_swr_context);
+			throw EncodeError ("could not set up sample-rate converter");
+		}
+#else
+		throw EncodeError ("Cannot resample audio as libswresample is not present");
+#endif
+	} else {
+#if HAVE_SWRESAMPLE
+		_swr_context = 0;
+#endif
+	}
+	
 	for (int i = 0; i < Config::instance()->num_local_encoding_threads (); ++i) {
 		_worker_threads.push_back (new boost::thread (boost::bind (&J2KWAVEncoder::encoder_thread, this, (ServerDescription *) 0)));
 	}
@@ -268,6 +294,34 @@ J2KWAVEncoder::process_end ()
 			_log->log (s.str ());
 		}
 	}
+
+#if HAVE_SWRESAMPLE
+	if (_swr_context) {
+		/* Flush the resampler: calling swr_convert with no input returns whatever it still has buffered */
+		int const bytes_per_frame = _fs->audio_channels * _fs->bytes_per_sample();
+		while (1) {
+			uint8_t buffer[256 * bytes_per_frame];
+			uint8_t* out[1] = {
+				buffer
+			};
+
+			int const frames = swr_convert (_swr_context, out, 256, 0, 0);
+
+			if (frames < 0) {
+				throw EncodeError ("could not run sample-rate converter");
+			}
+
+			if (frames == 0) {
+				break;
+			}
+
+			/* write_audio() takes a size in bytes, not a count of frames */
+			write_audio (buffer, frames * bytes_per_frame);
+		}
+
+		swr_free (&_swr_context);
+	}
+#endif
 	
 	close_sound_files ();
 
@@ -281,11 +335,64 @@ J2KWAVEncoder::process_end ()
 }
 
 void
-J2KWAVEncoder::process_audio (uint8_t* data, int data_size)
+J2KWAVEncoder::process_audio (uint8_t* data, int size)
+{
+	/* Buffer for resampled audio; only allocated (and so only in need of
+	   freeing) when a sample-rate converter is in use.
+	*/
+	uint8_t* out_buffer = 0;
+
+#if HAVE_SWRESAMPLE
+	if (_swr_context) {
+		/* The input is handed to the converter as a single buffer */
+		uint8_t const * in[2] = {
+			data,
+			0
+		};
+
+		/* Bytes in one frame (where 1 frame is a collection of samples, 1 for each
+		   channel, so for 5.1 a frame would be 6 samples)
+		*/
+		int const bytes_per_frame = _fs->audio_channels * _fs->bytes_per_sample();
+		int const frames = size / bytes_per_frame;
+
+		/* Compute the resampled frame count in floating point, so that ceil() really
+		   does round up, and add 32 for luck
+		*/
+		int const out_buffer_size_frames = ceil (frames * double (target_sample_rate()) / _fs->audio_sample_rate) + 32;
+		out_buffer = new uint8_t[out_buffer_size_frames * bytes_per_frame];
+
+		uint8_t* out[2] = {
+			out_buffer,
+			0
+		};
+
+		/* Resample audio; swr_convert counts its input in frames (samples per channel), not bytes */
+		int const out_frames = swr_convert (_swr_context, out, out_buffer_size_frames, in, frames);
+		if (out_frames < 0) {
+			/* Do not leak the conversion buffer on the error path */
+			delete[] out_buffer;
+			throw EncodeError ("could not run sample-rate converter");
+		}
+
+		/* And point our variables at the resampled audio */
+		data = out_buffer;
+		size = out_frames * bytes_per_frame;
+	}
+#endif
+
+	write_audio (data, size);
+
+	/* Delete the sample-rate conversion buffer, if it exists */
+	delete[] out_buffer;
+}
+
+void
+J2KWAVEncoder::write_audio (uint8_t* data, int size)
 {
 	/* Size of a sample in bytes */
 	int const sample_size = 2;
-	
+
 	/* XXX: we are assuming that sample_size is right, the _deinterleave_buffer_size is a multiple
 	   of the sample size and that data_size is a multiple of _fs->audio_channels * sample_size.
 	*/
@@ -293,7 +400,7 @@ J2KWAVEncoder::process_audio (uint8_t* data, int data_size)
 	/* XXX: this code is very tricksy and it must be possible to make it simpler ... */
 	
 	/* Number of bytes left to read this time */
-	int remaining = data_size;
+	int remaining = size;
 	/* Our position in the output buffers, in bytes */
 	int position = 0;
 	while (remaining > 0) {
@@ -313,7 +420,7 @@ J2KWAVEncoder::process_audio (uint8_t* data, int data_size)
 				sf_write_short (_sound_files[i], (const short *) _deinterleave_buffer, this_time / sample_size);
 				break;
 			default:
-				throw DecodeError ("unknown audio sample format");
+				throw EncodeError ("unknown audio sample format");
 			}
 		}
 		
@@ -321,3 +428,23 @@ J2KWAVEncoder::process_audio (uint8_t* data, int data_size)
 		remaining -= this_time * _fs->audio_channels;
 	}
 }
+
+int
+J2KWAVEncoder::target_sample_rate () const
+{
+	double t = dcp_audio_sample_rate (_fs->audio_sample_rate);
+	if (rint (_fs->frames_per_second) != _fs->frames_per_second) {
+		/* Compare with a tolerance: a stored 23.976 (or 24000 / 1001) is not reliably == the literal */
+		if (fabs (_fs->frames_per_second - 23.976) < 1e-3) {
+			/* 24fps drop-frame ie 24 * 1000 / 1001 frames per second; hence we need to
+			   resample the audio to dcp_audio_sample_rate * 1000 / 1001 so that when we play
+			   it back at dcp_audio_sample_rate it is sped up by the same amount that the video is
+			*/
+			t *= double(1000) / 1001;
+		} else {
+			throw EncodeError ("unknown fractional frame rate");
+		}
+	}
+
+	return rint (t);
+}
diff --git a/src/lib/j2k_wav_encoder.h b/src/lib/j2k_wav_encoder.h
index 1c2f50065..3f01ac480 100644
--- a/src/lib/j2k_wav_encoder.h
+++ b/src/lib/j2k_wav_encoder.h
@@ -26,6 +26,11 @@
 #include <boost/thread/condition.hpp>
 #include <boost/thread/mutex.hpp>
 #include <boost/thread.hpp>
+#ifdef HAVE_SWRESAMPLE
+extern "C" {
+#include <libswresample/swresample.h>
+}
+#endif
 #include <sndfile.h>
 #include "encoder.h"
 
@@ -43,17 +48,24 @@ public:
 	J2KWAVEncoder (boost::shared_ptr<const FilmState>, boost::shared_ptr<const Options>, Log *);
 	~J2KWAVEncoder ();
 
-	void process_begin ();
+	void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format);
 	void process_video (boost::shared_ptr<Image>, int);
 	void process_audio (uint8_t *, int);
 	void process_end ();
 
-private:	
+private:
 
+	int target_sample_rate () const;
+
+	void write_audio (uint8_t* data, int size);
 	void encoder_thread (ServerDescription *);
 	void close_sound_files ();
 	void terminate_worker_threads ();
 
+#if HAVE_SWRESAMPLE	
+	SwrContext* _swr_context;
+#endif	
+
 	std::vector<SNDFILE*> _sound_files;
 	int _deinterleave_buffer_size;
 	uint8_t* _deinterleave_buffer;
diff --git a/src/lib/tiff_encoder.h b/src/lib/tiff_encoder.h
index ec8e38011..ef1ce25d2 100644
--- a/src/lib/tiff_encoder.h
+++ b/src/lib/tiff_encoder.h
@@ -36,7 +36,7 @@ class TIFFEncoder : public Encoder
 public:
 	TIFFEncoder (boost::shared_ptr<const FilmState> s, boost::shared_ptr<const Options> o, Log* l);
 
-	void process_begin () {}
+	void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) {}
 	void process_video (boost::shared_ptr<Image>, int);
 	void process_audio (uint8_t *, int) {}
 	void process_end () {}
diff --git a/src/lib/transcoder.cc b/src/lib/transcoder.cc
index 3d71b68f5..b74d09174 100644
--- a/src/lib/transcoder.cc
+++ b/src/lib/transcoder.cc
@@ -57,7 +57,7 @@ Transcoder::Transcoder (shared_ptr<const FilmState> s, shared_ptr<const Options>
 void
 Transcoder::go ()
 {
-	_encoder->process_begin ();
+	_encoder->process_begin (_decoder->audio_channel_layout(), _decoder->audio_sample_format());
 	try {
 		_decoder->go ();
 	} catch (...) {
-- 
2.30.2