From f3fdae3f8f4ae54b17f925f81a5e9d4b3589269b Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Sat, 29 Sep 2012 23:41:25 +0100 Subject: [PATCH] Entirely untested resampling to fix 24fps drop-frame. --- src/lib/ab_transcoder.cc | 2 +- src/lib/decoder.cc | 105 +-------------------------- src/lib/decoder.h | 9 --- src/lib/encoder.h | 5 +- src/lib/j2k_still_encoder.h | 2 +- src/lib/j2k_wav_encoder.cc | 137 ++++++++++++++++++++++++++++++++++-- src/lib/j2k_wav_encoder.h | 16 ++++- src/lib/tiff_encoder.h | 2 +- src/lib/transcoder.cc | 2 +- 9 files changed, 156 insertions(+), 124 deletions(-) diff --git a/src/lib/ab_transcoder.cc b/src/lib/ab_transcoder.cc index aabaf2d03..95492a9d8 100644 --- a/src/lib/ab_transcoder.cc +++ b/src/lib/ab_transcoder.cc @@ -103,7 +103,7 @@ ABTranscoder::process_video (shared_ptr yuv, int frame, int index) void ABTranscoder::go () { - _encoder->process_begin (); + _encoder->process_begin (_da->audio_channel_layout(), _da->audio_sample_format()); _da->process_begin (); _db->process_begin (); diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc index 973582ca4..b7aca764d 100644 --- a/src/lib/decoder.cc +++ b/src/lib/decoder.cc @@ -69,9 +69,6 @@ Decoder::Decoder (boost::shared_ptr s, boost::shared_ptraudio_sample_rate != dcp_audio_sample_rate (_fs->audio_sample_rate)) { -#if HAVE_SWRESAMPLE - _swr_context = swr_alloc_set_opts ( - 0, - audio_channel_layout(), - audio_sample_format(), - dcp_audio_sample_rate (_fs->audio_sample_rate), - audio_channel_layout(), - audio_sample_format(), - _fs->audio_sample_rate, - 0, 0 - ); - - swr_init (_swr_context); -#else - throw DecodeError ("Cannot resample audio as libswresample is not present"); -#endif - } else { -#if HAVE_SWRESAMPLE - _swr_context = 0; -#endif - } - _delay_in_bytes = _fs->audio_delay * _fs->audio_sample_rate * _fs->audio_channels * _fs->bytes_per_sample() / 1000; delete _delay_line; _delay_line = new DelayLine (_delay_in_bytes); @@ -125,35 +99,6 @@ Decoder::process_begin () void Decoder::process_end () { -#if HAVE_SWRESAMPLE - if (_swr_context) { - - int mop = 0; - while (1) { - uint8_t buffer[256 * _fs->bytes_per_sample() * _fs->audio_channels]; - uint8_t* out[1] = { - buffer - }; - - int const frames = swr_convert (_swr_context, out, 256, 0, 0); - - if (frames < 0) { - throw DecodeError ("could not run sample-rate converter"); - } - - if (frames == 0) { - break; - } - - mop += frames; - int available = _delay_line->feed (buffer, frames * _fs->audio_channels * _fs->bytes_per_sample()); - Audio (buffer, available); - } - - swr_free (&_swr_context); - } -#endif - if (_delay_in_bytes < 0) { uint8_t remainder[-_delay_in_bytes]; _delay_line->get_remaining (remainder); @@ -166,7 +111,7 @@ Decoder::process_end () */ int64_t const audio_short_by_frames = - ((int64_t) decoding_frames() * dcp_audio_sample_rate (_fs->audio_sample_rate) / _fs->frames_per_second) + ((int64_t) decoding_frames() * _fs->audio_sample_rate / _fs->frames_per_second) - _audio_frames_processed; if (audio_short_by_frames >= 0) { @@ -240,16 +185,9 @@ Decoder::pass () void Decoder::process_audio (uint8_t* data, int size) { - /* Here's samples per channel */ + /* Samples per channel */ int const samples = size / _fs->bytes_per_sample(); -#if HAVE_SWRESAMPLE - /* And here's frames (where 1 frame is a collection of samples, 1 for each channel, - so for 5.1 a frame would be 6 samples) - */ - int const frames = samples / _fs->audio_channels; -#endif - /* Maybe apply gain */ if (_fs->audio_gain != 0) { float const linear_gain = pow (10, _fs->audio_gain / 20); @@ -282,51 +220,12 @@ Decoder::process_audio (uint8_t* data, int size) } } - /* This is a buffer we might use if we are sample-rate converting; - it will need freeing if so. - */ - uint8_t* out_buffer = 0; - - /* Maybe sample-rate convert */ -#if HAVE_SWRESAMPLE - if (_swr_context) { - - uint8_t const * in[2] = { - data, - 0 - }; - - /* Compute the resampled frame count and add 32 for luck */ - int const out_buffer_size_frames = ceil (frames * float (dcp_audio_sample_rate (_fs->audio_sample_rate)) / _fs->audio_sample_rate) + 32; - int const out_buffer_size_bytes = out_buffer_size_frames * _fs->audio_channels * _fs->bytes_per_sample(); - out_buffer = new uint8_t[out_buffer_size_bytes]; - - uint8_t* out[2] = { - out_buffer, - 0 - }; - - /* Resample audio */ - int out_frames = swr_convert (_swr_context, out, out_buffer_size_frames, in, frames); - if (out_frames < 0) { - throw DecodeError ("could not run sample-rate converter"); - } - - /* And point our variables at the resampled audio */ - data = out_buffer; - size = out_frames * _fs->audio_channels * _fs->bytes_per_sample(); - } -#endif - /* Update the number of audio frames we've pushed to the encoder */ _audio_frames_processed += size / (_fs->audio_channels * _fs->bytes_per_sample ()); /* Push into the delay line and then tell the world what we've got */ int available = _delay_line->feed (data, size); Audio (data, available); - - /* Delete the sample-rate conversion buffer, if it exists */ - delete[] out_buffer; } /** Called by subclasses to tell the world that some video data is ready. diff --git a/src/lib/decoder.h b/src/lib/decoder.h index 14b25c7b0..19ef25ede 100644 --- a/src/lib/decoder.h +++ b/src/lib/decoder.h @@ -29,11 +29,6 @@ #include #include #include -#ifdef HAVE_SWRESAMPLE -extern "C" { -#include -} -#endif #include "util.h" class Job; @@ -134,10 +129,6 @@ private: AVFilterContext* _buffer_src_context; AVFilterContext* _buffer_sink_context; -#if HAVE_SWRESAMPLE - SwrContext* _swr_context; -#endif - bool _have_setup_video_filters; DelayLine* _delay_line; int _delay_in_bytes; diff --git a/src/lib/encoder.h b/src/lib/encoder.h index 539b2912c..ea356cec4 100644 --- a/src/lib/encoder.h +++ b/src/lib/encoder.h @@ -28,6 +28,9 @@ #include #include #include +extern "C" { +#include +} class FilmState; class Options; @@ -50,7 +53,7 @@ public: Encoder (boost::shared_ptr s, boost::shared_ptr o, Log* l); /** Called to indicate that a processing run is about to begin */ - virtual void process_begin () = 0; + virtual void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) = 0; /** Called with a frame of video. * @param i Video frame image. diff --git a/src/lib/j2k_still_encoder.h b/src/lib/j2k_still_encoder.h index d4d68724e..755c68877 100644 --- a/src/lib/j2k_still_encoder.h +++ b/src/lib/j2k_still_encoder.h @@ -36,7 +36,7 @@ class J2KStillEncoder : public Encoder public: J2KStillEncoder (boost::shared_ptr, boost::shared_ptr, Log *); - void process_begin () {} + void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) {} void process_video (boost::shared_ptr, int); void process_audio (uint8_t *, int) {} void process_end () {} diff --git a/src/lib/j2k_wav_encoder.cc b/src/lib/j2k_wav_encoder.cc index 9ae01c774..86c3ae13f 100644 --- a/src/lib/j2k_wav_encoder.cc +++ b/src/lib/j2k_wav_encoder.cc @@ -46,6 +46,9 @@ using namespace boost; J2KWAVEncoder::J2KWAVEncoder (shared_ptr s, shared_ptr o, Log* l) : Encoder (s, o, l) +#ifdef HAVE_SWRESAMPLE + , _swr_context (0) +#endif , _deinterleave_buffer_size (8192) , _deinterleave_buffer (0) , _process_end (false) @@ -210,8 +213,31 @@ J2KWAVEncoder::encoder_thread (ServerDescription* server) } void -J2KWAVEncoder::process_begin () +J2KWAVEncoder::process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) { + if ((_fs->audio_sample_rate != dcp_audio_sample_rate (_fs->audio_sample_rate)) || (rint (_fs->frames_per_second) != _fs->frames_per_second)) { +#ifdef HAVE_SWRESAMPLE + _swr_context = swr_alloc_set_opts ( + 0, + audio_channel_layout, + audio_sample_format, + target_sample_rate(), + audio_channel_layout, + audio_sample_format, + _fs->audio_sample_rate, + 0, 0 + ); + + swr_init (_swr_context); +#else + throw EncodeError ("Cannot resample audio as libswresample is not present"); +#endif + } else { +#ifdef HAVE_SWRESAMPLE + _swr_context = 0; +#endif + } + for (int i = 0; i < Config::instance()->num_local_encoding_threads (); ++i) { _worker_threads.push_back (new boost::thread (boost::bind (&J2KWAVEncoder::encoder_thread, this, (ServerDescription *) 0))); } @@ -268,6 +294,34 @@ J2KWAVEncoder::process_end () _log->log (s.str ()); } } + +#if HAVE_SWRESAMPLE + if (_swr_context) { + + int mop = 0; + while (1) { + uint8_t buffer[256 * _fs->bytes_per_sample() * _fs->audio_channels]; + uint8_t* out[1] = { + buffer + }; + + int const frames = swr_convert (_swr_context, out, 256, 0, 0); + + if (frames < 0) { + throw EncodeError ("could not run sample-rate converter"); + } + + if (frames == 0) { + break; + } + + mop += frames; + write_audio (buffer, frames); + } + + swr_free (&_swr_context); + } +#endif close_sound_files (); @@ -281,11 +335,64 @@ J2KWAVEncoder::process_end () } void -J2KWAVEncoder::process_audio (uint8_t* data, int data_size) +J2KWAVEncoder::process_audio (uint8_t* data, int size) +{ + /* This is a buffer we might use if we are sample-rate converting; + it will need freeing if so. + */ + uint8_t* out_buffer = 0; + + /* Maybe sample-rate convert */ +#if HAVE_SWRESAMPLE + if (_swr_context) { + + uint8_t const * in[2] = { + data, + 0 + }; + + /* Here's samples per channel */ + int const samples = size / _fs->bytes_per_sample(); + + /* And here's frames (where 1 frame is a collection of samples, 1 for each channel, + so for 5.1 a frame would be 6 samples) + */ + int const frames = samples / _fs->audio_channels; + + /* Compute the resampled frame count and add 32 for luck */ + int const out_buffer_size_frames = ceil (frames * target_sample_rate() / _fs->audio_sample_rate) + 32; + int const out_buffer_size_bytes = out_buffer_size_frames * _fs->audio_channels * _fs->bytes_per_sample(); + out_buffer = new uint8_t[out_buffer_size_bytes]; + + uint8_t* out[2] = { + out_buffer, + 0 + }; + + /* Resample audio */ + int out_frames = swr_convert (_swr_context, out, out_buffer_size_frames, in, size); + if (out_frames < 0) { + throw EncodeError ("could not run sample-rate converter"); + } + + /* And point our variables at the resampled audio */ + data = out_buffer; + size = out_frames * _fs->audio_channels * _fs->bytes_per_sample(); + } +#endif + + write_audio (data, size); + + /* Delete the sample-rate conversion buffer, if it exists */ + delete[] out_buffer; +} + +void +J2KWAVEncoder::write_audio (uint8_t* data, int size) { /* Size of a sample in bytes */ int const sample_size = 2; - + /* XXX: we are assuming that sample_size is right, the _deinterleave_buffer_size is a multiple of the sample size and that data_size is a multiple of _fs->audio_channels * sample_size. */ @@ -293,7 +400,7 @@ J2KWAVEncoder::process_audio (uint8_t* data, int data_size) /* XXX: this code is very tricksy and it must be possible to make it simpler ... */ /* Number of bytes left to read this time */ - int remaining = data_size; + int remaining = size; /* Our position in the output buffers, in bytes */ int position = 0; while (remaining > 0) { @@ -313,7 +420,7 @@ J2KWAVEncoder::process_audio (uint8_t* data, int data_size) sf_write_short (_sound_files[i], (const short *) _deinterleave_buffer, this_time / sample_size); break; default: - throw DecodeError ("unknown audio sample format"); + throw EncodeError ("unknown audio sample format"); } } @@ -321,3 +428,23 @@ J2KWAVEncoder::process_audio (uint8_t* data, int data_size) remaining -= this_time * _fs->audio_channels; } } + +int +J2KWAVEncoder::target_sample_rate () const +{ + double t = dcp_audio_sample_rate (_fs->audio_sample_rate); + if (rint (_fs->frames_per_second) != _fs->frames_per_second) { + if (_fs->frames_per_second == 23.976) { + /* 24fps drop-frame ie 24 * 1000 / 1001 frames per second; + hence we need to resample the audio to dcp_audio_sample_rate * 1000 / 1001 + so that when we play it back at dcp_audio_sample_rate it is sped up + by the same amount that the video is + */ + t *= double(1000) / 1001; + } else { + throw EncodeError ("unknown fractional frame rate"); + } + } + + return rint (t); +} diff --git a/src/lib/j2k_wav_encoder.h b/src/lib/j2k_wav_encoder.h index 1c2f50065..3f01ac480 100644 --- a/src/lib/j2k_wav_encoder.h +++ b/src/lib/j2k_wav_encoder.h @@ -26,6 +26,11 @@ #include #include #include +#ifdef HAVE_SWRESAMPLE +extern "C" { +#include +} +#endif #include #include "encoder.h" @@ -43,17 +48,24 @@ public: J2KWAVEncoder (boost::shared_ptr, boost::shared_ptr, Log *); ~J2KWAVEncoder (); - void process_begin (); + void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format); void process_video (boost::shared_ptr, int); void process_audio (uint8_t *, int); void process_end (); -private: +private: + int target_sample_rate () const; + + void write_audio (uint8_t* data, int size); void encoder_thread (ServerDescription *); void close_sound_files (); void terminate_worker_threads (); +#if HAVE_SWRESAMPLE + SwrContext* _swr_context; +#endif + std::vector _sound_files; int _deinterleave_buffer_size; uint8_t* _deinterleave_buffer; diff --git a/src/lib/tiff_encoder.h b/src/lib/tiff_encoder.h index ec8e38011..ef1ce25d2 100644 --- a/src/lib/tiff_encoder.h +++ b/src/lib/tiff_encoder.h @@ -36,7 +36,7 @@ class TIFFEncoder : public Encoder public: TIFFEncoder (boost::shared_ptr s, boost::shared_ptr o, Log* l); - void process_begin () {} + void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) {} void process_video (boost::shared_ptr, int); void process_audio (uint8_t *, int) {} void process_end () {} diff --git a/src/lib/transcoder.cc b/src/lib/transcoder.cc index 3d71b68f5..b74d09174 100644 --- a/src/lib/transcoder.cc +++ b/src/lib/transcoder.cc @@ -57,7 +57,7 @@ Transcoder::Transcoder (shared_ptr s, shared_ptr void Transcoder::go () { - _encoder->process_begin (); + _encoder->process_begin (_decoder->audio_channel_layout(), _decoder->audio_sample_format()); try { _decoder->go (); } catch (...) { -- 2.30.2