Entirely untested resampling to fix 24fps drop-frame.
[dcpomatic.git] / src / lib / j2k_wav_encoder.cc
1 /*
2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 /** @file  src/j2k_wav_encoder.cc
21  *  @brief An encoder which writes JPEG2000 and WAV files.
22  */
23
#include <cmath>
#include <sstream>
#include <stdexcept>
#include <iomanip>
#include <iostream>
#include <vector>
#include <boost/thread.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <sndfile.h>
#include <openjpeg.h>
#include "j2k_wav_encoder.h"
#include "config.h"
#include "film_state.h"
#include "options.h"
#include "exceptions.h"
#include "dcp_video_frame.h"
#include "server.h"
#include "filter.h"
#include "log.h"
#include "cross.h"
43
44 using namespace std;
45 using namespace boost;
46
47 J2KWAVEncoder::J2KWAVEncoder (shared_ptr<const FilmState> s, shared_ptr<const Options> o, Log* l)
48         : Encoder (s, o, l)
49 #ifdef HAVE_SWRESAMPLE    
50         , _swr_context (0)
51 #endif    
52         , _deinterleave_buffer_size (8192)
53         , _deinterleave_buffer (0)
54         , _process_end (false)
55 {
56         /* Create sound output files with .tmp suffixes; we will rename
57            them if and when we complete.
58         */
59         for (int i = 0; i < _fs->audio_channels; ++i) {
60                 SF_INFO sf_info;
61                 sf_info.samplerate = dcp_audio_sample_rate (_fs->audio_sample_rate);
62                 /* We write mono files */
63                 sf_info.channels = 1;
64                 sf_info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_24;
65                 SNDFILE* f = sf_open (_opt->multichannel_audio_out_path (i, true).c_str (), SFM_WRITE, &sf_info);
66                 if (f == 0) {
67                         throw CreateFileError (_opt->multichannel_audio_out_path (i, true));
68                 }
69                 _sound_files.push_back (f);
70         }
71
72         /* Create buffer for deinterleaving audio */
73         _deinterleave_buffer = new uint8_t[_deinterleave_buffer_size];
74 }
75
76 J2KWAVEncoder::~J2KWAVEncoder ()
77 {
78         terminate_worker_threads ();
79         delete[] _deinterleave_buffer;
80         close_sound_files ();
81 }
82
83 void
84 J2KWAVEncoder::terminate_worker_threads ()
85 {
86         boost::mutex::scoped_lock lock (_worker_mutex);
87         _process_end = true;
88         _worker_condition.notify_all ();
89         lock.unlock ();
90
91         for (list<boost::thread *>::iterator i = _worker_threads.begin(); i != _worker_threads.end(); ++i) {
92                 (*i)->join ();
93                 delete *i;
94         }
95 }
96
97 void
98 J2KWAVEncoder::close_sound_files ()
99 {
100         for (vector<SNDFILE*>::iterator i = _sound_files.begin(); i != _sound_files.end(); ++i) {
101                 sf_close (*i);
102         }
103
104         _sound_files.clear ();
105 }       
106
107 void
108 J2KWAVEncoder::process_video (shared_ptr<Image> yuv, int frame)
109 {
110         boost::mutex::scoped_lock lock (_worker_mutex);
111
112         /* Wait until the queue has gone down a bit */
113         while (_queue.size() >= _worker_threads.size() * 2 && !_process_end) {
114                 _worker_condition.wait (lock);
115         }
116
117         if (_process_end) {
118                 return;
119         }
120
121         /* Only do the processing if we don't already have a file for this frame */
122         if (!boost::filesystem::exists (_opt->frame_out_path (frame, false))) {
123                 pair<string, string> const s = Filter::ffmpeg_strings (_fs->filters);
124                 _queue.push_back (boost::shared_ptr<DCPVideoFrame> (
125                                           new DCPVideoFrame (
126                                                   yuv, _opt->out_size, _opt->padding, _fs->scaler, frame, _fs->frames_per_second, s.second,
127                                                   Config::instance()->colour_lut_index (), Config::instance()->j2k_bandwidth (),
128                                                   _log
129                                                   )
130                                           ));
131                 
132                 _worker_condition.notify_all ();
133         } else {
134                 frame_skipped ();
135         }
136 }
137
138 void
139 J2KWAVEncoder::encoder_thread (ServerDescription* server)
140 {
141         /* Number of seconds that we currently wait between attempts
142            to connect to the server; not relevant for localhost
143            encodings.
144         */
145         int remote_backoff = 0;
146         
147         while (1) {
148                 boost::mutex::scoped_lock lock (_worker_mutex);
149                 while (_queue.empty () && !_process_end) {
150                         _worker_condition.wait (lock);
151                 }
152
153                 if (_process_end) {
154                         return;
155                 }
156
157                 boost::shared_ptr<DCPVideoFrame> vf = _queue.front ();
158                 _queue.pop_front ();
159                 
160                 lock.unlock ();
161
162                 shared_ptr<EncodedData> encoded;
163
164                 if (server) {
165                         try {
166                                 encoded = vf->encode_remotely (server);
167
168                                 if (remote_backoff > 0) {
169                                         stringstream s;
170                                         s << server->host_name() << " was lost, but now she is found; removing backoff";
171                                         _log->log (s.str ());
172                                 }
173                                 
174                                 /* This job succeeded, so remove any backoff */
175                                 remote_backoff = 0;
176                                 
177                         } catch (std::exception& e) {
178                                 if (remote_backoff < 60) {
179                                         /* back off more */
180                                         remote_backoff += 10;
181                                 }
182                                 stringstream s;
183                                 s << "Remote encode of " << vf->frame() << " on " << server->host_name() << " failed (" << e.what() << "); thread sleeping for " << remote_backoff << "s.";
184                                 _log->log (s.str ());
185                         }
186                                 
187                 } else {
188                         try {
189                                 encoded = vf->encode_locally ();
190                         } catch (std::exception& e) {
191                                 stringstream s;
192                                 s << "Local encode failed " << e.what() << ".";
193                                 _log->log (s.str ());
194                         }
195                 }
196
197                 if (encoded) {
198                         encoded->write (_opt, vf->frame ());
199                         frame_done (vf->frame ());
200                 } else {
201                         lock.lock ();
202                         _queue.push_front (vf);
203                         lock.unlock ();
204                 }
205
206                 if (remote_backoff > 0) {
207                         dvdomatic_sleep (remote_backoff);
208                 }
209
210                 lock.lock ();
211                 _worker_condition.notify_all ();
212         }
213 }
214
215 void
216 J2KWAVEncoder::process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format)
217 {
218         if ((_fs->audio_sample_rate != dcp_audio_sample_rate (_fs->audio_sample_rate)) || (rint (_fs->frames_per_second) != _fs->frames_per_second)) {
219 #ifdef HAVE_SWRESAMPLE          
220                 _swr_context = swr_alloc_set_opts (
221                         0,
222                         audio_channel_layout,
223                         audio_sample_format,
224                         target_sample_rate(),
225                         audio_channel_layout,
226                         audio_sample_format,
227                         _fs->audio_sample_rate,
228                         0, 0
229                         );
230                 
231                 swr_init (_swr_context);
232 #else
233                 throw EncodeError ("Cannot resample audio as libswresample is not present");
234 #endif
235         } else {
236 #ifdef HAVE_SWRESAMPLE
237                 _swr_context = 0;
238 #endif          
239         }
240         
241         for (int i = 0; i < Config::instance()->num_local_encoding_threads (); ++i) {
242                 _worker_threads.push_back (new boost::thread (boost::bind (&J2KWAVEncoder::encoder_thread, this, (ServerDescription *) 0)));
243         }
244
245         vector<ServerDescription*> servers = Config::instance()->servers ();
246
247         for (vector<ServerDescription*>::iterator i = servers.begin(); i != servers.end(); ++i) {
248                 for (int j = 0; j < (*i)->threads (); ++j) {
249                         _worker_threads.push_back (new boost::thread (boost::bind (&J2KWAVEncoder::encoder_thread, this, *i)));
250                 }
251         }
252 }
253
254 void
255 J2KWAVEncoder::process_end ()
256 {
257         boost::mutex::scoped_lock lock (_worker_mutex);
258
259         _log->log ("Clearing queue of " + lexical_cast<string> (_queue.size ()));
260
261         /* Keep waking workers until the queue is empty */
262         while (!_queue.empty ()) {
263                 _log->log ("Waking with " + lexical_cast<string> (_queue.size ()));
264                 _worker_condition.notify_all ();
265                 _worker_condition.wait (lock);
266         }
267
268         lock.unlock ();
269         
270         terminate_worker_threads ();
271
272         _log->log ("Mopping up " + lexical_cast<string> (_queue.size()));
273
274         /* The following sequence of events can occur in the above code:
275              1. a remote worker takes the last image off the queue
276              2. the loop above terminates
277              3. the remote worker fails to encode the image and puts it back on the queue
278              4. the remote worker is then terminated by terminate_worker_threads
279
280              So just mop up anything left in the queue here.
281         */
282
283         for (list<shared_ptr<DCPVideoFrame> >::iterator i = _queue.begin(); i != _queue.end(); ++i) {
284                 stringstream s;
285                 s << "Encode left-over frame " << (*i)->frame();
286                 _log->log (s.str ());
287                 try {
288                         shared_ptr<EncodedData> e = (*i)->encode_locally ();
289                         e->write (_opt, (*i)->frame ());
290                         frame_done ((*i)->frame ());
291                 } catch (std::exception& e) {
292                         stringstream s;
293                         s << "Local encode failed " << e.what() << ".";
294                         _log->log (s.str ());
295                 }
296         }
297
298 #if HAVE_SWRESAMPLE     
299         if (_swr_context) {
300
301                 int mop = 0;
302                 while (1) {
303                         uint8_t buffer[256 * _fs->bytes_per_sample() * _fs->audio_channels];
304                         uint8_t* out[1] = {
305                                 buffer
306                         };
307
308                         int const frames = swr_convert (_swr_context, out, 256, 0, 0);
309
310                         if (frames < 0) {
311                                 throw EncodeError ("could not run sample-rate converter");
312                         }
313
314                         if (frames == 0) {
315                                 break;
316                         }
317
318                         mop += frames;
319                         write_audio (buffer, frames);
320                 }
321
322                 swr_free (&_swr_context);
323         }
324 #endif  
325         
326         close_sound_files ();
327
328         /* Rename .wav.tmp files to .wav */
329         for (int i = 0; i < _fs->audio_channels; ++i) {
330                 if (boost::filesystem::exists (_opt->multichannel_audio_out_path (i, false))) {
331                         boost::filesystem::remove (_opt->multichannel_audio_out_path (i, false));
332                 }
333                 boost::filesystem::rename (_opt->multichannel_audio_out_path (i, true), _opt->multichannel_audio_out_path (i, false));
334         }
335 }
336
337 void
338 J2KWAVEncoder::process_audio (uint8_t* data, int size)
339 {
340         /* This is a buffer we might use if we are sample-rate converting;
341            it will need freeing if so.
342         */
343         uint8_t* out_buffer = 0;
344         
345         /* Maybe sample-rate convert */
346 #if HAVE_SWRESAMPLE     
347         if (_swr_context) {
348
349                 uint8_t const * in[2] = {
350                         data,
351                         0
352                 };
353
354                 /* Here's samples per channel */
355                 int const samples = size / _fs->bytes_per_sample();
356                 
357                 /* And here's frames (where 1 frame is a collection of samples, 1 for each channel,
358                    so for 5.1 a frame would be 6 samples)
359                 */
360                 int const frames = samples / _fs->audio_channels;
361
362                 /* Compute the resampled frame count and add 32 for luck */
363                 int const out_buffer_size_frames = ceil (frames * target_sample_rate() / _fs->audio_sample_rate) + 32;
364                 int const out_buffer_size_bytes = out_buffer_size_frames * _fs->audio_channels * _fs->bytes_per_sample();
365                 out_buffer = new uint8_t[out_buffer_size_bytes];
366
367                 uint8_t* out[2] = {
368                         out_buffer, 
369                         0
370                 };
371
372                 /* Resample audio */
373                 int out_frames = swr_convert (_swr_context, out, out_buffer_size_frames, in, size);
374                 if (out_frames < 0) {
375                         throw EncodeError ("could not run sample-rate converter");
376                 }
377
378                 /* And point our variables at the resampled audio */
379                 data = out_buffer;
380                 size = out_frames * _fs->audio_channels * _fs->bytes_per_sample();
381         }
382 #endif
383
384         write_audio (data, size);
385
386         /* Delete the sample-rate conversion buffer, if it exists */
387         delete[] out_buffer;
388 }
389
390 void
391 J2KWAVEncoder::write_audio (uint8_t* data, int size)
392 {
393         /* Size of a sample in bytes */
394         int const sample_size = 2;
395
396         /* XXX: we are assuming that sample_size is right, the _deinterleave_buffer_size is a multiple
397            of the sample size and that data_size is a multiple of _fs->audio_channels * sample_size.
398         */
399         
400         /* XXX: this code is very tricksy and it must be possible to make it simpler ... */
401         
402         /* Number of bytes left to read this time */
403         int remaining = size;
404         /* Our position in the output buffers, in bytes */
405         int position = 0;
406         while (remaining > 0) {
407                 /* How many bytes of the deinterleaved data to do this time */
408                 int this_time = min (remaining / _fs->audio_channels, _deinterleave_buffer_size);
409                 for (int i = 0; i < _fs->audio_channels; ++i) {
410                         for (int j = 0; j < this_time; j += sample_size) {
411                                 for (int k = 0; k < sample_size; ++k) {
412                                         int const to = j + k;
413                                         int const from = position + (i * sample_size) + (j * _fs->audio_channels) + k;
414                                         _deinterleave_buffer[to] = data[from];
415                                 }
416                         }
417                         
418                         switch (_fs->audio_sample_format) {
419                         case AV_SAMPLE_FMT_S16:
420                                 sf_write_short (_sound_files[i], (const short *) _deinterleave_buffer, this_time / sample_size);
421                                 break;
422                         default:
423                                 throw EncodeError ("unknown audio sample format");
424                         }
425                 }
426                 
427                 position += this_time;
428                 remaining -= this_time * _fs->audio_channels;
429         }
430 }
431
432 int
433 J2KWAVEncoder::target_sample_rate () const
434 {
435         double t = dcp_audio_sample_rate (_fs->audio_sample_rate);
436         if (rint (_fs->frames_per_second) != _fs->frames_per_second) {
437                 if (_fs->frames_per_second == 23.976) {
438                         /* 24fps drop-frame ie 24 * 1000 / 1001 frames per second;
439                            hence we need to resample the audio to dcp_audio_sample_rate * 1000 / 1001
440                            so that when we play it back at dcp_audio_sample_rate it is sped up
441                            by the same amount that the video is
442                         */
443                         t *= double(1000) / 1001;
444                 } else {
445                         throw EncodeError ("unknown fractional frame rate");
446                 }
447         }
448
449         return rint (t);
450 }