Build fixes for Boost >= 1.73
[dcpomatic.git] / src / lib / ffmpeg_file_encoder.cc
1 /*
2     Copyright (C) 2017-2018 Carl Hetherington <cth@carlh.net>
3
4     This file is part of DCP-o-matic.
5
6     DCP-o-matic is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     DCP-o-matic is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
18
19 */
20
21 #include "ffmpeg_encoder.h"
22 #include "film.h"
23 #include "job.h"
24 #include "player.h"
25 #include "player_video.h"
26 #include "log.h"
27 #include "image.h"
28 #include "cross.h"
29 #include "compose.hpp"
30 #include <iostream>
31
32 #include "i18n.h"
33
34 using std::string;
35 using std::runtime_error;
36 using std::cout;
37 using std::pair;
38 using boost::shared_ptr;
39 using boost::bind;
40 using boost::weak_ptr;
41 using boost::optional;
42 using namespace dcpomatic;
43 #if BOOST_VERSION >= 106100
44 using namespace boost::placeholders;
45 #endif
46
/** Index of the video stream in the output; used as the video stream's ID and as the stream_index of video packets */
int FFmpegFileEncoder::_video_stream_index = 0;
/** Index given to the first audio stream; audio stream i is created with index _audio_stream_index_base + i */
int FFmpegFileEncoder::_audio_stream_index_base = 1;
49
50
51 class ExportAudioStream : public boost::noncopyable
52 {
53 public:
54         ExportAudioStream (string codec_name, int channels, int frame_rate, AVSampleFormat sample_format, AVFormatContext* format_context, int stream_index)
55                 : _format_context (format_context)
56                 , _stream_index (stream_index)
57         {
58                 _codec = avcodec_find_encoder_by_name (codec_name.c_str());
59                 if (!_codec) {
60                         throw runtime_error (String::compose("could not find FFmpeg encoder %1", codec_name));
61                 }
62
63                 _codec_context = avcodec_alloc_context3 (_codec);
64                 if (!_codec_context) {
65                         throw runtime_error ("could not allocate FFmpeg audio context");
66                 }
67
68                 avcodec_get_context_defaults3 (_codec_context, _codec);
69
70                 /* XXX: configurable */
71                 _codec_context->bit_rate = channels * 128 * 1024;
72                 _codec_context->sample_fmt = sample_format;
73                 _codec_context->sample_rate = frame_rate;
74                 _codec_context->channel_layout = av_get_default_channel_layout (channels);
75                 _codec_context->channels = channels;
76
77                 _stream = avformat_new_stream (format_context, _codec);
78                 if (!_stream) {
79                         throw runtime_error ("could not create FFmpeg output audio stream");
80                 }
81
82                 _stream->id = stream_index;
83 DCPOMATIC_DISABLE_WARNINGS
84                 _stream->codec = _codec_context;
85 DCPOMATIC_ENABLE_WARNINGS
86
87                 int r = avcodec_open2 (_codec_context, _codec, 0);
88                 if (r < 0) {
89                         char buffer[256];
90                         av_strerror (r, buffer, sizeof(buffer));
91                         throw runtime_error (String::compose("could not open FFmpeg audio codec (%1)", buffer));
92                 }
93         }
94
95         ~ExportAudioStream ()
96         {
97                 avcodec_close (_codec_context);
98         }
99
100         int frame_size () const {
101                 return _codec_context->frame_size;
102         }
103
104         bool flush ()
105         {
106                 AVPacket packet;
107                 av_init_packet (&packet);
108                 packet.data = 0;
109                 packet.size = 0;
110                 bool flushed = false;
111
112                 int got_packet;
113 DCPOMATIC_DISABLE_WARNINGS
114                 avcodec_encode_audio2 (_codec_context, &packet, 0, &got_packet);
115 DCPOMATIC_ENABLE_WARNINGS
116                 if (got_packet) {
117                         packet.stream_index = 0;
118                         av_interleaved_write_frame (_format_context, &packet);
119                 } else {
120                         flushed = true;
121                 }
122                 av_packet_unref (&packet);
123                 return flushed;
124         }
125
126         void write (int size, int channel_offset, int channels, float** data, int64_t sample_offset)
127         {
128                 DCPOMATIC_ASSERT (size);
129
130                 AVFrame* frame = av_frame_alloc ();
131                 DCPOMATIC_ASSERT (frame);
132
133                 int const buffer_size = av_samples_get_buffer_size (0, channels, size, _codec_context->sample_fmt, 0);
134                 DCPOMATIC_ASSERT (buffer_size >= 0);
135
136                 void* samples = av_malloc (buffer_size);
137                 DCPOMATIC_ASSERT (samples);
138
139                 frame->nb_samples = size;
140                 int r = avcodec_fill_audio_frame (frame, channels, _codec_context->sample_fmt, (const uint8_t *) samples, buffer_size, 0);
141                 DCPOMATIC_ASSERT (r >= 0);
142
143                 switch (_codec_context->sample_fmt) {
144                 case AV_SAMPLE_FMT_S16:
145                 {
146                         int16_t* q = reinterpret_cast<int16_t*> (samples);
147                         for (int i = 0; i < size; ++i) {
148                                 for (int j = 0; j < channels; ++j) {
149                                         *q++ = data[j + channel_offset][i] * 32767;
150                                 }
151                         }
152                         break;
153                 }
154                 case AV_SAMPLE_FMT_S32:
155                 {
156                         int32_t* q = reinterpret_cast<int32_t*> (samples);
157                         for (int i = 0; i < size; ++i) {
158                                 for (int j = 0; j < channels; ++j) {
159                                         *q++ = data[j + channel_offset][i] * 2147483647;
160                                 }
161                         }
162                         break;
163                 }
164                 case AV_SAMPLE_FMT_FLTP:
165                 {
166                         float* q = reinterpret_cast<float*> (samples);
167                         for (int i = 0; i < channels; ++i) {
168                                 memcpy (q, data[i + channel_offset], sizeof(float) * size);
169                                 q += size;
170                         }
171                         break;
172                 }
173                 default:
174                         DCPOMATIC_ASSERT (false);
175                 }
176
177                 DCPOMATIC_ASSERT (_codec_context->time_base.num == 1);
178                 frame->pts = sample_offset * _codec_context->time_base.den / _codec_context->sample_rate;
179
180                 AVPacket packet;
181                 av_init_packet (&packet);
182                 packet.data = 0;
183                 packet.size = 0;
184                 int got_packet;
185
186                 DCPOMATIC_DISABLE_WARNINGS
187                 if (avcodec_encode_audio2 (_codec_context, &packet, frame, &got_packet) < 0) {
188                         throw EncodeError ("FFmpeg audio encode failed");
189                 }
190                 DCPOMATIC_ENABLE_WARNINGS
191
192                 if (got_packet && packet.size) {
193                         packet.stream_index = _stream_index;
194                         av_interleaved_write_frame (_format_context, &packet);
195                         av_packet_unref (&packet);
196                 }
197
198                 av_free (samples);
199                 av_frame_free (&frame);
200         }
201
202 private:
203         AVFormatContext* _format_context;
204         AVCodec* _codec;
205         AVCodecContext* _codec_context;
206         AVStream* _stream;
207         int _stream_index;
208 };
209
210
211 FFmpegFileEncoder::FFmpegFileEncoder (
212         dcp::Size video_frame_size,
213         int video_frame_rate,
214         int audio_frame_rate,
215         int channels,
216         ExportFormat format,
217         bool audio_stream_per_channel,
218         int x264_crf,
219         boost::filesystem::path output
220 #ifdef DCPOMATIC_VARIANT_SWAROOP
221         , optional<dcp::Key> key
222         , optional<string> id
223 #endif
224         )
225         : _audio_stream_per_channel (audio_stream_per_channel)
226         , _video_options (0)
227         , _audio_channels (channels)
228         , _output (output)
229         , _video_frame_size (video_frame_size)
230         , _video_frame_rate (video_frame_rate)
231         , _audio_frame_rate (audio_frame_rate)
232         , _audio_frames (0)
233 {
234         _pixel_format = pixel_format (format);
235
236         switch (format) {
237         case EXPORT_FORMAT_PRORES:
238                 _sample_format = AV_SAMPLE_FMT_S16;
239                 _video_codec_name = "prores_ks";
240                 _audio_codec_name = "pcm_s16le";
241                 av_dict_set (&_video_options, "profile", "3", 0);
242                 av_dict_set (&_video_options, "threads", "auto", 0);
243                 break;
244         case EXPORT_FORMAT_H264_AAC:
245                 _sample_format = AV_SAMPLE_FMT_FLTP;
246                 _video_codec_name = "libx264";
247                 _audio_codec_name = "aac";
248                 av_dict_set_int (&_video_options, "crf", x264_crf, 0);
249                 break;
250         case EXPORT_FORMAT_H264_PCM:
251                 _sample_format = AV_SAMPLE_FMT_S32;
252                 _video_codec_name = "libx264";
253                 _audio_codec_name = "pcm_s24le";
254                 av_dict_set_int (&_video_options, "crf", x264_crf, 0);
255                 break;
256         default:
257                 DCPOMATIC_ASSERT (false);
258         }
259
260 #ifdef DCPOMATIC_VARIANT_SWAROOP
261         int r = avformat_alloc_output_context2 (&_format_context, av_guess_format("mov", 0, 0), 0, 0);
262 #else
263         int r = avformat_alloc_output_context2 (&_format_context, 0, 0, _output.string().c_str());
264 #endif
265         if (!_format_context) {
266                 throw runtime_error (String::compose("could not allocate FFmpeg format context (%1)", r));
267         }
268
269         setup_video ();
270         setup_audio ();
271
272         r = avio_open_boost (&_format_context->pb, _output, AVIO_FLAG_WRITE);
273         if (r < 0) {
274                 throw runtime_error (String::compose("could not open FFmpeg output file %1 (%2)", _output.string(), r));
275         }
276
277         AVDictionary* options = 0;
278
279 #ifdef DCPOMATIC_VARIANT_SWAROOP
280         if (key) {
281                 av_dict_set (&options, "encryption_key", key->hex().c_str(), 0);
282                 /* XXX: is this OK? */
283                 av_dict_set (&options, "encryption_kid", "00000000000000000000000000000000", 0);
284                 av_dict_set (&options, "encryption_scheme", "cenc-aes-ctr", 0);
285         }
286
287         if (id) {
288                 if (av_dict_set(&_format_context->metadata, SWAROOP_ID_TAG, id->c_str(), 0) < 0) {
289                         throw runtime_error ("Could not write ID to output");
290                 }
291         }
292 #endif
293
294         if (avformat_write_header (_format_context, &options) < 0) {
295                 throw runtime_error ("could not write header to FFmpeg output file");
296         }
297
298         _pending_audio.reset (new AudioBuffers(channels, 0));
299 }
300
301
302 FFmpegFileEncoder::~FFmpegFileEncoder ()
303 {
304         _audio_streams.clear ();
305         avcodec_close (_video_codec_context);
306         avformat_free_context (_format_context);
307 }
308
309
310 AVPixelFormat
311 FFmpegFileEncoder::pixel_format (ExportFormat format)
312 {
313         switch (format) {
314         case EXPORT_FORMAT_PRORES:
315                 return AV_PIX_FMT_YUV422P10;
316         case EXPORT_FORMAT_H264_AAC:
317         case EXPORT_FORMAT_H264_PCM:
318                 return AV_PIX_FMT_YUV420P;
319         default:
320                 DCPOMATIC_ASSERT (false);
321         }
322
323         return AV_PIX_FMT_YUV422P10;
324 }
325
326 void
327 FFmpegFileEncoder::setup_video ()
328 {
329         _video_codec = avcodec_find_encoder_by_name (_video_codec_name.c_str());
330         if (!_video_codec) {
331                 throw runtime_error (String::compose ("could not find FFmpeg encoder %1", _video_codec_name));
332         }
333
334         _video_codec_context = avcodec_alloc_context3 (_video_codec);
335         if (!_video_codec_context) {
336                 throw runtime_error ("could not allocate FFmpeg video context");
337         }
338
339         avcodec_get_context_defaults3 (_video_codec_context, _video_codec);
340
341         /* Variable quantisation */
342         _video_codec_context->global_quality = 0;
343         _video_codec_context->width = _video_frame_size.width;
344         _video_codec_context->height = _video_frame_size.height;
345         _video_codec_context->time_base = (AVRational) { 1, _video_frame_rate };
346         _video_codec_context->pix_fmt = _pixel_format;
347         _video_codec_context->flags |= AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_GLOBAL_HEADER;
348
349         _video_stream = avformat_new_stream (_format_context, _video_codec);
350         if (!_video_stream) {
351                 throw runtime_error ("could not create FFmpeg output video stream");
352         }
353
354 DCPOMATIC_DISABLE_WARNINGS
355         _video_stream->id = _video_stream_index;
356         _video_stream->codec = _video_codec_context;
357 DCPOMATIC_ENABLE_WARNINGS
358
359         if (avcodec_open2 (_video_codec_context, _video_codec, &_video_options) < 0) {
360                 throw runtime_error ("could not open FFmpeg video codec");
361         }
362 }
363
364
365 void
366 FFmpegFileEncoder::setup_audio ()
367 {
368         int const streams = _audio_stream_per_channel ? _audio_channels : 1;
369         int const channels_per_stream = _audio_stream_per_channel ? 1 : _audio_channels;
370
371         for (int i = 0; i < streams; ++i) {
372                 _audio_streams.push_back(
373                         shared_ptr<ExportAudioStream>(
374                                 new ExportAudioStream(_audio_codec_name, channels_per_stream, _audio_frame_rate, _sample_format, _format_context, _audio_stream_index_base + i)
375                                 )
376                         );
377         }
378 }
379
380
381 void
382 FFmpegFileEncoder::flush ()
383 {
384         if (_pending_audio->frames() > 0) {
385                 audio_frame (_pending_audio->frames ());
386         }
387
388         bool flushed_video = false;
389         bool flushed_audio = false;
390
391         while (!flushed_video || !flushed_audio) {
392                 AVPacket packet;
393                 av_init_packet (&packet);
394                 packet.data = 0;
395                 packet.size = 0;
396
397                 int got_packet;
398 DCPOMATIC_DISABLE_WARNINGS
399                 avcodec_encode_video2 (_video_codec_context, &packet, 0, &got_packet);
400 DCPOMATIC_ENABLE_WARNINGS
401                 if (got_packet) {
402                         packet.stream_index = 0;
403                         av_interleaved_write_frame (_format_context, &packet);
404                 } else {
405                         flushed_video = true;
406                 }
407                 av_packet_unref (&packet);
408
409                 flushed_audio = true;
410                 BOOST_FOREACH (shared_ptr<ExportAudioStream> i, _audio_streams) {
411                         if (!i->flush()) {
412                                 flushed_audio = false;
413                         }
414                 }
415         }
416
417         av_write_trailer (_format_context);
418 }
419
420 void
421 FFmpegFileEncoder::video (shared_ptr<PlayerVideo> video, DCPTime time)
422 {
423         shared_ptr<Image> image = video->image (
424                 bind (&PlayerVideo::force, _1, _pixel_format),
425                 true,
426                 false
427                 );
428
429         AVFrame* frame = av_frame_alloc ();
430         DCPOMATIC_ASSERT (frame);
431
432         {
433                 boost::mutex::scoped_lock lm (_pending_images_mutex);
434                 _pending_images[image->data()[0]] = image;
435         }
436
437         for (int i = 0; i < 3; ++i) {
438                 AVBufferRef* buffer = av_buffer_create(image->data()[i], image->stride()[i] * image->size().height, &buffer_free, this, 0);
439                 frame->buf[i] = av_buffer_ref (buffer);
440                 frame->data[i] = buffer->data;
441                 frame->linesize[i] = image->stride()[i];
442                 av_buffer_unref (&buffer);
443         }
444
445         frame->width = image->size().width;
446         frame->height = image->size().height;
447         frame->format = _pixel_format;
448         DCPOMATIC_ASSERT (_video_stream->time_base.num == 1);
449         frame->pts = time.get() * _video_stream->time_base.den / DCPTime::HZ;
450
451         AVPacket packet;
452         av_init_packet (&packet);
453         packet.data = 0;
454         packet.size = 0;
455
456         int got_packet;
457 DCPOMATIC_DISABLE_WARNINGS
458         if (avcodec_encode_video2 (_video_codec_context, &packet, frame, &got_packet) < 0) {
459                 throw EncodeError ("FFmpeg video encode failed");
460         }
461 DCPOMATIC_ENABLE_WARNINGS
462
463         if (got_packet && packet.size) {
464                 packet.stream_index = _video_stream_index;
465                 av_interleaved_write_frame (_format_context, &packet);
466                 av_packet_unref (&packet);
467         }
468
469         av_frame_free (&frame);
470
471 }
472
473 /** Called when the player gives us some audio */
474 void
475 FFmpegFileEncoder::audio (shared_ptr<AudioBuffers> audio)
476 {
477         _pending_audio->append (audio);
478
479         DCPOMATIC_ASSERT (!_audio_streams.empty());
480         int frame_size = _audio_streams[0]->frame_size();
481         if (frame_size == 0) {
482                 /* codec has AV_CODEC_CAP_VARIABLE_FRAME_SIZE */
483                 frame_size = _audio_frame_rate / _video_frame_rate;
484         }
485
486         while (_pending_audio->frames() >= frame_size) {
487                 audio_frame (frame_size);
488         }
489 }
490
491 void
492 FFmpegFileEncoder::audio_frame (int size)
493 {
494         if (_audio_stream_per_channel) {
495                 int offset = 0;
496                 BOOST_FOREACH (shared_ptr<ExportAudioStream> i, _audio_streams) {
497                         i->write (size, offset, 1, _pending_audio->data(), _audio_frames);
498                         ++offset;
499                 }
500         } else {
501                 DCPOMATIC_ASSERT (!_audio_streams.empty());
502                 DCPOMATIC_ASSERT (_pending_audio->channels());
503                 _audio_streams[0]->write (size, 0, _pending_audio->channels(), _pending_audio->data(), _audio_frames);
504         }
505
506         _pending_audio->trim_start (size);
507         _audio_frames += size;
508 }
509
/** Does nothing: both arguments are ignored by this encoder */
void
FFmpegFileEncoder::subtitle (PlayerText, DCPTimePeriod)
{

}
515
516 void
517 FFmpegFileEncoder::buffer_free (void* opaque, uint8_t* data)
518 {
519         reinterpret_cast<FFmpegFileEncoder*>(opaque)->buffer_free2(data);
520 }
521
522 void
523 FFmpegFileEncoder::buffer_free2 (uint8_t* data)
524 {
525         boost::mutex::scoped_lock lm (_pending_images_mutex);
526         if (_pending_images.find(data) != _pending_images.end()) {
527                 _pending_images.erase (data);
528         }
529 }