src/lib/ffmpeg_file_encoder.cc

   1 /*
   2     Copyright (C) 2017-2021 Carl Hetherington <cth@carlh.net>
   3
   4     This file is part of DCP-o-matic.
   5
   6     DCP-o-matic is free software; you can redistribute it and/or modify
   7     it under the terms of the GNU General Public License as published by
   8     the Free Software Foundation; either version 2 of the License, or
   9     (at your option) any later version.
  10
  11     DCP-o-matic is distributed in the hope that it will be useful,
  12     but WITHOUT ANY WARRANTY; without even the implied warranty of
  13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14     GNU General Public License for more details.
  15
  16     You should have received a copy of the GNU General Public License
  17     along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
  18
  19 */
  20
  21
  22 #include "ffmpeg_encoder.h"
  23 #include "film.h"
  24 #include "job.h"
  25 #include "player.h"
  26 #include "player_video.h"
  27 #include "log.h"
  28 #include "image.h"
  29 #include "cross.h"
  30 #include "compose.hpp"
  31 #include <iostream>
  32
  33 #include "i18n.h"
  34
  35
  36 using std::cout;
  37 using std::make_shared;
  38 using std::pair;
  39 using std::runtime_error;
  40 using std::shared_ptr;
  41 using std::string;
  42 using std::weak_ptr;
  43 using boost::bind;
  44 using boost::optional;
  45 using namespace dcpomatic;
  46 #if BOOST_VERSION >= 106100
  47 using namespace boost::placeholders;
  48 #endif
  49
  50
/** Id given to the video stream, and stream index put on encoded video packets */
int FFmpegFileEncoder::_video_stream_index = 0;
/** Stream index of the first audio stream; audio stream i is created with index _audio_stream_index_base + i */
int FFmpegFileEncoder::_audio_stream_index_base = 1;
  53
  54
  55 class ExportAudioStream
  56 {
  57 public:
  58         ExportAudioStream (string codec_name, int channels, int frame_rate, AVSampleFormat sample_format, AVFormatContext* format_context, int stream_index)
  59                 : _format_context (format_context)
  60                 , _stream_index (stream_index)
  61         {
  62                 _codec = avcodec_find_encoder_by_name (codec_name.c_str());
  63                 if (!_codec) {
  64                         throw runtime_error (String::compose("could not find FFmpeg encoder %1", codec_name));
  65                 }
  66
  67                 _codec_context = avcodec_alloc_context3 (_codec);
  68                 if (!_codec_context) {
  69                         throw runtime_error ("could not allocate FFmpeg audio context");
  70                 }
  71
  72                 avcodec_get_context_defaults3 (_codec_context, _codec);
  73
  74                 /* XXX: configurable */
  75                 _codec_context->bit_rate = channels * 128 * 1024;
  76                 _codec_context->sample_fmt = sample_format;
  77                 _codec_context->sample_rate = frame_rate;
  78                 _codec_context->channel_layout = av_get_default_channel_layout (channels);
  79                 _codec_context->channels = channels;
  80
  81                 _stream = avformat_new_stream (format_context, _codec);
  82                 if (!_stream) {
  83                         throw runtime_error ("could not create FFmpeg output audio stream");
  84                 }
  85
  86                 _stream->id = stream_index;
  87                 _stream->disposition |= AV_DISPOSITION_DEFAULT;
  88 DCPOMATIC_DISABLE_WARNINGS
  89                 _stream->codec = _codec_context;
  90 DCPOMATIC_ENABLE_WARNINGS
  91
  92                 int r = avcodec_open2 (_codec_context, _codec, 0);
  93                 if (r < 0) {
  94                         char buffer[256];
  95                         av_strerror (r, buffer, sizeof(buffer));
  96                         throw runtime_error (String::compose("could not open FFmpeg audio codec (%1)", buffer));
  97                 }
  98         }
  99
 100         ~ExportAudioStream ()
 101         {
 102                 avcodec_close (_codec_context);
 103         }
 104
 105         ExportAudioStream (ExportAudioStream const&) = delete;
 106         ExportAudioStream& operator= (ExportAudioStream const&) = delete;
 107
 108         int frame_size () const {
 109                 return _codec_context->frame_size;
 110         }
 111
 112         bool flush ()
 113         {
 114                 AVPacket packet;
 115                 av_init_packet (&packet);
 116                 packet.data = 0;
 117                 packet.size = 0;
 118                 bool flushed = false;
 119
 120                 int got_packet;
 121 DCPOMATIC_DISABLE_WARNINGS
 122                 avcodec_encode_audio2 (_codec_context, &packet, 0, &got_packet);
 123 DCPOMATIC_ENABLE_WARNINGS
 124                 if (got_packet) {
 125                         packet.stream_index = 0;
 126                         av_interleaved_write_frame (_format_context, &packet);
 127                 } else {
 128                         flushed = true;
 129                 }
 130                 av_packet_unref (&packet);
 131                 return flushed;
 132         }
 133
 134         void write (int size, int channel_offset, int channels, float** data, int64_t sample_offset)
 135         {
 136                 DCPOMATIC_ASSERT (size);
 137
 138                 auto frame = av_frame_alloc ();
 139                 DCPOMATIC_ASSERT (frame);
 140
 141                 int const buffer_size = av_samples_get_buffer_size (0, channels, size, _codec_context->sample_fmt, 0);
 142                 DCPOMATIC_ASSERT (buffer_size >= 0);
 143
 144                 auto samples = av_malloc (buffer_size);
 145                 DCPOMATIC_ASSERT (samples);
 146
 147                 frame->nb_samples = size;
 148                 int r = avcodec_fill_audio_frame (frame, channels, _codec_context->sample_fmt, (const uint8_t *) samples, buffer_size, 0);
 149                 DCPOMATIC_ASSERT (r >= 0);
 150
 151                 switch (_codec_context->sample_fmt) {
 152                 case AV_SAMPLE_FMT_S16:
 153                 {
 154                         int16_t* q = reinterpret_cast<int16_t*> (samples);
 155                         for (int i = 0; i < size; ++i) {
 156                                 for (int j = 0; j < channels; ++j) {
 157                                         *q++ = data[j + channel_offset][i] * 32767;
 158                                 }
 159                         }
 160                         break;
 161                 }
 162                 case AV_SAMPLE_FMT_S32:
 163                 {
 164                         int32_t* q = reinterpret_cast<int32_t*> (samples);
 165                         for (int i = 0; i < size; ++i) {
 166                                 for (int j = 0; j < channels; ++j) {
 167                                         *q++ = data[j + channel_offset][i] * 2147483647;
 168                                 }
 169                         }
 170                         break;
 171                 }
 172                 case AV_SAMPLE_FMT_FLTP:
 173                 {
 174                         float* q = reinterpret_cast<float*> (samples);
 175                         for (int i = 0; i < channels; ++i) {
 176                                 memcpy (q, data[i + channel_offset], sizeof(float) * size);
 177                                 q += size;
 178                         }
 179                         break;
 180                 }
 181                 default:
 182                         DCPOMATIC_ASSERT (false);
 183                 }
 184
 185                 DCPOMATIC_ASSERT (_codec_context->time_base.num == 1);
 186                 frame->pts = sample_offset * _codec_context->time_base.den / _codec_context->sample_rate;
 187
 188                 AVPacket packet;
 189                 av_init_packet (&packet);
 190                 packet.data = 0;
 191                 packet.size = 0;
 192                 int got_packet;
 193
 194                 DCPOMATIC_DISABLE_WARNINGS
 195                 if (avcodec_encode_audio2 (_codec_context, &packet, frame, &got_packet) < 0) {
 196                         throw EncodeError ("FFmpeg audio encode failed");
 197                 }
 198                 DCPOMATIC_ENABLE_WARNINGS
 199
 200                 if (got_packet && packet.size) {
 201                         packet.stream_index = _stream_index;
 202                         av_interleaved_write_frame (_format_context, &packet);
 203                         av_packet_unref (&packet);
 204                 }
 205
 206                 av_free (samples);
 207                 av_frame_free (&frame);
 208         }
 209
 210 private:
 211         AVFormatContext* _format_context;
 212         AVCodec* _codec;
 213         AVCodecContext* _codec_context;
 214         AVStream* _stream;
 215         int _stream_index;
 216 };
 217
 218
 219 FFmpegFileEncoder::FFmpegFileEncoder (
 220         dcp::Size video_frame_size,
 221         int video_frame_rate,
 222         int audio_frame_rate,
 223         int channels,
 224         ExportFormat format,
 225         bool audio_stream_per_channel,
 226         int x264_crf,
 227         boost::filesystem::path output
 228         )
 229         : _audio_stream_per_channel (audio_stream_per_channel)
 230         , _audio_channels (channels)
 231         , _output (output)
 232         , _video_frame_size (video_frame_size)
 233         , _video_frame_rate (video_frame_rate)
 234         , _audio_frame_rate (audio_frame_rate)
 235 {
 236         _pixel_format = pixel_format (format);
 237
 238         switch (format) {
 239         case ExportFormat::PRORES:
 240                 _sample_format = AV_SAMPLE_FMT_S16;
 241                 _video_codec_name = "prores_ks";
 242                 _audio_codec_name = "pcm_s16le";
 243                 av_dict_set (&_video_options, "profile", "3", 0);
 244                 av_dict_set (&_video_options, "threads", "auto", 0);
 245                 break;
 246         case ExportFormat::H264_AAC:
 247                 _sample_format = AV_SAMPLE_FMT_FLTP;
 248                 _video_codec_name = "libx264";
 249                 _audio_codec_name = "aac";
 250                 av_dict_set_int (&_video_options, "crf", x264_crf, 0);
 251                 break;
 252         case ExportFormat::H264_PCM:
 253                 _sample_format = AV_SAMPLE_FMT_S32;
 254                 _video_codec_name = "libx264";
 255                 _audio_codec_name = "pcm_s24le";
 256                 av_dict_set_int (&_video_options, "crf", x264_crf, 0);
 257                 break;
 258         default:
 259                 DCPOMATIC_ASSERT (false);
 260         }
 261
 262         int r = avformat_alloc_output_context2 (&_format_context, 0, 0, _output.string().c_str());
 263         if (!_format_context) {
 264                 throw runtime_error (String::compose("could not allocate FFmpeg format context (%1)", r));
 265         }
 266
 267         setup_video ();
 268         setup_audio ();
 269
 270         r = avio_open_boost (&_format_context->pb, _output, AVIO_FLAG_WRITE);
 271         if (r < 0) {
 272                 throw runtime_error (String::compose("could not open FFmpeg output file %1 (%2)", _output.string(), r));
 273         }
 274
 275         AVDictionary* options = nullptr;
 276
 277         if (avformat_write_header (_format_context, &options) < 0) {
 278                 throw runtime_error ("could not write header to FFmpeg output file");
 279         }
 280
 281         _pending_audio.reset (new AudioBuffers(channels, 0));
 282 }
 283
 284
/** Release the audio streams, the video encoder and the output context */
FFmpegFileEncoder::~FFmpegFileEncoder ()
{
        /* Drop our references to the audio streams first; ~ExportAudioStream closes
           each stream's encoder, and this happens before the format context is freed.
        */
        _audio_streams.clear ();
        avcodec_close (_video_codec_context);
        avformat_free_context (_format_context);
}
 291
 292
 293 AVPixelFormat
 294 FFmpegFileEncoder::pixel_format (ExportFormat format)
 295 {
 296         switch (format) {
 297         case ExportFormat::PRORES:
 298                 return AV_PIX_FMT_YUV422P10;
 299         case ExportFormat::H264_AAC:
 300         case ExportFormat::H264_PCM:
 301                 return AV_PIX_FMT_YUV420P;
 302         default:
 303                 DCPOMATIC_ASSERT (false);
 304         }
 305
 306         return AV_PIX_FMT_YUV422P10;
 307 }
 308
 309
 310 void
 311 FFmpegFileEncoder::setup_video ()
 312 {
 313         _video_codec = avcodec_find_encoder_by_name (_video_codec_name.c_str());
 314         if (!_video_codec) {
 315                 throw runtime_error (String::compose ("could not find FFmpeg encoder %1", _video_codec_name));
 316         }
 317
 318         _video_codec_context = avcodec_alloc_context3 (_video_codec);
 319         if (!_video_codec_context) {
 320                 throw runtime_error ("could not allocate FFmpeg video context");
 321         }
 322
 323         avcodec_get_context_defaults3 (_video_codec_context, _video_codec);
 324
 325         /* Variable quantisation */
 326         _video_codec_context->global_quality = 0;
 327         _video_codec_context->width = _video_frame_size.width;
 328         _video_codec_context->height = _video_frame_size.height;
 329         _video_codec_context->time_base = (AVRational) { 1, _video_frame_rate };
 330         _video_codec_context->pix_fmt = _pixel_format;
 331         _video_codec_context->flags |= AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_GLOBAL_HEADER;
 332
 333         _video_stream = avformat_new_stream (_format_context, _video_codec);
 334         if (!_video_stream) {
 335                 throw runtime_error ("could not create FFmpeg output video stream");
 336         }
 337
 338         _video_stream->id = _video_stream_index;
 339 DCPOMATIC_DISABLE_WARNINGS
 340         _video_stream->codec = _video_codec_context;
 341 DCPOMATIC_ENABLE_WARNINGS
 342
 343         if (avcodec_open2 (_video_codec_context, _video_codec, &_video_options) < 0) {
 344                 throw runtime_error ("could not open FFmpeg video codec");
 345         }
 346 }
 347
 348
 349 void
 350 FFmpegFileEncoder::setup_audio ()
 351 {
 352         int const streams = _audio_stream_per_channel ? _audio_channels : 1;
 353         int const channels_per_stream = _audio_stream_per_channel ? 1 : _audio_channels;
 354
 355         for (int i = 0; i < streams; ++i) {
 356                 _audio_streams.push_back(
 357                         make_shared<ExportAudioStream>(
 358                                 _audio_codec_name, channels_per_stream, _audio_frame_rate, _sample_format, _format_context, _audio_stream_index_base + i
 359                                 )
 360                         );
 361         }
 362 }
 363
 364
 365 void
 366 FFmpegFileEncoder::flush ()
 367 {
 368         if (_pending_audio->frames() > 0) {
 369                 audio_frame (_pending_audio->frames ());
 370         }
 371
 372         bool flushed_video = false;
 373         bool flushed_audio = false;
 374
 375         while (!flushed_video || !flushed_audio) {
 376                 AVPacket packet;
 377                 av_init_packet (&packet);
 378                 packet.data = 0;
 379                 packet.size = 0;
 380
 381                 int got_packet;
 382 DCPOMATIC_DISABLE_WARNINGS
 383                 avcodec_encode_video2 (_video_codec_context, &packet, 0, &got_packet);
 384 DCPOMATIC_ENABLE_WARNINGS
 385                 if (got_packet) {
 386                         packet.stream_index = 0;
 387                         av_interleaved_write_frame (_format_context, &packet);
 388                 } else {
 389                         flushed_video = true;
 390                 }
 391                 av_packet_unref (&packet);
 392
 393                 flushed_audio = true;
 394                 for (auto i: _audio_streams) {
 395                         if (!i->flush()) {
 396                                 flushed_audio = false;
 397                         }
 398                 }
 399         }
 400
 401         av_write_trailer (_format_context);
 402 }
 403
 404
 405 void
 406 FFmpegFileEncoder::video (shared_ptr<PlayerVideo> video, DCPTime time)
 407 {
 408         /* All our output formats are video range at the moment */
 409         auto image = video->image (
 410                 bind (&PlayerVideo::force, _1, _pixel_format),
 411                 VideoRange::VIDEO,
 412                 true,
 413                 false
 414                 );
 415
 416         auto frame = av_frame_alloc ();
 417         DCPOMATIC_ASSERT (frame);
 418
 419         {
 420                 boost::mutex::scoped_lock lm (_pending_images_mutex);
 421                 _pending_images[image->data()[0]] = image;
 422         }
 423
 424         for (int i = 0; i < 3; ++i) {
 425                 AVBufferRef* buffer = av_buffer_create(image->data()[i], image->stride()[i] * image->size().height, &buffer_free, this, 0);
 426                 frame->buf[i] = av_buffer_ref (buffer);
 427                 frame->data[i] = buffer->data;
 428                 frame->linesize[i] = image->stride()[i];
 429                 av_buffer_unref (&buffer);
 430         }
 431
 432         frame->width = image->size().width;
 433         frame->height = image->size().height;
 434         frame->format = _pixel_format;
 435         DCPOMATIC_ASSERT (_video_stream->time_base.num == 1);
 436         frame->pts = time.get() * _video_stream->time_base.den / DCPTime::HZ;
 437
 438         AVPacket packet;
 439         av_init_packet (&packet);
 440         packet.data = 0;
 441         packet.size = 0;
 442
 443         int got_packet;
 444 DCPOMATIC_DISABLE_WARNINGS
 445         if (avcodec_encode_video2 (_video_codec_context, &packet, frame, &got_packet) < 0) {
 446                 throw EncodeError ("FFmpeg video encode failed");
 447         }
 448 DCPOMATIC_ENABLE_WARNINGS
 449
 450         if (got_packet && packet.size) {
 451                 packet.stream_index = _video_stream_index;
 452                 av_interleaved_write_frame (_format_context, &packet);
 453                 av_packet_unref (&packet);
 454         }
 455
 456         av_frame_free (&frame);
 457
 458 }
 459
 460
 461 /** Called when the player gives us some audio */
 462 void
 463 FFmpegFileEncoder::audio (shared_ptr<AudioBuffers> audio)
 464 {
 465         _pending_audio->append (audio);
 466
 467         DCPOMATIC_ASSERT (!_audio_streams.empty());
 468         int frame_size = _audio_streams[0]->frame_size();
 469         if (frame_size == 0) {
 470                 /* codec has AV_CODEC_CAP_VARIABLE_FRAME_SIZE */
 471                 frame_size = _audio_frame_rate / _video_frame_rate;
 472         }
 473
 474         while (_pending_audio->frames() >= frame_size) {
 475                 audio_frame (frame_size);
 476         }
 477 }
 478
 479
 480 void
 481 FFmpegFileEncoder::audio_frame (int size)
 482 {
 483         if (_audio_stream_per_channel) {
 484                 int offset = 0;
 485                 for (auto i: _audio_streams) {
 486                         i->write (size, offset, 1, _pending_audio->data(), _audio_frames);
 487                         ++offset;
 488                 }
 489         } else {
 490                 DCPOMATIC_ASSERT (!_audio_streams.empty());
 491                 DCPOMATIC_ASSERT (_pending_audio->channels());
 492                 _audio_streams[0]->write (size, 0, _pending_audio->channels(), _pending_audio->data(), _audio_frames);
 493         }
 494
 495         _pending_audio->trim_start (size);
 496         _audio_frames += size;
 497 }
 498
 499
/** Does nothing: this encoder ignores the text and period that it is given */
void
FFmpegFileEncoder::subtitle (PlayerText, DCPTimePeriod)
{

}
 505
 506
 507 void
 508 FFmpegFileEncoder::buffer_free (void* opaque, uint8_t* data)
 509 {
 510         reinterpret_cast<FFmpegFileEncoder*>(opaque)->buffer_free2(data);
 511 }
 512
 513
 514 void
 515 FFmpegFileEncoder::buffer_free2 (uint8_t* data)
 516 {
 517         boost::mutex::scoped_lock lm (_pending_images_mutex);
 518         if (_pending_images.find(data) != _pending_images.end()) {
 519                 _pending_images.erase (data);
 520         }
 521 }