src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "exceptions.h"
  45 #include "image.h"
  46 #include "util.h"
  47 #include "log.h"
  48 #include "ffmpeg_decoder.h"
  49 #include "filter_graph.h"
  50 #include "subtitle.h"
  51
  52 #include "i18n.h"
  53
  54 using std::cout;
  55 using std::string;
  56 using std::vector;
  57 using std::stringstream;
  58 using std::list;
  59 using boost::shared_ptr;
  60 using boost::optional;
  61 using boost::dynamic_pointer_cast;
  62 using libdcp::Size;
  63
  64 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio, bool subtitles, bool video_sync)
  65         : Decoder (f)
  66         , VideoDecoder (f)
  67         , AudioDecoder (f)
  68         , _ffmpeg_content (c)
  69         , _format_context (0)
  70         , _video_stream (-1)
  71         , _frame (0)
  72         , _video_codec_context (0)
  73         , _video_codec (0)
  74         , _audio_codec_context (0)
  75         , _audio_codec (0)
  76         , _subtitle_codec_context (0)
  77         , _subtitle_codec (0)
  78         , _decode_video (video)
  79         , _decode_audio (audio)
  80         , _decode_subtitles (subtitles)
  81         , _video_sync (video_sync)
  82 {
  83         setup_general ();
  84         setup_video ();
  85         setup_audio ();
  86         setup_subtitle ();
  87
  88         if (!video_sync) {
  89                 _first_video = 0;
  90         }
  91 }
  92
  93 FFmpegDecoder::~FFmpegDecoder ()
  94 {
  95         if (_audio_codec_context) {
  96                 avcodec_close (_audio_codec_context);
  97         }
  98
  99         if (_video_codec_context) {
 100                 avcodec_close (_video_codec_context);
 101         }
 102
 103         if (_subtitle_codec_context) {
 104                 avcodec_close (_subtitle_codec_context);
 105         }
 106
 107         av_free (_frame);
 108
 109         avformat_close_input (&_format_context);
 110 }
 111
 112 void
 113 FFmpegDecoder::setup_general ()
 114 {
 115         av_register_all ();
 116
 117         if (avformat_open_input (&_format_context, _ffmpeg_content->file().string().c_str(), 0, 0) < 0) {
 118                 throw OpenFileError (_ffmpeg_content->file().string ());
 119         }
 120
 121         if (avformat_find_stream_info (_format_context, 0) < 0) {
 122                 throw DecodeError (_("could not find stream information"));
 123         }
 124
 125         /* Find video, audio and subtitle streams and choose the first of each */
 126
 127         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 128                 AVStream* s = _format_context->streams[i];
 129                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 130                         _video_stream = i;
 131                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 132
 133                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 134                            so bodge it here.  No idea why we should have to do this.
 135                         */
 136
 137                         if (s->codec->channel_layout == 0) {
 138                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 139                         }
 140
 141                         _audio_streams.push_back (
 142                                 FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 143                                 );
 144
 145                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 146                         _subtitle_streams.push_back (FFmpegSubtitleStream (stream_name (s), i));
 147                 }
 148         }
 149
 150         if (_video_stream < 0) {
 151                 throw DecodeError (N_("could not find video stream"));
 152         }
 153
 154         _frame = avcodec_alloc_frame ();
 155         if (_frame == 0) {
 156                 throw DecodeError (N_("could not allocate frame"));
 157         }
 158 }
 159
 160 void
 161 FFmpegDecoder::setup_video ()
 162 {
 163         _video_codec_context = _format_context->streams[_video_stream]->codec;
 164         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 165
 166         if (_video_codec == 0) {
 167                 throw DecodeError (_("could not find video decoder"));
 168         }
 169
 170         if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
 171                 throw DecodeError (N_("could not open video decoder"));
 172         }
 173 }
 174
 175 void
 176 FFmpegDecoder::setup_audio ()
 177 {
 178         if (!_ffmpeg_content->audio_stream ()) {
 179                 return;
 180         }
 181
 182         _audio_codec_context = _format_context->streams[_ffmpeg_content->audio_stream()->id]->codec;
 183         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 184
 185         if (_audio_codec == 0) {
 186                 throw DecodeError (_("could not find audio decoder"));
 187         }
 188
 189         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 190                 throw DecodeError (N_("could not open audio decoder"));
 191         }
 192 }
 193
 194 void
 195 FFmpegDecoder::setup_subtitle ()
 196 {
 197         if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->id >= int (_format_context->nb_streams)) {
 198                 return;
 199         }
 200
 201         _subtitle_codec_context = _format_context->streams[_ffmpeg_content->subtitle_stream()->id]->codec;
 202         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 203
 204         if (_subtitle_codec == 0) {
 205                 throw DecodeError (_("could not find subtitle decoder"));
 206         }
 207
 208         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 209                 throw DecodeError (N_("could not open subtitle decoder"));
 210         }
 211 }
 212
 213
 214 bool
 215 FFmpegDecoder::pass ()
 216 {
 217         int r = av_read_frame (_format_context, &_packet);
 218
 219         if (r < 0) {
 220                 if (r != AVERROR_EOF) {
 221                         /* Maybe we should fail here, but for now we'll just finish off instead */
 222                         char buf[256];
 223                         av_strerror (r, buf, sizeof(buf));
 224                         _film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 225                 }
 226
 227                 /* Get any remaining frames */
 228
 229                 _packet.data = 0;
 230                 _packet.size = 0;
 231
 232                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 233
 234                 int frame_finished;
 235
 236                 if (_decode_video) {
 237                         while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 238                                 filter_and_emit_video (_frame);
 239                         }
 240                 }
 241
 242                 if (_ffmpeg_content->audio_stream() && _decode_audio) {
 243                         decode_audio_packet ();
 244                 }
 245
 246                 return true;
 247         }
 248
 249         avcodec_get_frame_defaults (_frame);
 250
 251         if (_packet.stream_index == _video_stream && _decode_video) {
 252
 253                 int frame_finished;
 254                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 255                 if (r >= 0 && frame_finished) {
 256
 257                         if (r != _packet.size) {
 258                                 _film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));
 259                         }
 260
 261                         if (_video_sync) {
 262                                 out_with_sync ();
 263                         } else {
 264                                 filter_and_emit_video (_frame);
 265                         }
 266                 }
 267
 268         } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->id && _decode_audio) {
 269                 decode_audio_packet ();
 270         } else if (_ffmpeg_content->subtitle_stream() && _packet.stream_index == _ffmpeg_content->subtitle_stream()->id && _decode_subtitles && _first_video) {
 271
 272                 int got_subtitle;
 273                 AVSubtitle sub;
 274                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 275                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 276                            indicate that the previous subtitle should stop.
 277                         */
 278                         if (sub.num_rects > 0) {
 279                                 shared_ptr<TimedSubtitle> ts;
 280                                 try {
 281                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
 282                                 } catch (...) {
 283                                         /* some problem with the subtitle; we probably didn't understand it */
 284                                 }
 285                         } else {
 286                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 287                         }
 288                         avsubtitle_free (&sub);
 289                 }
 290         }
 291
 292         av_free_packet (&_packet);
 293         return false;
 294 }
 295
 296 /** @param data pointer to array of pointers to buffers.
 297  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 298  */
 299 shared_ptr<AudioBuffers>
 300 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 301 {
 302         assert (_ffmpeg_content->audio_channels());
 303         assert (bytes_per_audio_sample());
 304
 305         /* Deinterleave and convert to float */
 306
 307         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 308
 309         int const total_samples = size / bytes_per_audio_sample();
 310         int const frames = total_samples / _ffmpeg_content->audio_channels();
 311         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 312
 313         switch (audio_sample_format()) {
 314         case AV_SAMPLE_FMT_S16:
 315         {
 316                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 317                 int sample = 0;
 318                 int channel = 0;
 319                 for (int i = 0; i < total_samples; ++i) {
 320                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 321
 322                         ++channel;
 323                         if (channel == _ffmpeg_content->audio_channels()) {
 324                                 channel = 0;
 325                                 ++sample;
 326                         }
 327                 }
 328         }
 329         break;
 330
 331         case AV_SAMPLE_FMT_S16P:
 332         {
 333                 int16_t** p = reinterpret_cast<int16_t **> (data);
 334                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 335                         for (int j = 0; j < frames; ++j) {
 336                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 337                         }
 338                 }
 339         }
 340         break;
 341
 342         case AV_SAMPLE_FMT_S32:
 343         {
 344                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 345                 int sample = 0;
 346                 int channel = 0;
 347                 for (int i = 0; i < total_samples; ++i) {
 348                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 349
 350                         ++channel;
 351                         if (channel == _ffmpeg_content->audio_channels()) {
 352                                 channel = 0;
 353                                 ++sample;
 354                         }
 355                 }
 356         }
 357         break;
 358
 359         case AV_SAMPLE_FMT_FLT:
 360         {
 361                 float* p = reinterpret_cast<float*> (data[0]);
 362                 int sample = 0;
 363                 int channel = 0;
 364                 for (int i = 0; i < total_samples; ++i) {
 365                         audio->data(channel)[sample] = *p++;
 366
 367                         ++channel;
 368                         if (channel == _ffmpeg_content->audio_channels()) {
 369                                 channel = 0;
 370                                 ++sample;
 371                         }
 372                 }
 373         }
 374         break;
 375
 376         case AV_SAMPLE_FMT_FLTP:
 377         {
 378                 float** p = reinterpret_cast<float**> (data);
 379                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 380                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 381                 }
 382         }
 383         break;
 384
 385         default:
 386                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 387         }
 388
 389         return audio;
 390 }
 391
 392 float
 393 FFmpegDecoder::frames_per_second () const
 394 {
 395         AVStream* s = _format_context->streams[_video_stream];
 396
 397         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 398                 return av_q2d (s->avg_frame_rate);
 399         }
 400
 401         return av_q2d (s->r_frame_rate);
 402 }
 403
 404 AVSampleFormat
 405 FFmpegDecoder::audio_sample_format () const
 406 {
 407         if (_audio_codec_context == 0) {
 408                 return (AVSampleFormat) 0;
 409         }
 410
 411         return _audio_codec_context->sample_fmt;
 412 }
 413
 414 libdcp::Size
 415 FFmpegDecoder::native_size () const
 416 {
 417         return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
 418 }
 419
 420 PixelFormat
 421 FFmpegDecoder::pixel_format () const
 422 {
 423         return _video_codec_context->pix_fmt;
 424 }
 425
 426 int
 427 FFmpegDecoder::time_base_numerator () const
 428 {
 429         return _video_codec_context->time_base.num;
 430 }
 431
 432 int
 433 FFmpegDecoder::time_base_denominator () const
 434 {
 435         return _video_codec_context->time_base.den;
 436 }
 437
 438 int
 439 FFmpegDecoder::sample_aspect_ratio_numerator () const
 440 {
 441         return _video_codec_context->sample_aspect_ratio.num;
 442 }
 443
 444 int
 445 FFmpegDecoder::sample_aspect_ratio_denominator () const
 446 {
 447         return _video_codec_context->sample_aspect_ratio.den;
 448 }
 449
 450 string
 451 FFmpegDecoder::stream_name (AVStream* s) const
 452 {
 453         stringstream n;
 454
 455         AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
 456         if (lang) {
 457                 n << lang->value;
 458         }
 459
 460         AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
 461         if (title) {
 462                 if (!n.str().empty()) {
 463                         n << N_(" ");
 464                 }
 465                 n << title->value;
 466         }
 467
 468         if (n.str().empty()) {
 469                 n << N_("unknown");
 470         }
 471
 472         return n.str ();
 473 }
 474
 475 int
 476 FFmpegDecoder::bytes_per_audio_sample () const
 477 {
 478         return av_get_bytes_per_sample (audio_sample_format ());
 479 }
 480
 481 void
 482 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 483 {
 484         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 485
 486         shared_ptr<FilterGraph> graph;
 487
 488         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 489         while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 490                 ++i;
 491         }
 492
 493         if (i == _filter_graphs.end ()) {
 494                 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 495                 _filter_graphs.push_back (graph);
 496                 _film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
 497         } else {
 498                 graph = *i;
 499         }
 500
 501         list<shared_ptr<Image> > images = graph->process (frame);
 502
 503         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 504                 emit_video (*i, frame_time ());
 505         }
 506 }
 507
 508 bool
 509 FFmpegDecoder::seek (double p)
 510 {
 511         return do_seek (p, false);
 512 }
 513
 514 bool
 515 FFmpegDecoder::seek_to_last ()
 516 {
 517         /* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
 518            (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
 519            staying in the same place.
 520         */
 521         return do_seek (last_source_time(), true);
 522 }
 523
 524 bool
 525 FFmpegDecoder::do_seek (double p, bool backwards)
 526 {
 527         int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
 528
 529         int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
 530
 531         avcodec_flush_buffers (_video_codec_context);
 532         if (_subtitle_codec_context) {
 533                 avcodec_flush_buffers (_subtitle_codec_context);
 534         }
 535
 536         return r < 0;
 537 }
 538
 539 void
 540 FFmpegDecoder::out_with_sync ()
 541 {
 542         /* Where we are in the output, in seconds */
 543         double const out_pts_seconds = video_frame() / frames_per_second();
 544
 545         /* Where we are in the source, in seconds */
 546         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 547                 * av_frame_get_best_effort_timestamp(_frame);
 548
 549         _film->log()->log (
 550                 String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),
 551                 Log::VERBOSE
 552                 );
 553
 554         if (!_first_video) {
 555                 _first_video = source_pts_seconds;
 556         }
 557
 558         /* Difference between where we are and where we should be */
 559         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 560         double const one_frame = 1 / frames_per_second();
 561
 562         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 563         if (delta > one_frame) {
 564                 int const extra = rint (delta / one_frame);
 565                 for (int i = 0; i < extra; ++i) {
 566                         repeat_last_video ();
 567                         _film->log()->log (
 568                                 String::compose (
 569                                         N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
 570                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 571                                         )
 572                                 );
 573                 }
 574         }
 575
 576         if (delta > -one_frame) {
 577                 /* Process this frame */
 578                 filter_and_emit_video (_frame);
 579         } else {
 580                 /* Otherwise we are omitting a frame to keep things right */
 581                 _film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
 582         }
 583 }
 584
 585 void
 586 FFmpegDecoder::film_changed (Film::Property p)
 587 {
 588         switch (p) {
 589         case Film::CROP:
 590         case Film::FILTERS:
 591         {
 592                 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 593                 _filter_graphs.clear ();
 594         }
 595         OutputChanged ();
 596         break;
 597
 598         default:
 599                 break;
 600         }
 601 }
 602
 603 /** @return Length (in video frames) according to our content's header */
 604 ContentVideoFrame
 605 FFmpegDecoder::video_length () const
 606 {
 607         return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
 608 }
 609
 610 double
 611 FFmpegDecoder::frame_time () const
 612 {
 613         return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);
 614 }
 615
 616 void
 617 FFmpegDecoder::decode_audio_packet ()
 618 {
 619         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 620            several times.
 621         */
 622
 623         AVPacket copy_packet = _packet;
 624
 625         while (copy_packet.size > 0) {
 626
 627                 int frame_finished;
 628                 int const decode_result = avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &copy_packet);
 629                 if (decode_result >= 0 && frame_finished) {
 630
 631                         /* Where we are in the source, in seconds */
 632                         double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
 633                                 * av_frame_get_best_effort_timestamp(_frame);
 634
 635                         /* We only decode audio if we've had our first video packet through, and if it
 636                            was before this packet.  Until then audio is thrown away.
 637                         */
 638
 639                         if ((_first_video && _first_video.get() <= source_pts_seconds) || !_decode_video) {
 640
 641                                 if (!_first_audio && _decode_video) {
 642                                         _first_audio = source_pts_seconds;
 643
 644                                         /* This is our first audio frame, and if we've arrived here we must have had our
 645                                            first video frame.  Push some silence to make up any gap between our first
 646                                            video frame and our first audio.
 647                                         */
 648
 649                                         /* frames of silence that we must push */
 650                                         int const s = rint ((_first_audio.get() - _first_video.get()) * _ffmpeg_content->audio_frame_rate ());
 651
 652                                         _film->log()->log (
 653                                                 String::compose (
 654                                                         N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
 655                                                         _first_video.get(), _first_audio.get(), s, _ffmpeg_content->audio_channels(), bytes_per_audio_sample()
 656                                                         )
 657                                                 );
 658
 659                                         if (s) {
 660                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), s));
 661                                                 audio->make_silent ();
 662                                                 Audio (audio);
 663                                         }
 664                                 }
 665
 666                                 int const data_size = av_samples_get_buffer_size (
 667                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 668                                         );
 669
 670                                 assert (_audio_codec_context->channels == _ffmpeg_content->audio_channels());
 671                                 Audio (deinterleave_audio (_frame->data, data_size));
 672                         }
 673                 }
 674
 675                 if (decode_result >= 0) {
 676                         copy_packet.data += decode_result;
 677                         copy_packet.size -= decode_result;
 678                 }
 679         }
 680 }