src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
   20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 #include "i18n.h"
  54
  55 using std::cout;
  56 using std::string;
  57 using std::vector;
  58 using std::stringstream;
  59 using std::list;
  60 using boost::shared_ptr;
  61 using boost::optional;
  62 using boost::dynamic_pointer_cast;
  63 using libdcp::Size;
  64
  65 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<FFmpegContent> c, DecodeOptions o)
  66         : Decoder (f, o)
  67         , VideoDecoder (f, c, o)
  68         , AudioDecoder (f, c, o)
  69         , _ffmpeg_content (c)
  70         , _format_context (0)
  71         , _video_stream (-1)
  72         , _frame (0)
  73         , _video_codec_context (0)
  74         , _video_codec (0)
  75         , _audio_codec_context (0)
  76         , _audio_codec (0)
  77         , _subtitle_codec_context (0)
  78         , _subtitle_codec (0)
  79 {
  80         setup_general ();
  81         setup_video ();
  82         setup_audio ();
  83         setup_subtitle ();
  84
  85         if (!o.video_sync) {
  86                 _first_video = 0;
  87         }
  88 }
  89
  90 FFmpegDecoder::~FFmpegDecoder ()
  91 {
  92         if (_audio_codec_context) {
  93                 avcodec_close (_audio_codec_context);
  94         }
  95
  96         if (_video_codec_context) {
  97                 avcodec_close (_video_codec_context);
  98         }
  99
 100         if (_subtitle_codec_context) {
 101                 avcodec_close (_subtitle_codec_context);
 102         }
 103
 104         av_free (_frame);
 105
 106         avformat_close_input (&_format_context);
 107 }
 108
 109 void
 110 FFmpegDecoder::setup_general ()
 111 {
 112         av_register_all ();
 113
 114         if (avformat_open_input (&_format_context, _ffmpeg_content->file().string().c_str(), 0, 0) < 0) {
 115                 throw OpenFileError (_ffmpeg_content->file().string ());
 116         }
 117
 118         if (avformat_find_stream_info (_format_context, 0) < 0) {
 119                 throw DecodeError (_("could not find stream information"));
 120         }
 121
 122         /* Find video, audio and subtitle streams and choose the first of each */
 123
 124         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 125                 AVStream* s = _format_context->streams[i];
 126                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 127                         _video_stream = i;
 128                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 129
 130                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 131                            so bodge it here.  No idea why we should have to do this.
 132                         */
 133
 134                         if (s->codec->channel_layout == 0) {
 135                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 136                         }
 137
 138                         _audio_streams.push_back (
 139                                 FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 140                                 );
 141
 142                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 143                         _subtitle_streams.push_back (FFmpegSubtitleStream (stream_name (s), i));
 144                 }
 145         }
 146
 147         if (_video_stream < 0) {
 148                 throw DecodeError (N_("could not find video stream"));
 149         }
 150
 151         _frame = avcodec_alloc_frame ();
 152         if (_frame == 0) {
 153                 throw DecodeError (N_("could not allocate frame"));
 154         }
 155 }
 156
 157 void
 158 FFmpegDecoder::setup_video ()
 159 {
 160         _video_codec_context = _format_context->streams[_video_stream]->codec;
 161         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 162
 163         if (_video_codec == 0) {
 164                 throw DecodeError (_("could not find video decoder"));
 165         }
 166
 167         if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
 168                 throw DecodeError (N_("could not open video decoder"));
 169         }
 170 }
 171
 172 void
 173 FFmpegDecoder::setup_audio ()
 174 {
 175         if (!_ffmpeg_content->audio_stream ()) {
 176                 return;
 177         }
 178
 179         _audio_codec_context = _format_context->streams[_ffmpeg_content->audio_stream()->id]->codec;
 180         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 181
 182         if (_audio_codec == 0) {
 183                 throw DecodeError (_("could not find audio decoder"));
 184         }
 185
 186         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 187                 throw DecodeError (N_("could not open audio decoder"));
 188         }
 189 }
 190
 191 void
 192 FFmpegDecoder::setup_subtitle ()
 193 {
 194         if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->id >= int (_format_context->nb_streams)) {
 195                 return;
 196         }
 197
 198         _subtitle_codec_context = _format_context->streams[_ffmpeg_content->subtitle_stream()->id]->codec;
 199         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 200
 201         if (_subtitle_codec == 0) {
 202                 throw DecodeError (_("could not find subtitle decoder"));
 203         }
 204
 205         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 206                 throw DecodeError (N_("could not open subtitle decoder"));
 207         }
 208 }
 209
 210
 211 bool
 212 FFmpegDecoder::pass ()
 213 {
 214         int r = av_read_frame (_format_context, &_packet);
 215
 216         if (r < 0) {
 217                 if (r != AVERROR_EOF) {
 218                         /* Maybe we should fail here, but for now we'll just finish off instead */
 219                         char buf[256];
 220                         av_strerror (r, buf, sizeof(buf));
 221                         _film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 222                 }
 223
 224                 /* Get any remaining frames */
 225
 226                 _packet.data = 0;
 227                 _packet.size = 0;
 228
 229                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 230
 231                 int frame_finished;
 232
 233                 if (_opt.decode_video) {
 234                         while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 235                                 filter_and_emit_video (_frame);
 236                         }
 237                 }
 238
 239                 if (_ffmpeg_content->audio_stream() && _opt.decode_audio) {
 240                         decode_audio_packet ();
 241                 }
 242
 243                 return true;
 244         }
 245
 246         avcodec_get_frame_defaults (_frame);
 247
 248         if (_packet.stream_index == _video_stream && _opt.decode_video) {
 249
 250                 int frame_finished;
 251                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 252                 if (r >= 0 && frame_finished) {
 253
 254                         if (r != _packet.size) {
 255                                 _film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));
 256                         }
 257
 258                         if (_opt.video_sync) {
 259                                 out_with_sync ();
 260                         } else {
 261                                 filter_and_emit_video (_frame);
 262                         }
 263                 }
 264
 265         } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->id && _opt.decode_audio) {
 266                 decode_audio_packet ();
 267         } else if (_ffmpeg_content->subtitle_stream() && _packet.stream_index == _ffmpeg_content->subtitle_stream()->id && _opt.decode_subtitles && _first_video) {
 268
 269                 int got_subtitle;
 270                 AVSubtitle sub;
 271                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 272                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 273                            indicate that the previous subtitle should stop.
 274                         */
 275                         if (sub.num_rects > 0) {
 276                                 shared_ptr<TimedSubtitle> ts;
 277                                 try {
 278                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
 279                                 } catch (...) {
 280                                         /* some problem with the subtitle; we probably didn't understand it */
 281                                 }
 282                         } else {
 283                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 284                         }
 285                         avsubtitle_free (&sub);
 286                 }
 287         }
 288
 289         av_free_packet (&_packet);
 290         return false;
 291 }
 292
 293 /** @param data pointer to array of pointers to buffers.
 294  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 295  */
 296 shared_ptr<AudioBuffers>
 297 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 298 {
 299         assert (_ffmpeg_content->audio_channels());
 300         assert (bytes_per_audio_sample());
 301
 302         /* Deinterleave and convert to float */
 303
 304         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 305
 306         int const total_samples = size / bytes_per_audio_sample();
 307         int const frames = total_samples / _ffmpeg_content->audio_channels();
 308         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 309
 310         switch (audio_sample_format()) {
 311         case AV_SAMPLE_FMT_S16:
 312         {
 313                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 314                 int sample = 0;
 315                 int channel = 0;
 316                 for (int i = 0; i < total_samples; ++i) {
 317                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 318
 319                         ++channel;
 320                         if (channel == _ffmpeg_content->audio_channels()) {
 321                                 channel = 0;
 322                                 ++sample;
 323                         }
 324                 }
 325         }
 326         break;
 327
 328         case AV_SAMPLE_FMT_S16P:
 329         {
 330                 int16_t** p = reinterpret_cast<int16_t **> (data);
 331                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 332                         for (int j = 0; j < frames; ++j) {
 333                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 334                         }
 335                 }
 336         }
 337         break;
 338
 339         case AV_SAMPLE_FMT_S32:
 340         {
 341                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 342                 int sample = 0;
 343                 int channel = 0;
 344                 for (int i = 0; i < total_samples; ++i) {
 345                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 346
 347                         ++channel;
 348                         if (channel == _ffmpeg_content->audio_channels()) {
 349                                 channel = 0;
 350                                 ++sample;
 351                         }
 352                 }
 353         }
 354         break;
 355
 356         case AV_SAMPLE_FMT_FLT:
 357         {
 358                 float* p = reinterpret_cast<float*> (data[0]);
 359                 int sample = 0;
 360                 int channel = 0;
 361                 for (int i = 0; i < total_samples; ++i) {
 362                         audio->data(channel)[sample] = *p++;
 363
 364                         ++channel;
 365                         if (channel == _ffmpeg_content->audio_channels()) {
 366                                 channel = 0;
 367                                 ++sample;
 368                         }
 369                 }
 370         }
 371         break;
 372
 373         case AV_SAMPLE_FMT_FLTP:
 374         {
 375                 float** p = reinterpret_cast<float**> (data);
 376                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 377                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 378                 }
 379         }
 380         break;
 381
 382         default:
 383                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 384         }
 385
 386         return audio;
 387 }
 388
 389 float
 390 FFmpegDecoder::frames_per_second () const
 391 {
 392         AVStream* s = _format_context->streams[_video_stream];
 393
 394         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 395                 return av_q2d (s->avg_frame_rate);
 396         }
 397
 398         return av_q2d (s->r_frame_rate);
 399 }
 400
 401 AVSampleFormat
 402 FFmpegDecoder::audio_sample_format () const
 403 {
 404         if (_audio_codec_context == 0) {
 405                 return (AVSampleFormat) 0;
 406         }
 407
 408         return _audio_codec_context->sample_fmt;
 409 }
 410
 411 libdcp::Size
 412 FFmpegDecoder::native_size () const
 413 {
 414         return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
 415 }
 416
 417 PixelFormat
 418 FFmpegDecoder::pixel_format () const
 419 {
 420         return _video_codec_context->pix_fmt;
 421 }
 422
 423 int
 424 FFmpegDecoder::time_base_numerator () const
 425 {
 426         return _video_codec_context->time_base.num;
 427 }
 428
 429 int
 430 FFmpegDecoder::time_base_denominator () const
 431 {
 432         return _video_codec_context->time_base.den;
 433 }
 434
 435 int
 436 FFmpegDecoder::sample_aspect_ratio_numerator () const
 437 {
 438         return _video_codec_context->sample_aspect_ratio.num;
 439 }
 440
 441 int
 442 FFmpegDecoder::sample_aspect_ratio_denominator () const
 443 {
 444         return _video_codec_context->sample_aspect_ratio.den;
 445 }
 446
 447 string
 448 FFmpegDecoder::stream_name (AVStream* s) const
 449 {
 450         stringstream n;
 451
 452         AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
 453         if (lang) {
 454                 n << lang->value;
 455         }
 456
 457         AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
 458         if (title) {
 459                 if (!n.str().empty()) {
 460                         n << N_(" ");
 461                 }
 462                 n << title->value;
 463         }
 464
 465         if (n.str().empty()) {
 466                 n << N_("unknown");
 467         }
 468
 469         return n.str ();
 470 }
 471
 472 int
 473 FFmpegDecoder::bytes_per_audio_sample () const
 474 {
 475         return av_get_bytes_per_sample (audio_sample_format ());
 476 }
 477
 478 void
 479 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 480 {
 481         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 482
 483         shared_ptr<FilterGraph> graph;
 484
 485         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 486         while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 487                 ++i;
 488         }
 489
 490         if (i == _filter_graphs.end ()) {
 491                 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 492                 _filter_graphs.push_back (graph);
 493                 _film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
 494         } else {
 495                 graph = *i;
 496         }
 497
 498         list<shared_ptr<Image> > images = graph->process (frame);
 499
 500         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 501                 emit_video (*i, frame_time ());
 502         }
 503 }
 504
 505 bool
 506 FFmpegDecoder::seek (double p)
 507 {
 508         return do_seek (p, false);
 509 }
 510
 511 bool
 512 FFmpegDecoder::seek_to_last ()
 513 {
 514         /* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
 515            (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
 516            staying in the same place.
 517         */
 518         return do_seek (last_source_time(), true);
 519 }
 520
 521 bool
 522 FFmpegDecoder::do_seek (double p, bool backwards)
 523 {
 524         int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
 525
 526         int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
 527
 528         avcodec_flush_buffers (_video_codec_context);
 529         if (_subtitle_codec_context) {
 530                 avcodec_flush_buffers (_subtitle_codec_context);
 531         }
 532
 533         return r < 0;
 534 }
 535
 536 void
 537 FFmpegDecoder::out_with_sync ()
 538 {
 539         /* Where we are in the output, in seconds */
 540         double const out_pts_seconds = video_frame() / frames_per_second();
 541
 542         /* Where we are in the source, in seconds */
 543         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 544                 * av_frame_get_best_effort_timestamp(_frame);
 545
 546         _film->log()->log (
 547                 String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),
 548                 Log::VERBOSE
 549                 );
 550
 551         if (!_first_video) {
 552                 _first_video = source_pts_seconds;
 553         }
 554
 555         /* Difference between where we are and where we should be */
 556         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 557         double const one_frame = 1 / frames_per_second();
 558
 559         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 560         if (delta > one_frame) {
 561                 int const extra = rint (delta / one_frame);
 562                 for (int i = 0; i < extra; ++i) {
 563                         repeat_last_video ();
 564                         _film->log()->log (
 565                                 String::compose (
 566                                         N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
 567                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 568                                         )
 569                                 );
 570                 }
 571         }
 572
 573         if (delta > -one_frame) {
 574                 /* Process this frame */
 575                 filter_and_emit_video (_frame);
 576         } else {
 577                 /* Otherwise we are omitting a frame to keep things right */
 578                 _film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
 579         }
 580 }
 581
 582 void
 583 FFmpegDecoder::film_changed (Film::Property p)
 584 {
 585         switch (p) {
 586         case Film::CROP:
 587         case Film::FILTERS:
 588         {
 589                 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 590                 _filter_graphs.clear ();
 591         }
 592         OutputChanged ();
 593         break;
 594
 595         default:
 596                 break;
 597         }
 598 }
 599
 600 /** @return Length (in video frames) according to our content's header */
 601 ContentVideoFrame
 602 FFmpegDecoder::video_length () const
 603 {
 604         return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
 605 }
 606
 607 double
 608 FFmpegDecoder::frame_time () const
 609 {
 610         return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);
 611 }
 612
 613 void
 614 FFmpegDecoder::decode_audio_packet ()
 615 {
 616         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 617            several times.
 618         */
 619
 620         AVPacket copy_packet = _packet;
 621
 622         while (copy_packet.size > 0) {
 623
 624                 int frame_finished;
 625                 int const decode_result = avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &copy_packet);
 626                 if (decode_result >= 0 && frame_finished) {
 627
 628                         /* Where we are in the source, in seconds */
 629                         double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
 630                                 * av_frame_get_best_effort_timestamp(_frame);
 631
 632                         /* We only decode audio if we've had our first video packet through, and if it
 633                            was before this packet.  Until then audio is thrown away.
 634                         */
 635
 636                         if ((_first_video && _first_video.get() <= source_pts_seconds) || !_opt.decode_video) {
 637
 638                                 if (!_first_audio && _opt.decode_video) {
 639                                         _first_audio = source_pts_seconds;
 640
 641                                         /* This is our first audio frame, and if we've arrived here we must have had our
 642                                            first video frame.  Push some silence to make up any gap between our first
 643                                            video frame and our first audio.
 644                                         */
 645
 646                                         /* frames of silence that we must push */
 647                                         int const s = rint ((_first_audio.get() - _first_video.get()) * _ffmpeg_content->audio_frame_rate ());
 648
 649                                         _film->log()->log (
 650                                                 String::compose (
 651                                                         N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
 652                                                         _first_video.get(), _first_audio.get(), s, _ffmpeg_content->audio_channels(), bytes_per_audio_sample()
 653                                                         )
 654                                                 );
 655
 656                                         if (s) {
 657                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), s));
 658                                                 audio->make_silent ();
 659                                                 Audio (audio);
 660                                         }
 661                                 }
 662
 663                                 int const data_size = av_samples_get_buffer_size (
 664                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 665                                         );
 666
 667                                 assert (_audio_codec_context->channels == _ffmpeg_content->audio_channels());
 668                                 Audio (deinterleave_audio (_frame->data, data_size));
 669                         }
 670                 }
 671
 672                 if (decode_result >= 0) {
 673                         copy_packet.data += decode_result;
 674                         copy_packet.size -= decode_result;
 675                 }
 676         }
 677 }