src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60 using boost::dynamic_pointer_cast;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const DecodeOptions> o, Job* j)
  63         : Decoder (f, o, j)
  64         , VideoDecoder (f, o, j)
  65         , AudioDecoder (f, o, j)
  66         , _format_context (0)
  67         , _video_stream (-1)
  68         , _frame (0)
  69         , _video_codec_context (0)
  70         , _video_codec (0)
  71         , _audio_codec_context (0)
  72         , _audio_codec (0)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         setup_general ();
  77         setup_video ();
  78         setup_audio ();
  79         setup_subtitle ();
  80
  81         _film_connection = f->Changed.connect (bind (&FFmpegDecoder::film_changed, this, _1));
  82 }
  83
  84 FFmpegDecoder::~FFmpegDecoder ()
  85 {
  86         if (_audio_codec_context) {
  87                 avcodec_close (_audio_codec_context);
  88         }
  89
  90         if (_video_codec_context) {
  91                 avcodec_close (_video_codec_context);
  92         }
  93
  94         if (_subtitle_codec_context) {
  95                 avcodec_close (_subtitle_codec_context);
  96         }
  97
  98         av_free (_frame);
  99
 100         avformat_close_input (&_format_context);
 101 }
 102
 103 void
 104 FFmpegDecoder::setup_general ()
 105 {
 106         av_register_all ();
 107
 108         if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
 109                 throw OpenFileError (_film->content_path ());
 110         }
 111
 112         if (avformat_find_stream_info (_format_context, 0) < 0) {
 113                 throw DecodeError ("could not find stream information");
 114         }
 115
 116         /* Find video, audio and subtitle streams and choose the first of each */
 117
 118         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 119                 AVStream* s = _format_context->streams[i];
 120                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 121                         _video_stream = i;
 122                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 123
 124                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 125                            so bodge it here.  No idea why we should have to do this.
 126                         */
 127
 128                         if (s->codec->channel_layout == 0) {
 129                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 130                         }
 131
 132                         _audio_streams.push_back (
 133                                 shared_ptr<AudioStream> (
 134                                         new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 135                                         )
 136                                 );
 137
 138                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 139                         _subtitle_streams.push_back (
 140                                 shared_ptr<SubtitleStream> (
 141                                         new SubtitleStream (stream_name (s), i)
 142                                         )
 143                                 );
 144                 }
 145         }
 146
 147         if (_video_stream < 0) {
 148                 throw DecodeError ("could not find video stream");
 149         }
 150
 151         _frame = avcodec_alloc_frame ();
 152         if (_frame == 0) {
 153                 throw DecodeError ("could not allocate frame");
 154         }
 155 }
 156
 157 void
 158 FFmpegDecoder::setup_video ()
 159 {
 160         _video_codec_context = _format_context->streams[_video_stream]->codec;
 161         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 162
 163         if (_video_codec == 0) {
 164                 throw DecodeError ("could not find video decoder");
 165         }
 166
 167         if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
 168                 throw DecodeError ("could not open video decoder");
 169         }
 170 }
 171
 172 void
 173 FFmpegDecoder::setup_audio ()
 174 {
 175         if (!_audio_stream) {
 176                 return;
 177         }
 178
 179         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 180         assert (ffa);
 181
 182         _audio_codec_context = _format_context->streams[ffa->id()]->codec;
 183         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 184
 185         if (_audio_codec == 0) {
 186                 throw DecodeError ("could not find audio decoder");
 187         }
 188
 189         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 190                 throw DecodeError ("could not open audio decoder");
 191         }
 192 }
 193
 194 void
 195 FFmpegDecoder::setup_subtitle ()
 196 {
 197         if (!_subtitle_stream) {
 198                 return;
 199         }
 200
 201         _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
 202         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 203
 204         if (_subtitle_codec == 0) {
 205                 throw DecodeError ("could not find subtitle decoder");
 206         }
 207
 208         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 209                 throw DecodeError ("could not open subtitle decoder");
 210         }
 211 }
 212
 213
 214 bool
 215 FFmpegDecoder::pass ()
 216 {
 217         int r = av_read_frame (_format_context, &_packet);
 218
 219         if (r < 0) {
 220                 if (r != AVERROR_EOF) {
 221                         /* Maybe we should fail here, but for now we'll just finish off instead */
 222                         char buf[256];
 223                         av_strerror (r, buf, sizeof(buf));
 224                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 225                 }
 226
 227                 /* Get any remaining frames */
 228
 229                 _packet.data = 0;
 230                 _packet.size = 0;
 231
 232                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 233
 234                 int frame_finished;
 235
 236                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 237                         filter_and_emit_video (_frame);
 238                 }
 239
 240                 if (_audio_stream && _opt->decode_audio) {
 241                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 242                                 int const data_size = av_samples_get_buffer_size (
 243                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 244                                         );
 245
 246                                 assert (_audio_codec_context->channels == _film->audio_channels());
 247                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 248                         }
 249                 }
 250
 251                 return true;
 252         }
 253
 254         avcodec_get_frame_defaults (_frame);
 255
 256         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 257
 258         if (_packet.stream_index == _video_stream) {
 259
 260                 int frame_finished;
 261                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 262                 if (r >= 0 && frame_finished) {
 263
 264                         if (r != _packet.size) {
 265                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 266                         }
 267
 268                         if (_opt->video_sync) {
 269                                 out_with_sync ();
 270                         } else {
 271                                 filter_and_emit_video (_frame);
 272                         }
 273                 }
 274
 275         } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
 276
 277                 int frame_finished;
 278                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 279
 280                         /* Where we are in the source, in seconds */
 281                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 282                                 * av_frame_get_best_effort_timestamp(_frame);
 283
 284                         /* We only decode audio if we've had our first video packet through, and if it
 285                            was before this packet.  Until then audio is thrown away.
 286                         */
 287
 288                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 289
 290                                 if (!_first_audio) {
 291                                         _first_audio = source_pts_seconds;
 292
 293                                         /* This is our first audio frame, and if we've arrived here we must have had our
 294                                            first video frame.  Push some silence to make up any gap between our first
 295                                            video frame and our first audio.
 296                                         */
 297
 298                                         /* frames of silence that we must push */
 299                                         int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
 300
 301                                         _film->log()->log (
 302                                                 String::compose (
 303                                                         "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
 304                                                         _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
 305                                                         )
 306                                                 );
 307
 308                                         if (s) {
 309                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
 310                                                 audio->make_silent ();
 311                                                 Audio (audio);
 312                                         }
 313                                 }
 314
 315                                 int const data_size = av_samples_get_buffer_size (
 316                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 317                                         );
 318
 319                                 assert (_audio_codec_context->channels == _film->audio_channels());
 320                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 321                         }
 322                 }
 323
 324         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
 325
 326                 int got_subtitle;
 327                 AVSubtitle sub;
 328                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 329                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 330                            indicate that the previous subtitle should stop.
 331                         */
 332                         if (sub.num_rects > 0) {
 333                                 shared_ptr<TimedSubtitle> ts;
 334                                 try {
 335                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 336                                 } catch (...) {
 337                                         /* some problem with the subtitle; we probably didn't understand it */
 338                                 }
 339                         } else {
 340                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 341                         }
 342                         avsubtitle_free (&sub);
 343                 }
 344         }
 345
 346         av_free_packet (&_packet);
 347         return false;
 348 }
 349
 350 shared_ptr<AudioBuffers>
 351 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 352 {
 353         assert (_film->audio_channels());
 354         assert (bytes_per_audio_sample());
 355
 356         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 357         assert (ffa);
 358
 359         /* Deinterleave and convert to float */
 360
 361         assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
 362
 363         int const total_samples = size / bytes_per_audio_sample();
 364         int const frames = total_samples / _film->audio_channels();
 365         shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
 366
 367         switch (audio_sample_format()) {
 368         case AV_SAMPLE_FMT_S16:
 369         {
 370                 int16_t* p = (int16_t *) data;
 371                 int sample = 0;
 372                 int channel = 0;
 373                 for (int i = 0; i < total_samples; ++i) {
 374                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 375
 376                         ++channel;
 377                         if (channel == _film->audio_channels()) {
 378                                 channel = 0;
 379                                 ++sample;
 380                         }
 381                 }
 382         }
 383         break;
 384
 385         case AV_SAMPLE_FMT_S32:
 386         {
 387                 int32_t* p = (int32_t *) data;
 388                 int sample = 0;
 389                 int channel = 0;
 390                 for (int i = 0; i < total_samples; ++i) {
 391                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 392
 393                         ++channel;
 394                         if (channel == _film->audio_channels()) {
 395                                 channel = 0;
 396                                 ++sample;
 397                         }
 398                 }
 399         }
 400
 401         case AV_SAMPLE_FMT_FLTP:
 402         {
 403                 float* p = reinterpret_cast<float*> (data);
 404                 for (int i = 0; i < _film->audio_channels(); ++i) {
 405                         memcpy (audio->data(i), p, frames * sizeof(float));
 406                         p += frames;
 407                 }
 408         }
 409         break;
 410
 411         default:
 412                 assert (false);
 413         }
 414
 415         return audio;
 416 }
 417
 418 float
 419 FFmpegDecoder::frames_per_second () const
 420 {
 421         AVStream* s = _format_context->streams[_video_stream];
 422
 423         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 424                 return av_q2d (s->avg_frame_rate);
 425         }
 426
 427         return av_q2d (s->r_frame_rate);
 428 }
 429
 430 AVSampleFormat
 431 FFmpegDecoder::audio_sample_format () const
 432 {
 433         if (_audio_codec_context == 0) {
 434                 return (AVSampleFormat) 0;
 435         }
 436
 437         return _audio_codec_context->sample_fmt;
 438 }
 439
 440 Size
 441 FFmpegDecoder::native_size () const
 442 {
 443         return Size (_video_codec_context->width, _video_codec_context->height);
 444 }
 445
 446 PixelFormat
 447 FFmpegDecoder::pixel_format () const
 448 {
 449         return _video_codec_context->pix_fmt;
 450 }
 451
 452 int
 453 FFmpegDecoder::time_base_numerator () const
 454 {
 455         return _video_codec_context->time_base.num;
 456 }
 457
 458 int
 459 FFmpegDecoder::time_base_denominator () const
 460 {
 461         return _video_codec_context->time_base.den;
 462 }
 463
 464 int
 465 FFmpegDecoder::sample_aspect_ratio_numerator () const
 466 {
 467         return _video_codec_context->sample_aspect_ratio.num;
 468 }
 469
 470 int
 471 FFmpegDecoder::sample_aspect_ratio_denominator () const
 472 {
 473         return _video_codec_context->sample_aspect_ratio.den;
 474 }
 475
 476 string
 477 FFmpegDecoder::stream_name (AVStream* s) const
 478 {
 479         stringstream n;
 480
 481         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 482         if (lang) {
 483                 n << lang->value;
 484         }
 485
 486         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 487         if (title) {
 488                 if (!n.str().empty()) {
 489                         n << " ";
 490                 }
 491                 n << title->value;
 492         }
 493
 494         if (n.str().empty()) {
 495                 n << "unknown";
 496         }
 497
 498         return n.str ();
 499 }
 500
 501 int
 502 FFmpegDecoder::bytes_per_audio_sample () const
 503 {
 504         return av_get_bytes_per_sample (audio_sample_format ());
 505 }
 506
 507 void
 508 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
 509 {
 510         AudioDecoder::set_audio_stream (s);
 511         setup_audio ();
 512 }
 513
 514 void
 515 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
 516 {
 517         VideoDecoder::set_subtitle_stream (s);
 518         setup_subtitle ();
 519 }
 520
 521 void
 522 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 523 {
 524         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 525
 526         shared_ptr<FilterGraph> graph;
 527
 528         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 529         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 530                 ++i;
 531         }
 532
 533         if (i == _filter_graphs.end ()) {
 534                 graph.reset (new FilterGraph (_film, this, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 535                 _filter_graphs.push_back (graph);
 536                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 537         } else {
 538                 graph = *i;
 539         }
 540
 541         list<shared_ptr<Image> > images = graph->process (frame);
 542
 543         SourceFrame const sf = av_q2d (_format_context->streams[_video_stream]->time_base)
 544                 * av_frame_get_best_effort_timestamp(_frame) * frames_per_second();
 545
 546         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 547                 emit_video (*i, sf);
 548         }
 549 }
 550
 551 bool
 552 FFmpegDecoder::seek (SourceFrame f)
 553 {
 554         int64_t const t = static_cast<int64_t>(f) / (av_q2d (_format_context->streams[_video_stream]->time_base) * frames_per_second());
 555         int const r = av_seek_frame (_format_context, _video_stream, t, 0);
 556         avcodec_flush_buffers (_video_codec_context);
 557         return r < 0;
 558 }
 559
 560 shared_ptr<FFmpegAudioStream>
 561 FFmpegAudioStream::create (string t, optional<int> v)
 562 {
 563         if (!v) {
 564                 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
 565                 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 566         }
 567
 568         stringstream s (t);
 569         string type;
 570         s >> type;
 571         if (type != "ffmpeg") {
 572                 return shared_ptr<FFmpegAudioStream> ();
 573         }
 574
 575         return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 576 }
 577
 578 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
 579 {
 580         stringstream n (t);
 581
 582         int name_index = 4;
 583         if (!version) {
 584                 name_index = 2;
 585                 int channels;
 586                 n >> _id >> channels;
 587                 _channel_layout = av_get_default_channel_layout (channels);
 588                 _sample_rate = 0;
 589         } else {
 590                 string type;
 591                 /* Current (marked version 1) */
 592                 n >> type >> _id >> _sample_rate >> _channel_layout;
 593                 assert (type == "ffmpeg");
 594         }
 595
 596         for (int i = 0; i < name_index; ++i) {
 597                 size_t const s = t.find (' ');
 598                 if (s != string::npos) {
 599                         t = t.substr (s + 1);
 600                 }
 601         }
 602
 603         _name = t;
 604 }
 605
 606 string
 607 FFmpegAudioStream::to_string () const
 608 {
 609         return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
 610 }
 611
 612 void
 613 FFmpegDecoder::out_with_sync ()
 614 {
 615         /* Where we are in the output, in seconds */
 616         double const out_pts_seconds = video_frame() / frames_per_second();
 617
 618         /* Where we are in the source, in seconds */
 619         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 620                 * av_frame_get_best_effort_timestamp(_frame);
 621
 622         _film->log()->log (
 623                 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
 624                 Log::VERBOSE
 625                 );
 626
 627         if (!_first_video) {
 628                 _first_video = source_pts_seconds;
 629         }
 630
 631         /* Difference between where we are and where we should be */
 632         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 633         double const one_frame = 1 / frames_per_second();
 634
 635         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 636         if (delta > one_frame) {
 637                 int const extra = rint (delta / one_frame);
 638                 for (int i = 0; i < extra; ++i) {
 639                         repeat_last_video ();
 640                         _film->log()->log (
 641                                 String::compose (
 642                                         "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
 643                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 644                                                         )
 645                                 );
 646                 }
 647         }
 648
 649         if (delta > -one_frame) {
 650                 /* Process this frame */
 651                 filter_and_emit_video (_frame);
 652         } else {
 653                 /* Otherwise we are omitting a frame to keep things right */
 654                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 655         }
 656 }
 657
 658 void
 659 FFmpegDecoder::film_changed (Film::Property p)
 660 {
 661         switch (p) {
 662         case Film::CROP:
 663         {
 664                 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 665                 _filter_graphs.clear ();
 666         }
 667         OutputChanged ();
 668         break;
 669
 670         default:
 671                 break;
 672         }
 673 }
 674
 675 /** @return Length (in video frames) according to our content's header */
 676 SourceFrame
 677 FFmpegDecoder::length () const
 678 {
 679         return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
 680 }
 681