src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "subtitle.h"
  51
  52 using std::cout;
  53 using std::string;
  54 using std::vector;
  55 using std::stringstream;
  56 using boost::shared_ptr;
  57
  58 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j, bool minimal, bool ignore_length)
  59         : Decoder (f, o, j, minimal, ignore_length)
  60         , _format_context (0)
  61         , _video_stream (-1)
  62         , _audio_stream (-1)
  63         , _subtitle_stream (-1)
  64         , _last_video_frame (-1)
  65         , _this_video_frame (0)
  66         , _audio_frame (0)
  67         , _video_codec_context (0)
  68         , _video_codec (0)
  69         , _audio_codec_context (0)
  70         , _audio_codec (0)
  71         , _subtitle_codec_context (0)
  72         , _subtitle_codec (0)
  73 {
  74         for (int i = 0; i < 2; ++i) {
  75                 _video_frame[i] = 0;
  76         }
  77
  78         setup_general ();
  79         setup_video ();
  80         setup_audio ();
  81         setup_subtitle ();
  82 }
  83
  84 FFmpegDecoder::~FFmpegDecoder ()
  85 {
  86         if (_audio_codec_context) {
  87                 avcodec_close (_audio_codec_context);
  88         }
  89
  90         if (_video_codec_context) {
  91                 avcodec_close (_video_codec_context);
  92         }
  93
  94         if (_subtitle_codec_context) {
  95                 avcodec_close (_subtitle_codec_context);
  96         }
  97
  98         for (int i = 0; i < 2; ++i) {
  99                 av_free (_video_frame[i]);
 100         }
 101
 102         av_free (_audio_frame);
 103
 104         avformat_close_input (&_format_context);
 105 }
 106
 107 void
 108 FFmpegDecoder::setup_general ()
 109 {
 110         int r;
 111
 112         av_register_all ();
 113
 114         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 115                 throw OpenFileError (_film->content_path ());
 116         }
 117
 118         if (avformat_find_stream_info (_format_context, 0) < 0) {
 119                 throw DecodeError ("could not find stream information");
 120         }
 121
 122         /* Find video, audio and subtitle streams and choose the first of each */
 123
 124         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 125                 AVStream* s = _format_context->streams[i];
 126                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 127                         _video_stream = i;
 128                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 129                         if (_audio_stream == -1) {
 130                                 _audio_stream = i;
 131                         }
 132                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->channels));
 133                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 134                         if (_subtitle_stream == -1) {
 135                                 _subtitle_stream = i;
 136                         }
 137                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 138                 }
 139         }
 140
 141         /* Now override audio and subtitle streams with those from the Film, if it has any */
 142
 143         if (_film->audio_stream_index() != -1) {
 144                 _audio_stream = _film->audio_stream().id();
 145         }
 146
 147         if (_film->subtitle_stream_index() != -1) {
 148                 _subtitle_stream = _film->subtitle_stream().id ();
 149         }
 150
 151         if (_video_stream < 0) {
 152                 throw DecodeError ("could not find video stream");
 153         }
 154
 155         for (int i = 0; i < 2; ++i) {
 156                 _video_frame[i] = avcodec_alloc_frame ();
 157                 if (_video_frame[i] == 0) {
 158                         throw DecodeError ("could not allocate frame");
 159                 }
 160         }
 161
 162         _audio_frame = avcodec_alloc_frame ();
 163         if (_audio_frame == 0) {
 164                 throw DecodeError ("could not allocate frame");
 165         }
 166 }
 167
 168 void
 169 FFmpegDecoder::setup_video ()
 170 {
 171         _video_codec_context = _format_context->streams[_video_stream]->codec;
 172         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 173
 174         if (_video_codec == 0) {
 175                 throw DecodeError ("could not find video decoder");
 176         }
 177
 178         /* I think this prevents problems with green hash on decodes and
 179            "changing frame properties on the fly is not supported by all filters"
 180            messages with some content.  Although I'm not sure; needs checking.
 181         */
 182         AVDictionary* opts = 0;
 183         av_dict_set (&opts, "threads", "1", 0);
 184
 185         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 186                 throw DecodeError ("could not open video decoder");
 187         }
 188 }
 189
 190 void
 191 FFmpegDecoder::setup_audio ()
 192 {
 193         if (_audio_stream < 0) {
 194                 return;
 195         }
 196
 197         _audio_codec_context = _format_context->streams[_audio_stream]->codec;
 198         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 199
 200         if (_audio_codec == 0) {
 201                 throw DecodeError ("could not find audio decoder");
 202         }
 203
 204         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 205                 throw DecodeError ("could not open audio decoder");
 206         }
 207
 208         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 209            so bodge it here.  No idea why we should have to do this.
 210         */
 211
 212         if (_audio_codec_context->channel_layout == 0) {
 213                 _audio_codec_context->channel_layout = av_get_default_channel_layout (audio_channels ());
 214         }
 215 }
 216
 217 void
 218 FFmpegDecoder::setup_subtitle ()
 219 {
 220         if (_subtitle_stream < 0) {
 221                 return;
 222         }
 223
 224         _subtitle_codec_context = _format_context->streams[_subtitle_stream]->codec;
 225         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 226
 227         if (_subtitle_codec == 0) {
 228                 throw DecodeError ("could not find subtitle decoder");
 229         }
 230
 231         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 232                 throw DecodeError ("could not open subtitle decoder");
 233         }
 234 }
 235
 236
 237 bool
 238 FFmpegDecoder::do_pass ()
 239 {
 240         int r = av_read_frame (_format_context, &_packet);
 241
 242         if (r < 0) {
 243                 if (r != AVERROR_EOF) {
 244                         throw DecodeError ("error on av_read_frame");
 245                 }
 246
 247                 /* Get any remaining frames */
 248
 249                 _packet.data = 0;
 250                 _packet.size = 0;
 251
 252                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 253
 254                 int frame_finished;
 255
 256                 while (avcodec_decode_video2 (_video_codec_context, _video_frame[_this_video_frame], &frame_finished, &_packet) >= 0 && frame_finished) {
 257                         process_video (_video_frame[_this_video_frame]);
 258                 }
 259
 260                 if (_audio_stream >= 0 && _opt->decode_audio) {
 261                         while (avcodec_decode_audio4 (_audio_codec_context, _audio_frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 262                                 int const data_size = av_samples_get_buffer_size (
 263                                         0, _audio_codec_context->channels, _audio_frame->nb_samples, audio_sample_format (), 1
 264                                         );
 265
 266                                 assert (_audio_codec_context->channels == _film->audio_channels());
 267                                 process_audio (_audio_frame->data[0], data_size);
 268                         }
 269                 }
 270
 271                 return true;
 272         }
 273
 274         double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts;
 275
 276         if (_packet.stream_index == _video_stream) {
 277
 278                 avcodec_get_frame_defaults (_video_frame[_this_video_frame]);
 279
 280                 if (!_first_video) {
 281                         _first_video = pts_seconds;
 282                 }
 283
 284                 int frame_finished;
 285                 if (avcodec_decode_video2 (_video_codec_context, _video_frame[_this_video_frame], &frame_finished, &_packet) >= 0 && frame_finished) {
 286
 287                         /* Where we are in the output, in seconds */
 288                         double const out_pts_seconds = video_frame() / frames_per_second();
 289
 290                         /* Difference between where we are and where we should be */
 291                         double const delta = pts_seconds - out_pts_seconds;
 292                         double const one_frame = 1 / frames_per_second();
 293
 294                         /* Insert the last frame if we have one, otherwise just use this one */
 295                         int const insert_frame = _last_video_frame == -1 ? _this_video_frame : _last_video_frame;
 296
 297                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 298                         if (delta > one_frame) {
 299                                 int const extra = rint (delta / one_frame);
 300                                 for (int i = 0; i < extra; ++i) {
 301                                         _film->log()->log (String::compose ("Extra frame inserted at %1s", out_pts_seconds));
 302                                         process_video (_video_frame[insert_frame]);
 303                                 }
 304                         }
 305
 306                         if (delta > -one_frame) {
 307                                 /* Process this frame */
 308                                 process_video (_video_frame[_this_video_frame]);
 309                         } else {
 310                                 /* Otherwise we are omitting a frame to keep things right */
 311                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 312                         }
 313
 314                         /* Swap over so that we use the alternate video frames next time */
 315                         _this_video_frame = 1 - _this_video_frame;
 316                         _last_video_frame = 1 - _this_video_frame;
 317                 }
 318
 319         } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) {
 320
 321                 /* Note: We only decode audio if we've had our first video packet through, and if it
 322                    was before this packet.  Until then audio is thrown away.
 323                 */
 324
 325                 if (!_first_audio) {
 326                         _first_audio = pts_seconds;
 327
 328                         /* This is our first audio packet, and if we've arrived here we must have had our
 329                            first video packet.  Push some silence to make up the gap between our first
 330                            video packet and our first audio.
 331                         */
 332
 333                         /* frames of silence that we must push */
 334                         int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
 335
 336                         _film->log()->log (
 337                                 String::compose (
 338                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 339                                         _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
 340                                         )
 341                                 );
 342
 343                         /* hence bytes */
 344                         int const b = s * audio_channels() * bytes_per_audio_sample();
 345
 346                         /* XXX: this assumes that it won't be too much, and there are shaky assumptions
 347                            that all sound representations are silent with memset()ed zero data.
 348                         */
 349                         uint8_t silence[b];
 350                         memset (silence, 0, b);
 351                         process_audio (silence, b);
 352                 }
 353
 354                 int frame_finished;
 355                 if (avcodec_decode_audio4 (_audio_codec_context, _audio_frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 356                         int const data_size = av_samples_get_buffer_size (
 357                                 0, _audio_codec_context->channels, _audio_frame->nb_samples, audio_sample_format (), 1
 358                                 );
 359
 360                         assert (_audio_codec_context->channels == _film->audio_channels());
 361                         process_audio (_audio_frame->data[0], data_size);
 362                 }
 363
 364         } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles && _first_video) {
 365
 366                 int got_subtitle;
 367                 AVSubtitle sub;
 368                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 369                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 370                            indicate that the previous subtitle should stop.
 371                         */
 372                         if (sub.num_rects > 0) {
 373                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 374                         } else {
 375                                 process_subtitle (shared_ptr<TimedSubtitle> ());
 376                         }
 377                         avsubtitle_free (&sub);
 378                 }
 379         }
 380
 381         av_free_packet (&_packet);
 382         return false;
 383 }
 384
 385 float
 386 FFmpegDecoder::frames_per_second () const
 387 {
 388         AVStream* s = _format_context->streams[_video_stream];
 389
 390         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 391                 return av_q2d (s->avg_frame_rate);
 392         }
 393
 394         return av_q2d (s->r_frame_rate);
 395 }
 396
 397 int
 398 FFmpegDecoder::audio_channels () const
 399 {
 400         if (_audio_codec_context == 0) {
 401                 return 0;
 402         }
 403
 404         return _audio_codec_context->channels;
 405 }
 406
 407 int
 408 FFmpegDecoder::audio_sample_rate () const
 409 {
 410         if (_audio_codec_context == 0) {
 411                 return 0;
 412         }
 413
 414         return _audio_codec_context->sample_rate;
 415 }
 416
 417 AVSampleFormat
 418 FFmpegDecoder::audio_sample_format () const
 419 {
 420         if (_audio_codec_context == 0) {
 421                 return (AVSampleFormat) 0;
 422         }
 423
 424         return _audio_codec_context->sample_fmt;
 425 }
 426
 427 int64_t
 428 FFmpegDecoder::audio_channel_layout () const
 429 {
 430         if (_audio_codec_context == 0) {
 431                 return 0;
 432         }
 433
 434         return _audio_codec_context->channel_layout;
 435 }
 436
 437 Size
 438 FFmpegDecoder::native_size () const
 439 {
 440         return Size (_video_codec_context->width, _video_codec_context->height);
 441 }
 442
 443 PixelFormat
 444 FFmpegDecoder::pixel_format () const
 445 {
 446         return _video_codec_context->pix_fmt;
 447 }
 448
 449 int
 450 FFmpegDecoder::time_base_numerator () const
 451 {
 452         return _video_codec_context->time_base.num;
 453 }
 454
 455 int
 456 FFmpegDecoder::time_base_denominator () const
 457 {
 458         return _video_codec_context->time_base.den;
 459 }
 460
 461 int
 462 FFmpegDecoder::sample_aspect_ratio_numerator () const
 463 {
 464         return _video_codec_context->sample_aspect_ratio.num;
 465 }
 466
 467 int
 468 FFmpegDecoder::sample_aspect_ratio_denominator () const
 469 {
 470         return _video_codec_context->sample_aspect_ratio.den;
 471 }
 472
 473 bool
 474 FFmpegDecoder::has_subtitles () const
 475 {
 476         return (_subtitle_stream != -1);
 477 }
 478
 479 vector<AudioStream>
 480 FFmpegDecoder::audio_streams () const
 481 {
 482         return _audio_streams;
 483 }
 484
 485 vector<SubtitleStream>
 486 FFmpegDecoder::subtitle_streams () const
 487 {
 488         return _subtitle_streams;
 489 }
 490
 491 string
 492 FFmpegDecoder::stream_name (AVStream* s) const
 493 {
 494         stringstream n;
 495
 496         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 497         if (lang) {
 498                 n << lang->value;
 499         }
 500
 501         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 502         if (title) {
 503                 if (!n.str().empty()) {
 504                         n << " ";
 505                 }
 506                 n << title->value;
 507         }
 508
 509         if (n.str().empty()) {
 510                 n << "unknown";
 511         }
 512
 513         return n.str ();
 514 }
 515