src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "subtitle.h"
  51
  52 using std::cout;
  53 using std::string;
  54 using std::vector;
  55 using std::stringstream;
  56 using boost::shared_ptr;
  57
  58 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j, bool minimal, bool ignore_length)
  59         : Decoder (f, o, j, minimal, ignore_length)
  60         , _format_context (0)
  61         , _video_stream (-1)
  62         , _audio_stream (-1)
  63         , _subtitle_stream (-1)
  64         , _last_video_frame (-1)
  65         , _this_video_frame (0)
  66         , _audio_frame (0)
  67         , _video_codec_context (0)
  68         , _video_codec (0)
  69         , _audio_codec_context (0)
  70         , _audio_codec (0)
  71         , _subtitle_codec_context (0)
  72         , _subtitle_codec (0)
  73 {
  74         for (int i = 0; i < 2; ++i) {
  75                 _video_frame[i] = 0;
  76         }
  77
  78         setup_general ();
  79         setup_video ();
  80         setup_audio ();
  81         setup_subtitle ();
  82 }
  83
  84 FFmpegDecoder::~FFmpegDecoder ()
  85 {
  86         if (_audio_codec_context) {
  87                 avcodec_close (_audio_codec_context);
  88         }
  89
  90         if (_video_codec_context) {
  91                 avcodec_close (_video_codec_context);
  92         }
  93
  94         if (_subtitle_codec_context) {
  95                 avcodec_close (_subtitle_codec_context);
  96         }
  97
  98         for (int i = 0; i < 2; ++i) {
  99                 av_free (_video_frame[i]);
 100         }
 101
 102         av_free (_audio_frame);
 103
 104         avformat_close_input (&_format_context);
 105 }
 106
 107 void
 108 FFmpegDecoder::setup_general ()
 109 {
 110         int r;
 111
 112         av_register_all ();
 113
 114         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 115                 throw OpenFileError (_film->content_path ());
 116         }
 117
 118         if (avformat_find_stream_info (_format_context, 0) < 0) {
 119                 throw DecodeError ("could not find stream information");
 120         }
 121
 122         /* Find video, audio and subtitle streams and choose the first of each */
 123
 124         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 125                 AVStream* s = _format_context->streams[i];
 126                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 127                         _video_stream = i;
 128                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 129                         if (_audio_stream == -1) {
 130                                 _audio_stream = i;
 131                         }
 132                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->channels));
 133                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 134                         if (_subtitle_stream == -1) {
 135                                 _subtitle_stream = i;
 136                         }
 137                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 138                 }
 139         }
 140
 141         /* Now override audio and subtitle streams with those from the Film, if it has any */
 142
 143         if (_film->audio_stream_index() != -1) {
 144                 _audio_stream = _film->audio_stream().id();
 145         }
 146
 147         if (_film->subtitle_stream_index() != -1) {
 148                 _subtitle_stream = _film->subtitle_stream().id ();
 149         }
 150
 151         if (_video_stream < 0) {
 152                 throw DecodeError ("could not find video stream");
 153         }
 154
 155         for (int i = 0; i < 2; ++i) {
 156                 _video_frame[i] = avcodec_alloc_frame ();
 157                 if (_video_frame[i] == 0) {
 158                         throw DecodeError ("could not allocate frame");
 159                 }
 160         }
 161
 162         _audio_frame = avcodec_alloc_frame ();
 163         if (_audio_frame == 0) {
 164                 throw DecodeError ("could not allocate frame");
 165         }
 166 }
 167
 168 void
 169 FFmpegDecoder::setup_video ()
 170 {
 171         _video_codec_context = _format_context->streams[_video_stream]->codec;
 172         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 173
 174         if (_video_codec == 0) {
 175                 throw DecodeError ("could not find video decoder");
 176         }
 177
 178         /* I think this prevents problems with green hash on decodes and
 179            "changing frame properties on the fly is not supported by all filters"
 180            messages with some content.  Although I'm not sure; needs checking.
 181         */
 182         AVDictionary* opts = 0;
 183         av_dict_set (&opts, "threads", "1", 0);
 184
 185         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 186                 throw DecodeError ("could not open video decoder");
 187         }
 188 }
 189
 190 void
 191 FFmpegDecoder::setup_audio ()
 192 {
 193         if (_audio_stream < 0) {
 194                 return;
 195         }
 196
 197         _audio_codec_context = _format_context->streams[_audio_stream]->codec;
 198         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 199
 200         if (_audio_codec == 0) {
 201                 throw DecodeError ("could not find audio decoder");
 202         }
 203
 204         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 205                 throw DecodeError ("could not open audio decoder");
 206         }
 207
 208         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 209            so bodge it here.  No idea why we should have to do this.
 210         */
 211
 212         if (_audio_codec_context->channel_layout == 0) {
 213                 _audio_codec_context->channel_layout = av_get_default_channel_layout (audio_channels ());
 214         }
 215 }
 216
 217 void
 218 FFmpegDecoder::setup_subtitle ()
 219 {
 220         if (_subtitle_stream < 0) {
 221                 return;
 222         }
 223
 224         _subtitle_codec_context = _format_context->streams[_subtitle_stream]->codec;
 225         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 226
 227         if (_subtitle_codec == 0) {
 228                 throw DecodeError ("could not find subtitle decoder");
 229         }
 230
 231         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 232                 throw DecodeError ("could not open subtitle decoder");
 233         }
 234 }
 235
 236
 237 bool
 238 FFmpegDecoder::do_pass ()
 239 {
 240         int r = av_read_frame (_format_context, &_packet);
 241
 242         if (r < 0) {
 243                 if (r != AVERROR_EOF) {
 244                         throw DecodeError ("error on av_read_frame");
 245                 }
 246
 247                 /* Get any remaining frames */
 248
 249                 _packet.data = 0;
 250                 _packet.size = 0;
 251
 252                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 253
 254                 int frame_finished;
 255
 256                 while (avcodec_decode_video2 (_video_codec_context, _video_frame[_this_video_frame], &frame_finished, &_packet) >= 0 && frame_finished) {
 257                         process_video (_video_frame[_this_video_frame]);
 258                 }
 259
 260                 if (_audio_stream >= 0 && _opt->decode_audio) {
 261                         while (avcodec_decode_audio4 (_audio_codec_context, _audio_frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 262                                 int const data_size = av_samples_get_buffer_size (
 263                                         0, _audio_codec_context->channels, _audio_frame->nb_samples, audio_sample_format (), 1
 264                                         );
 265
 266                                 assert (_audio_codec_context->channels == _film->audio_channels());
 267                                 process_audio (_audio_frame->data[0], data_size);
 268                         }
 269                 }
 270
 271                 return true;
 272         }
 273
 274         double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts;
 275
 276         if (_packet.stream_index == _video_stream) {
 277
 278                 avcodec_get_frame_defaults (_video_frame[_this_video_frame]);
 279
 280                 if (!_first_video) {
 281                         _first_video = pts_seconds;
 282                 }
 283
 284                 int frame_finished;
 285                 if (avcodec_decode_video2 (_video_codec_context, _video_frame[_this_video_frame], &frame_finished, &_packet) >= 0 && frame_finished) {
 286
 287                         /* Where we are in the output, in seconds */
 288                         double const out_pts_seconds = video_frame() / frames_per_second();
 289
 290                         /* Difference between where we are and where we should be */
 291                         double const delta = pts_seconds - out_pts_seconds;
 292                         double const one_frame = 1 / frames_per_second();
 293
 294                         /* Insert the last frame if we have one, otherwise just use this one */
 295                         int const insert_frame = _last_video_frame == -1 ? _this_video_frame : _last_video_frame;
 296
 297                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 298                         if (delta > one_frame) {
 299                                 int const extra = rint (delta / one_frame);
 300                                 for (int i = 0; i < extra; ++i) {
 301                                         _film->log()->log (String::compose ("Extra frame inserted at %1s", out_pts_seconds));
 302                                         process_video (_video_frame[insert_frame]);
 303                                 }
 304                         }
 305
 306                         if (delta > -one_frame) {
 307                                 /* Process this frame */
 308                                 process_video (_video_frame[_this_video_frame]);
 309                         } else {
 310                                 /* Otherwise we are omitting a frame to keep things right */
 311                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 312                         }
 313
 314                         /* Swap over so that we use the alternate video frames next time */
 315                         _this_video_frame = 1 - _this_video_frame;
 316                         _last_video_frame = 1 - _this_video_frame;
 317                 }
 318
 319         } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) {
 320
 321                 /* Note: We only decode audio if we've had our first video packet through, and if it
 322                    was before this packet.  Until then audio is thrown away.
 323                 */
 324
 325                 if (!_first_audio) {
 326                         _first_audio = pts_seconds;
 327
 328                         /* This is our first audio packet, and if we've arrived here we must have had our
 329                            first video packet.  Push some silence to make up any gap between our first
 330                            video packet and our first audio.
 331                         */
 332
 333                         /* frames of silence that we must push */
 334                         int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
 335
 336                         _film->log()->log (
 337                                 String::compose (
 338                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 339                                         _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
 340                                         )
 341                                 );
 342
 343                         if (s) {
 344                                 /* hence bytes */
 345                                 int const b = s * audio_channels() * bytes_per_audio_sample();
 346
 347                                 /* XXX: this assumes that it won't be too much, and there are shaky assumptions
 348                                    that all sound representations are silent with memset()ed zero data.
 349                                 */
 350                                 uint8_t silence[b];
 351                                 memset (silence, 0, b);
 352                                 process_audio (silence, b);
 353                         }
 354                 }
 355
 356                 int frame_finished;
 357                 if (avcodec_decode_audio4 (_audio_codec_context, _audio_frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 358                         int const data_size = av_samples_get_buffer_size (
 359                                 0, _audio_codec_context->channels, _audio_frame->nb_samples, audio_sample_format (), 1
 360                                 );
 361
 362                         assert (_audio_codec_context->channels == _film->audio_channels());
 363                         process_audio (_audio_frame->data[0], data_size);
 364                 }
 365
 366         } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles && _first_video) {
 367
 368                 int got_subtitle;
 369                 AVSubtitle sub;
 370                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 371                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 372                            indicate that the previous subtitle should stop.
 373                         */
 374                         if (sub.num_rects > 0) {
 375                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 376                         } else {
 377                                 process_subtitle (shared_ptr<TimedSubtitle> ());
 378                         }
 379                         avsubtitle_free (&sub);
 380                 }
 381         }
 382
 383         av_free_packet (&_packet);
 384         return false;
 385 }
 386
 387 float
 388 FFmpegDecoder::frames_per_second () const
 389 {
 390         AVStream* s = _format_context->streams[_video_stream];
 391
 392         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 393                 return av_q2d (s->avg_frame_rate);
 394         }
 395
 396         return av_q2d (s->r_frame_rate);
 397 }
 398
 399 int
 400 FFmpegDecoder::audio_channels () const
 401 {
 402         if (_audio_codec_context == 0) {
 403                 return 0;
 404         }
 405
 406         return _audio_codec_context->channels;
 407 }
 408
 409 int
 410 FFmpegDecoder::audio_sample_rate () const
 411 {
 412         if (_audio_codec_context == 0) {
 413                 return 0;
 414         }
 415
 416         return _audio_codec_context->sample_rate;
 417 }
 418
 419 AVSampleFormat
 420 FFmpegDecoder::audio_sample_format () const
 421 {
 422         if (_audio_codec_context == 0) {
 423                 return (AVSampleFormat) 0;
 424         }
 425
 426         return _audio_codec_context->sample_fmt;
 427 }
 428
 429 int64_t
 430 FFmpegDecoder::audio_channel_layout () const
 431 {
 432         if (_audio_codec_context == 0) {
 433                 return 0;
 434         }
 435
 436         return _audio_codec_context->channel_layout;
 437 }
 438
 439 Size
 440 FFmpegDecoder::native_size () const
 441 {
 442         return Size (_video_codec_context->width, _video_codec_context->height);
 443 }
 444
 445 PixelFormat
 446 FFmpegDecoder::pixel_format () const
 447 {
 448         return _video_codec_context->pix_fmt;
 449 }
 450
 451 int
 452 FFmpegDecoder::time_base_numerator () const
 453 {
 454         return _video_codec_context->time_base.num;
 455 }
 456
 457 int
 458 FFmpegDecoder::time_base_denominator () const
 459 {
 460         return _video_codec_context->time_base.den;
 461 }
 462
 463 int
 464 FFmpegDecoder::sample_aspect_ratio_numerator () const
 465 {
 466         return _video_codec_context->sample_aspect_ratio.num;
 467 }
 468
 469 int
 470 FFmpegDecoder::sample_aspect_ratio_denominator () const
 471 {
 472         return _video_codec_context->sample_aspect_ratio.den;
 473 }
 474
 475 bool
 476 FFmpegDecoder::has_subtitles () const
 477 {
 478         return (_subtitle_stream != -1);
 479 }
 480
 481 vector<AudioStream>
 482 FFmpegDecoder::audio_streams () const
 483 {
 484         return _audio_streams;
 485 }
 486
 487 vector<SubtitleStream>
 488 FFmpegDecoder::subtitle_streams () const
 489 {
 490         return _subtitle_streams;
 491 }
 492
 493 string
 494 FFmpegDecoder::stream_name (AVStream* s) const
 495 {
 496         stringstream n;
 497
 498         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 499         if (lang) {
 500                 n << lang->value;
 501         }
 502
 503         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 504         if (title) {
 505                 if (!n.str().empty()) {
 506                         n << " ";
 507                 }
 508                 n << title->value;
 509         }
 510
 511         if (n.str().empty()) {
 512                 n << "unknown";
 513         }
 514
 515         return n.str ();
 516 }
 517