src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "subtitle.h"
  51
  52 using std::cout;
  53 using std::string;
  54 using std::vector;
  55 using std::stringstream;
  56 using boost::shared_ptr;
  57
  58 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j, bool minimal, bool ignore_length)
  59         : Decoder (f, o, j, minimal, ignore_length)
  60         , _format_context (0)
  61         , _video_stream (-1)
  62         , _audio_stream (-1)
  63         , _subtitle_stream (-1)
  64         , _frame (0)
  65         , _video_codec_context (0)
  66         , _video_codec (0)
  67         , _audio_codec_context (0)
  68         , _audio_codec (0)
  69         , _subtitle_codec_context (0)
  70         , _subtitle_codec (0)
  71 {
  72         setup_general ();
  73         setup_video ();
  74         setup_audio ();
  75         setup_subtitle ();
  76 }
  77
  78 FFmpegDecoder::~FFmpegDecoder ()
  79 {
  80         if (_audio_codec_context) {
  81                 avcodec_close (_audio_codec_context);
  82         }
  83
  84         if (_video_codec_context) {
  85                 avcodec_close (_video_codec_context);
  86         }
  87
  88         if (_subtitle_codec_context) {
  89                 avcodec_close (_subtitle_codec_context);
  90         }
  91
  92         av_free (_frame);
  93
  94         avformat_close_input (&_format_context);
  95 }
  96
  97 void
  98 FFmpegDecoder::setup_general ()
  99 {
 100         int r;
 101
 102         av_register_all ();
 103
 104         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 105                 throw OpenFileError (_film->content_path ());
 106         }
 107
 108         if (avformat_find_stream_info (_format_context, 0) < 0) {
 109                 throw DecodeError ("could not find stream information");
 110         }
 111
 112         /* Find video, audio and subtitle streams and choose the first of each */
 113
 114         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 115                 AVStream* s = _format_context->streams[i];
 116                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 117                         _video_stream = i;
 118                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 119                         if (_audio_stream == -1) {
 120                                 _audio_stream = i;
 121                         }
 122                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->channels));
 123                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 124                         if (_subtitle_stream == -1) {
 125                                 _subtitle_stream = i;
 126                         }
 127                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 128                 }
 129         }
 130
 131         /* Now override audio and subtitle streams with those from the Film, if it has any */
 132
 133         if (_film->audio_stream_index() != -1) {
 134                 _audio_stream = _film->audio_stream().id();
 135         }
 136
 137         if (_film->subtitle_stream_index() != -1) {
 138                 _subtitle_stream = _film->subtitle_stream().id ();
 139         }
 140
 141         if (_video_stream < 0) {
 142                 throw DecodeError ("could not find video stream");
 143         }
 144
 145         _frame = avcodec_alloc_frame ();
 146         if (_frame == 0) {
 147                 throw DecodeError ("could not allocate frame");
 148         }
 149 }
 150
 151 void
 152 FFmpegDecoder::setup_video ()
 153 {
 154         _video_codec_context = _format_context->streams[_video_stream]->codec;
 155         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 156
 157         if (_video_codec == 0) {
 158                 throw DecodeError ("could not find video decoder");
 159         }
 160
 161         /* I think this prevents problems with green hash on decodes and
 162            "changing frame properties on the fly is not supported by all filters"
 163            messages with some content.  Although I'm not sure; needs checking.
 164         */
 165         AVDictionary* opts = 0;
 166         av_dict_set (&opts, "threads", "1", 0);
 167
 168         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 169                 throw DecodeError ("could not open video decoder");
 170         }
 171 }
 172
 173 void
 174 FFmpegDecoder::setup_audio ()
 175 {
 176         if (_audio_stream < 0) {
 177                 return;
 178         }
 179
 180         _audio_codec_context = _format_context->streams[_audio_stream]->codec;
 181         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 182
 183         if (_audio_codec == 0) {
 184                 throw DecodeError ("could not find audio decoder");
 185         }
 186
 187         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 188                 throw DecodeError ("could not open audio decoder");
 189         }
 190
 191         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 192            so bodge it here.  No idea why we should have to do this.
 193         */
 194
 195         if (_audio_codec_context->channel_layout == 0) {
 196                 _audio_codec_context->channel_layout = av_get_default_channel_layout (audio_channels ());
 197         }
 198 }
 199
 200 void
 201 FFmpegDecoder::setup_subtitle ()
 202 {
 203         if (_subtitle_stream < 0) {
 204                 return;
 205         }
 206
 207         _subtitle_codec_context = _format_context->streams[_subtitle_stream]->codec;
 208         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 209
 210         if (_subtitle_codec == 0) {
 211                 throw DecodeError ("could not find subtitle decoder");
 212         }
 213
 214         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 215                 throw DecodeError ("could not open subtitle decoder");
 216         }
 217 }
 218
 219
 220 bool
 221 FFmpegDecoder::do_pass ()
 222 {
 223         int r = av_read_frame (_format_context, &_packet);
 224
 225         if (r < 0) {
 226                 if (r != AVERROR_EOF) {
 227                         throw DecodeError ("error on av_read_frame");
 228                 }
 229
 230                 /* Get any remaining frames */
 231
 232                 _packet.data = 0;
 233                 _packet.size = 0;
 234
 235                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 236
 237                 int frame_finished;
 238
 239                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 240                         process_video (_frame);
 241                 }
 242
 243                 if (_audio_stream >= 0 && _opt->decode_audio) {
 244                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 245                                 int const data_size = av_samples_get_buffer_size (
 246                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 247                                         );
 248
 249                                 assert (_audio_codec_context->channels == _film->audio_channels());
 250                                 process_audio (_frame->data[0], data_size);
 251                         }
 252                 }
 253
 254                 return true;
 255         }
 256
 257         double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts;
 258
 259         avcodec_get_frame_defaults (_frame);
 260
 261         if (_packet.stream_index == _video_stream) {
 262
 263                 if (!_first_video) {
 264                         _first_video = pts_seconds;
 265                 }
 266
 267                 int frame_finished;
 268                 if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 269
 270                         /* Where we are in the output, in seconds */
 271                         double const out_pts_seconds = video_frame_index() / frames_per_second();
 272
 273                         /* Difference between where we are and where we should be */
 274                         double const delta = pts_seconds - out_pts_seconds;
 275                         double const one_frame = 1 / frames_per_second();
 276
 277                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 278                         if (delta > one_frame) {
 279                                 int const extra = rint (delta / one_frame);
 280                                 for (int i = 0; i < extra; ++i) {
 281                                         repeat_last_video ();
 282                                         _film->log()->log (
 283                                                 String::compose (
 284                                                         "Extra frame inserted at %1s; DCP frame %2, packet PTS %3",
 285                                                         out_pts_seconds, video_frame_index(), pts_seconds
 286                                                         )
 287                                                 );
 288                                 }
 289                         }
 290
 291                         if (delta > -one_frame) {
 292                                 /* Process this frame */
 293                                 process_video (_frame);
 294                         } else {
 295                                 /* Otherwise we are omitting a frame to keep things right */
 296                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 297                         }
 298                 }
 299
 300         } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) {
 301
 302                 /* Note: We only decode audio if we've had our first video packet through, and if it
 303                    was before this packet.  Until then audio is thrown away.
 304                 */
 305
 306                 if (!_first_audio) {
 307                         _first_audio = pts_seconds;
 308
 309                         /* This is our first audio packet, and if we've arrived here we must have had our
 310                            first video packet.  Push some silence to make up any gap between our first
 311                            video packet and our first audio.
 312                         */
 313
 314                         /* frames of silence that we must push */
 315                         int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
 316
 317                         _film->log()->log (
 318                                 String::compose (
 319                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 320                                         _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
 321                                         )
 322                                 );
 323
 324                         if (s) {
 325                                 /* hence bytes */
 326                                 int const b = s * audio_channels() * bytes_per_audio_sample();
 327
 328                                 /* XXX: this assumes that it won't be too much, and there are shaky assumptions
 329                                    that all sound representations are silent with memset()ed zero data.
 330                                 */
 331                                 uint8_t silence[b];
 332                                 memset (silence, 0, b);
 333                                 process_audio (silence, b);
 334                         }
 335                 }
 336
 337                 int frame_finished;
 338                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 339                         int const data_size = av_samples_get_buffer_size (
 340                                 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 341                                 );
 342
 343                         assert (_audio_codec_context->channels == _film->audio_channels());
 344                         process_audio (_frame->data[0], data_size);
 345                 }
 346
 347         } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles && _first_video) {
 348
 349                 int got_subtitle;
 350                 AVSubtitle sub;
 351                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 352                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 353                            indicate that the previous subtitle should stop.
 354                         */
 355                         if (sub.num_rects > 0) {
 356                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 357                         } else {
 358                                 process_subtitle (shared_ptr<TimedSubtitle> ());
 359                         }
 360                         avsubtitle_free (&sub);
 361                 }
 362         }
 363
 364         av_free_packet (&_packet);
 365         return false;
 366 }
 367
 368 float
 369 FFmpegDecoder::frames_per_second () const
 370 {
 371         AVStream* s = _format_context->streams[_video_stream];
 372
 373         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 374                 return av_q2d (s->avg_frame_rate);
 375         }
 376
 377         return av_q2d (s->r_frame_rate);
 378 }
 379
 380 int
 381 FFmpegDecoder::audio_channels () const
 382 {
 383         if (_audio_codec_context == 0) {
 384                 return 0;
 385         }
 386
 387         return _audio_codec_context->channels;
 388 }
 389
 390 int
 391 FFmpegDecoder::audio_sample_rate () const
 392 {
 393         if (_audio_codec_context == 0) {
 394                 return 0;
 395         }
 396
 397         return _audio_codec_context->sample_rate;
 398 }
 399
 400 AVSampleFormat
 401 FFmpegDecoder::audio_sample_format () const
 402 {
 403         if (_audio_codec_context == 0) {
 404                 return (AVSampleFormat) 0;
 405         }
 406
 407         return _audio_codec_context->sample_fmt;
 408 }
 409
 410 int64_t
 411 FFmpegDecoder::audio_channel_layout () const
 412 {
 413         if (_audio_codec_context == 0) {
 414                 return 0;
 415         }
 416
 417         return _audio_codec_context->channel_layout;
 418 }
 419
 420 Size
 421 FFmpegDecoder::native_size () const
 422 {
 423         return Size (_video_codec_context->width, _video_codec_context->height);
 424 }
 425
 426 PixelFormat
 427 FFmpegDecoder::pixel_format () const
 428 {
 429         return _video_codec_context->pix_fmt;
 430 }
 431
 432 int
 433 FFmpegDecoder::time_base_numerator () const
 434 {
 435         return _video_codec_context->time_base.num;
 436 }
 437
 438 int
 439 FFmpegDecoder::time_base_denominator () const
 440 {
 441         return _video_codec_context->time_base.den;
 442 }
 443
 444 int
 445 FFmpegDecoder::sample_aspect_ratio_numerator () const
 446 {
 447         return _video_codec_context->sample_aspect_ratio.num;
 448 }
 449
 450 int
 451 FFmpegDecoder::sample_aspect_ratio_denominator () const
 452 {
 453         return _video_codec_context->sample_aspect_ratio.den;
 454 }
 455
 456 bool
 457 FFmpegDecoder::has_subtitles () const
 458 {
 459         return (_subtitle_stream != -1);
 460 }
 461
 462 vector<AudioStream>
 463 FFmpegDecoder::audio_streams () const
 464 {
 465         return _audio_streams;
 466 }
 467
 468 vector<SubtitleStream>
 469 FFmpegDecoder::subtitle_streams () const
 470 {
 471         return _subtitle_streams;
 472 }
 473
 474 string
 475 FFmpegDecoder::stream_name (AVStream* s) const
 476 {
 477         stringstream n;
 478
 479         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 480         if (lang) {
 481                 n << lang->value;
 482         }
 483
 484         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 485         if (title) {
 486                 if (!n.str().empty()) {
 487                         n << " ";
 488                 }
 489                 n << title->value;
 490         }
 491
 492         if (n.str().empty()) {
 493                 n << "unknown";
 494         }
 495
 496         return n.str ();
 497 }
 498