src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
   20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "subtitle.h"
  51
  52 using std::cout;
  53 using std::string;
  54 using std::vector;
  55 using std::stringstream;
  56 using boost::shared_ptr;
  57
  58 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j, bool minimal, bool ignore_length)
  59         : Decoder (f, o, j, minimal, ignore_length)
  60         , _format_context (0)
  61         , _video_stream (-1)
  62         , _audio_stream (-1)
  63         , _subtitle_stream (-1)
  64         , _frame (0)
  65         , _video_codec_context (0)
  66         , _video_codec (0)
  67         , _audio_codec_context (0)
  68         , _audio_codec (0)
  69         , _subtitle_codec_context (0)
  70         , _subtitle_codec (0)
  71 {
  72         setup_general ();
  73         setup_video ();
  74         setup_audio ();
  75         setup_subtitle ();
  76 }
  77
  78 FFmpegDecoder::~FFmpegDecoder ()
  79 {
  80         if (_audio_codec_context) {
  81                 avcodec_close (_audio_codec_context);
  82         }
  83
  84         if (_video_codec_context) {
  85                 avcodec_close (_video_codec_context);
  86         }
  87
  88         if (_subtitle_codec_context) {
  89                 avcodec_close (_subtitle_codec_context);
  90         }
  91
  92         av_free (_frame);
  93         avformat_close_input (&_format_context);
  94 }
  95
  96 void
  97 FFmpegDecoder::setup_general ()
  98 {
  99         int r;
 100
 101         av_register_all ();
 102
 103         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 104                 throw OpenFileError (_film->content_path ());
 105         }
 106
 107         if (avformat_find_stream_info (_format_context, 0) < 0) {
 108                 throw DecodeError ("could not find stream information");
 109         }
 110
 111         /* Find video, audio and subtitle streams and choose the first of each */
 112
 113         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 114                 AVStream* s = _format_context->streams[i];
 115                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 116                         _video_stream = i;
 117                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 118                         if (_audio_stream == -1) {
 119                                 _audio_stream = i;
 120                         }
 121                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->channels));
 122                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 123                         if (_subtitle_stream == -1) {
 124                                 _subtitle_stream = i;
 125                         }
 126                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 127                 }
 128         }
 129
 130         /* Now override audio and subtitle streams with those from the Film, if it has any */
 131
 132         if (_film->audio_stream_index() != -1) {
 133                 _audio_stream = _film->audio_stream().id();
 134         }
 135
 136         if (_film->subtitle_stream_index() != -1) {
 137                 _subtitle_stream = _film->subtitle_stream().id ();
 138         }
 139
 140         if (_video_stream < 0) {
 141                 throw DecodeError ("could not find video stream");
 142         }
 143
 144         _frame = avcodec_alloc_frame ();
 145         if (_frame == 0) {
 146                 throw DecodeError ("could not allocate frame");
 147         }
 148 }
 149
 150 void
 151 FFmpegDecoder::setup_video ()
 152 {
 153         _video_codec_context = _format_context->streams[_video_stream]->codec;
 154         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 155
 156         if (_video_codec == 0) {
 157                 throw DecodeError ("could not find video decoder");
 158         }
 159
 160         /* I think this prevents problems with green hash on decodes and
 161            "changing frame properties on the fly is not supported by all filters"
 162            messages with some content.  Although I'm not sure; needs checking.
 163         */
 164         AVDictionary* opts = 0;
 165         av_dict_set (&opts, "threads", "1", 0);
 166
 167         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 168                 throw DecodeError ("could not open video decoder");
 169         }
 170 }
 171
 172 void
 173 FFmpegDecoder::setup_audio ()
 174 {
 175         if (_audio_stream < 0) {
 176                 return;
 177         }
 178
 179         _audio_codec_context = _format_context->streams[_audio_stream]->codec;
 180         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 181
 182         if (_audio_codec == 0) {
 183                 throw DecodeError ("could not find audio decoder");
 184         }
 185
 186         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 187                 throw DecodeError ("could not open audio decoder");
 188         }
 189
 190         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 191            so bodge it here.  No idea why we should have to do this.
 192         */
 193
 194         if (_audio_codec_context->channel_layout == 0) {
 195                 _audio_codec_context->channel_layout = av_get_default_channel_layout (audio_channels ());
 196         }
 197 }
 198
 199 void
 200 FFmpegDecoder::setup_subtitle ()
 201 {
 202         if (_subtitle_stream < 0) {
 203                 return;
 204         }
 205
 206         _subtitle_codec_context = _format_context->streams[_subtitle_stream]->codec;
 207         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 208
 209         if (_subtitle_codec == 0) {
 210                 throw DecodeError ("could not find subtitle decoder");
 211         }
 212
 213         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 214                 throw DecodeError ("could not open subtitle decoder");
 215         }
 216 }
 217
 218
 219 bool
 220 FFmpegDecoder::do_pass ()
 221 {
 222         int r = av_read_frame (_format_context, &_packet);
 223
 224         if (r < 0) {
 225                 if (r != AVERROR_EOF) {
 226                         throw DecodeError ("error on av_read_frame");
 227                 }
 228
 229                 /* Get any remaining frames */
 230
 231                 _packet.data = 0;
 232                 _packet.size = 0;
 233
 234                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 235
 236                 int frame_finished;
 237
 238                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 239                         process_video (_frame);
 240                 }
 241
 242                 if (_audio_stream >= 0 && _opt->decode_audio) {
 243                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 244                                 int const data_size = av_samples_get_buffer_size (
 245                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 246                                         );
 247
 248                                 assert (_audio_codec_context->channels == _film->audio_channels());
 249                                 process_audio (_frame->data[0], data_size);
 250                         }
 251                 }
 252
 253                 return true;
 254         }
 255
 256         double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts;
 257
 258         avcodec_get_frame_defaults (_frame);
 259
 260         if (_packet.stream_index == _video_stream) {
 261
 262                 if (!_first_video) {
 263                         _first_video = pts_seconds;
 264                 }
 265
 266                 int frame_finished;
 267                 if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 268
 269                         /* Where we are in the output, in seconds */
 270                         double const out_pts_seconds = last_video_frame() / frames_per_second();
 271
 272                         /* Difference between where we are and where we should be */
 273                         double const delta = pts_seconds - out_pts_seconds;
 274                         double const one_frame = 1 / frames_per_second();
 275
 276                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 277                         if (delta > one_frame) {
 278                                 int const extra = rint (delta / one_frame);
 279                                 for (int i = 0; i < extra; ++i) {
 280                                         _film->log()->log (String::compose ("Extra frame inserted at %1s", out_pts_seconds));
 281                                         process_video (_frame);
 282                                 }
 283                         }
 284
 285                         if (delta > -one_frame) {
 286                                 /* Process this frame */
 287                                 process_video (_frame);
 288                         } else {
 289                                 /* Otherwise we are omitting a frame to keep things right */
 290                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 291                         }
 292                 }
 293
 294         } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) {
 295
 296                 /* Note: We only decode audio if we've had our first video packet through, and if it
 297                    was before this packet.  Until then audio is thrown away.
 298                 */
 299
 300                 if (!_first_audio) {
 301                         _first_audio = pts_seconds;
 302
 303                         /* This is our first audio packet, and if we've arrived here we must have had our
 304                            first video packet.  Push some silence to make up the gap between our first
 305                            video packet and our first audio.
 306                         */
 307
 308                         /* frames of silence that we must push */
 309                         int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
 310
 311                         _film->log()->log (
 312                                 String::compose (
 313                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 314                                         _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
 315                                         )
 316                                 );
 317
 318                         /* hence bytes */
 319                         int const b = s * audio_channels() * bytes_per_audio_sample();
 320
 321                         /* XXX: this assumes that it won't be too much, and there are shaky assumptions
 322                            that all sound representations are silent with memset()ed zero data.
 323                         */
 324                         uint8_t silence[b];
 325                         memset (silence, 0, b);
 326                         process_audio (silence, b);
 327                 }
 328
 329                 int frame_finished;
 330                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 331                         int const data_size = av_samples_get_buffer_size (
 332                                 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 333                                 );
 334
 335                         assert (_audio_codec_context->channels == _film->audio_channels());
 336                         process_audio (_frame->data[0], data_size);
 337                 }
 338
 339         } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles && _first_video) {
 340
 341                 int got_subtitle;
 342                 AVSubtitle sub;
 343                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 344                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 345                            indicate that the previous subtitle should stop.
 346                         */
 347                         if (sub.num_rects > 0) {
 348                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 349                         } else {
 350                                 process_subtitle (shared_ptr<TimedSubtitle> ());
 351                         }
 352                         avsubtitle_free (&sub);
 353                 }
 354         }
 355
 356         av_free_packet (&_packet);
 357         return false;
 358 }
 359
 360 float
 361 FFmpegDecoder::frames_per_second () const
 362 {
 363         AVStream* s = _format_context->streams[_video_stream];
 364
 365         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 366                 return av_q2d (s->avg_frame_rate);
 367         }
 368
 369         return av_q2d (s->r_frame_rate);
 370 }
 371
 372 int
 373 FFmpegDecoder::audio_channels () const
 374 {
 375         if (_audio_codec_context == 0) {
 376                 return 0;
 377         }
 378
 379         return _audio_codec_context->channels;
 380 }
 381
 382 int
 383 FFmpegDecoder::audio_sample_rate () const
 384 {
 385         if (_audio_codec_context == 0) {
 386                 return 0;
 387         }
 388
 389         return _audio_codec_context->sample_rate;
 390 }
 391
 392 AVSampleFormat
 393 FFmpegDecoder::audio_sample_format () const
 394 {
 395         if (_audio_codec_context == 0) {
 396                 return (AVSampleFormat) 0;
 397         }
 398
 399         return _audio_codec_context->sample_fmt;
 400 }
 401
 402 int64_t
 403 FFmpegDecoder::audio_channel_layout () const
 404 {
 405         if (_audio_codec_context == 0) {
 406                 return 0;
 407         }
 408
 409         return _audio_codec_context->channel_layout;
 410 }
 411
 412 Size
 413 FFmpegDecoder::native_size () const
 414 {
 415         return Size (_video_codec_context->width, _video_codec_context->height);
 416 }
 417
 418 PixelFormat
 419 FFmpegDecoder::pixel_format () const
 420 {
 421         return _video_codec_context->pix_fmt;
 422 }
 423
 424 int
 425 FFmpegDecoder::time_base_numerator () const
 426 {
 427         return _video_codec_context->time_base.num;
 428 }
 429
 430 int
 431 FFmpegDecoder::time_base_denominator () const
 432 {
 433         return _video_codec_context->time_base.den;
 434 }
 435
 436 int
 437 FFmpegDecoder::sample_aspect_ratio_numerator () const
 438 {
 439         return _video_codec_context->sample_aspect_ratio.num;
 440 }
 441
 442 int
 443 FFmpegDecoder::sample_aspect_ratio_denominator () const
 444 {
 445         return _video_codec_context->sample_aspect_ratio.den;
 446 }
 447
 448 bool
 449 FFmpegDecoder::has_subtitles () const
 450 {
 451         return (_subtitle_stream != -1);
 452 }
 453
 454 vector<AudioStream>
 455 FFmpegDecoder::audio_streams () const
 456 {
 457         return _audio_streams;
 458 }
 459
 460 vector<SubtitleStream>
 461 FFmpegDecoder::subtitle_streams () const
 462 {
 463         return _subtitle_streams;
 464 }
 465
 466 string
 467 FFmpegDecoder::stream_name (AVStream* s) const
 468 {
 469         stringstream n;
 470
 471         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 472         if (lang) {
 473                 n << lang->value;
 474         }
 475
 476         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 477         if (title) {
 478                 if (!n.str().empty()) {
 479                         n << " ";
 480                 }
 481                 n << title->value;
 482         }
 483
 484         if (n.str().empty()) {
 485                 n << "unknown";
 486         }
 487
 488         return n.str ();
 489 }
 490