src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "film_state.h"
  45 #include "options.h"
  46 #include "exceptions.h"
  47 #include "image.h"
  48 #include "util.h"
  49 #include "log.h"
  50 #include "ffmpeg_decoder.h"
  51 #include "subtitle.h"
  52
  53 using namespace std;
  54 using namespace boost;
  55
  56 FFmpegDecoder::FFmpegDecoder (boost::shared_ptr<const FilmState> s, boost::shared_ptr<const Options> o, Job* j, Log* l, bool minimal, bool ignore_length)
  57         : Decoder (s, o, j, l, minimal, ignore_length)
  58         , _format_context (0)
  59         , _video_stream (-1)
  60         , _audio_stream (-1)
  61         , _subtitle_stream (-1)
  62         , _frame (0)
  63         , _video_codec_context (0)
  64         , _video_codec (0)
  65         , _audio_codec_context (0)
  66         , _audio_codec (0)
  67         , _subtitle_codec_context (0)
  68         , _subtitle_codec (0)
  69         , _first_video_pts (-1)
  70         , _first_audio_pts (-1)
  71 {
  72         setup_general ();
  73         setup_video ();
  74         setup_audio ();
  75         setup_subtitle ();
  76 }
  77
  78 FFmpegDecoder::~FFmpegDecoder ()
  79 {
  80         if (_audio_codec_context) {
  81                 avcodec_close (_audio_codec_context);
  82         }
  83
  84         if (_video_codec_context) {
  85                 avcodec_close (_video_codec_context);
  86         }
  87
  88         if (_subtitle_codec_context) {
  89                 avcodec_close (_subtitle_codec_context);
  90         }
  91
  92         av_free (_frame);
  93         avformat_close_input (&_format_context);
  94 }
  95
  96 void
  97 FFmpegDecoder::setup_general ()
  98 {
  99         int r;
 100
 101         av_register_all ();
 102
 103         if ((r = avformat_open_input (&_format_context, _fs->content_path().c_str(), 0, 0)) != 0) {
 104                 throw OpenFileError (_fs->content_path ());
 105         }
 106
 107         if (avformat_find_stream_info (_format_context, 0) < 0) {
 108                 throw DecodeError ("could not find stream information");
 109         }
 110
 111         /* Find video, audio and subtitle streams and choose the first of each */
 112
 113         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 114                 AVStream* s = _format_context->streams[i];
 115                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 116                         _video_stream = i;
 117                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 118                         if (_audio_stream == -1) {
 119                                 _audio_stream = i;
 120                         }
 121                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->channels));
 122                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 123                         if (_subtitle_stream == -1) {
 124                                 _subtitle_stream = i;
 125                         }
 126                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 127                 }
 128         }
 129
 130         /* Now override audio and subtitle streams with those from the Film, if it has any */
 131
 132         if (_fs->audio_stream_index() != -1) {
 133                 _audio_stream = _fs->audio_stream().id();
 134         }
 135
 136         if (_fs->subtitle_stream_index() != -1) {
 137                 _subtitle_stream = _fs->subtitle_stream().id ();
 138         }
 139
 140         if (_video_stream < 0) {
 141                 throw DecodeError ("could not find video stream");
 142         }
 143
 144         _frame = avcodec_alloc_frame ();
 145         if (_frame == 0) {
 146                 throw DecodeError ("could not allocate frame");
 147         }
 148 }
 149
 150 void
 151 FFmpegDecoder::setup_video ()
 152 {
 153         _video_codec_context = _format_context->streams[_video_stream]->codec;
 154         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 155
 156         if (_video_codec == 0) {
 157                 throw DecodeError ("could not find video decoder");
 158         }
 159
 160         if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
 161                 throw DecodeError ("could not open video decoder");
 162         }
 163 }
 164
 165 void
 166 FFmpegDecoder::setup_audio ()
 167 {
 168         if (_audio_stream < 0) {
 169                 return;
 170         }
 171
 172         _audio_codec_context = _format_context->streams[_audio_stream]->codec;
 173         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 174
 175         if (_audio_codec == 0) {
 176                 throw DecodeError ("could not find audio decoder");
 177         }
 178
 179         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 180                 throw DecodeError ("could not open audio decoder");
 181         }
 182
 183         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 184            so bodge it here.  No idea why we should have to do this.
 185         */
 186
 187         if (_audio_codec_context->channel_layout == 0) {
 188                 _audio_codec_context->channel_layout = av_get_default_channel_layout (audio_channels ());
 189         }
 190 }
 191
 192 void
 193 FFmpegDecoder::setup_subtitle ()
 194 {
 195         if (_subtitle_stream < 0) {
 196                 return;
 197         }
 198
 199         _subtitle_codec_context = _format_context->streams[_subtitle_stream]->codec;
 200         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 201
 202         if (_subtitle_codec == 0) {
 203                 throw DecodeError ("could not find subtitle decoder");
 204         }
 205
 206         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 207                 throw DecodeError ("could not open subtitle decoder");
 208         }
 209 }
 210
 211
 212 bool
 213 FFmpegDecoder::do_pass ()
 214 {
 215         int r = av_read_frame (_format_context, &_packet);
 216         if (r < 0) {
 217                 if (r != AVERROR_EOF) {
 218                         throw DecodeError ("error on av_read_frame");
 219                 }
 220
 221                 /* Get any remaining frames */
 222
 223                 _packet.data = 0;
 224                 _packet.size = 0;
 225
 226                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 227
 228                 int frame_finished;
 229
 230                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 231                         process_video (_frame);
 232                 }
 233
 234                 if (_audio_stream >= 0 && _opt->decode_audio) {
 235                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 236                                 int const data_size = av_samples_get_buffer_size (
 237                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 238                                         );
 239
 240                                 assert (_audio_codec_context->channels == _fs->audio_channels());
 241                                 process_audio (_frame->data[0], data_size);
 242                         }
 243                 }
 244
 245                 return true;
 246         }
 247
 248         if (_packet.stream_index == _video_stream) {
 249
 250                 if (_first_video_pts == -1) {
 251                         _first_video_pts = _packet.pts;
 252                 }
 253
 254                 int frame_finished;
 255                 if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 256                         process_video (_frame);
 257                 }
 258
 259         } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && (_first_video_pts != -1 && _packet.pts > _first_video_pts)) {
 260
 261                 /* Note: We only decode audio if we've had our first video packet through, and if this
 262                    packet comes after it.  Until then it is thrown away.
 263                 */
 264
 265                 if (_first_audio_pts == -1) {
 266                         _first_audio_pts = _packet.pts;
 267
 268                         /* This is our first audio packet, and if we've arrived here we must have had our
 269                            first video packet.  Push some silence to make up the gap between our first
 270                            video packet and our first audio.
 271                         */
 272
 273                         AVStream* v = _format_context->streams[_video_stream];
 274                         AVStream* a = _format_context->streams[_audio_stream];
 275
 276                         assert (v->time_base.num == a->time_base.num);
 277                         assert (v->time_base.den == a->time_base.den);
 278
 279                         /* samples of silence that we must push */
 280                         int const s = rint (av_q2d (v->time_base) * (_first_audio_pts - _first_video_pts) * audio_sample_rate ());
 281
 282                         _log->log (
 283                                 String::compose (
 284                                         "First video at %1, first audio at %2, pushing %3 samples of silence",
 285                                         _first_video_pts, _first_audio_pts, s
 286                                         )
 287                                 );
 288
 289                         /* hence bytes */
 290                         int const b = s * audio_channels() * bytes_per_audio_sample();
 291
 292                         /* XXX: this assumes that it won't be too much, and there are shaky assumptions
 293                            that all sound representations are silent with memset()ed zero data.
 294                         */
 295                         uint8_t silence[b];
 296                         memset (silence, 0, b);
 297                         process_audio (silence, b);
 298                 }
 299
 300                 avcodec_get_frame_defaults (_frame);
 301
 302                 int frame_finished;
 303                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 304                         int const data_size = av_samples_get_buffer_size (
 305                                 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 306                                 );
 307
 308                         assert (_audio_codec_context->channels == _fs->audio_channels());
 309                         process_audio (_frame->data[0], data_size);
 310                 }
 311
 312         } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles) {
 313
 314                 int got_subtitle;
 315                 AVSubtitle sub;
 316                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 317                         /* I'm not entirely sure why, but sometimes we get an AVSubtitle with
 318                            no AVSubtitleRects.
 319                         */
 320                         if (sub.num_rects > 0) {
 321                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
 322                         }
 323                         avsubtitle_free (&sub);
 324                 }
 325         }
 326
 327         av_free_packet (&_packet);
 328         return false;
 329 }
 330
 331 int
 332 FFmpegDecoder::length_in_frames () const
 333 {
 334         return (_format_context->duration / AV_TIME_BASE) * frames_per_second ();
 335 }
 336
 337 float
 338 FFmpegDecoder::frames_per_second () const
 339 {
 340         AVStream* s = _format_context->streams[_video_stream];
 341
 342         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 343                 return av_q2d (s->avg_frame_rate);
 344         }
 345
 346         return av_q2d (s->r_frame_rate);
 347 }
 348
 349 int
 350 FFmpegDecoder::audio_channels () const
 351 {
 352         if (_audio_codec_context == 0) {
 353                 return 0;
 354         }
 355
 356         return _audio_codec_context->channels;
 357 }
 358
 359 int
 360 FFmpegDecoder::audio_sample_rate () const
 361 {
 362         if (_audio_codec_context == 0) {
 363                 return 0;
 364         }
 365
 366         return _audio_codec_context->sample_rate;
 367 }
 368
 369 AVSampleFormat
 370 FFmpegDecoder::audio_sample_format () const
 371 {
 372         if (_audio_codec_context == 0) {
 373                 return (AVSampleFormat) 0;
 374         }
 375
 376         return _audio_codec_context->sample_fmt;
 377 }
 378
 379 int64_t
 380 FFmpegDecoder::audio_channel_layout () const
 381 {
 382         if (_audio_codec_context == 0) {
 383                 return 0;
 384         }
 385
 386         return _audio_codec_context->channel_layout;
 387 }
 388
 389 Size
 390 FFmpegDecoder::native_size () const
 391 {
 392         return Size (_video_codec_context->width, _video_codec_context->height);
 393 }
 394
 395 PixelFormat
 396 FFmpegDecoder::pixel_format () const
 397 {
 398         return _video_codec_context->pix_fmt;
 399 }
 400
 401 int
 402 FFmpegDecoder::time_base_numerator () const
 403 {
 404         return _video_codec_context->time_base.num;
 405 }
 406
 407 int
 408 FFmpegDecoder::time_base_denominator () const
 409 {
 410         return _video_codec_context->time_base.den;
 411 }
 412
 413 int
 414 FFmpegDecoder::sample_aspect_ratio_numerator () const
 415 {
 416         return _video_codec_context->sample_aspect_ratio.num;
 417 }
 418
 419 int
 420 FFmpegDecoder::sample_aspect_ratio_denominator () const
 421 {
 422         return _video_codec_context->sample_aspect_ratio.den;
 423 }
 424
 425 bool
 426 FFmpegDecoder::has_subtitles () const
 427 {
 428         return (_subtitle_stream != -1);
 429 }
 430
 431 vector<AudioStream>
 432 FFmpegDecoder::audio_streams () const
 433 {
 434         return _audio_streams;
 435 }
 436
 437 vector<SubtitleStream>
 438 FFmpegDecoder::subtitle_streams () const
 439 {
 440         return _subtitle_streams;
 441 }
 442
 443 string
 444 FFmpegDecoder::stream_name (AVStream* s) const
 445 {
 446         stringstream n;
 447
 448         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 449         if (lang) {
 450                 n << lang->value;
 451         }
 452
 453         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 454         if (title) {
 455                 if (!n.str().empty()) {
 456                         n << " ";
 457                 }
 458                 n << title->value;
 459         }
 460
 461         if (n.str().empty()) {
 462                 n << "unknown";
 463         }
 464
 465         return n.str ();
 466 }
 467