src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60
  61 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  62         : Decoder (f, o, j)
  63         , VideoDecoder (f, o, j)
  64         , AudioDecoder (f, o, j)
  65         , _format_context (0)
  66         , _video_stream (-1)
  67         , _frame (0)
  68         , _video_codec_context (0)
  69         , _video_codec (0)
  70         , _audio_codec_context (0)
  71         , _audio_codec (0)
  72         , _subtitle_codec_context (0)
  73         , _subtitle_codec (0)
  74 {
  75         setup_general ();
  76         setup_video ();
  77         setup_audio ();
  78         setup_subtitle ();
  79 }
  80
  81 FFmpegDecoder::~FFmpegDecoder ()
  82 {
  83         if (_audio_codec_context) {
  84                 avcodec_close (_audio_codec_context);
  85         }
  86
  87         if (_video_codec_context) {
  88                 avcodec_close (_video_codec_context);
  89         }
  90
  91         if (_subtitle_codec_context) {
  92                 avcodec_close (_subtitle_codec_context);
  93         }
  94
  95         av_free (_frame);
  96
  97         avformat_close_input (&_format_context);
  98 }
  99
 100 void
 101 FFmpegDecoder::setup_general ()
 102 {
 103         int r;
 104
 105         av_register_all ();
 106
 107         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 108                 throw OpenFileError (_film->content_path ());
 109         }
 110
 111         if (avformat_find_stream_info (_format_context, 0) < 0) {
 112                 throw DecodeError ("could not find stream information");
 113         }
 114
 115         /* Find video, audio and subtitle streams and choose the first of each */
 116
 117         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 118                 AVStream* s = _format_context->streams[i];
 119                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 120                         _video_stream = i;
 121                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 122                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout));
 123                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 124                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 125                 }
 126         }
 127
 128         if (_video_stream < 0) {
 129                 throw DecodeError ("could not find video stream");
 130         }
 131
 132         _frame = avcodec_alloc_frame ();
 133         if (_frame == 0) {
 134                 throw DecodeError ("could not allocate frame");
 135         }
 136 }
 137
 138 void
 139 FFmpegDecoder::setup_video ()
 140 {
 141         _video_codec_context = _format_context->streams[_video_stream]->codec;
 142         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 143
 144         if (_video_codec == 0) {
 145                 throw DecodeError ("could not find video decoder");
 146         }
 147
 148         /* I think this prevents problems with green hash on decodes and
 149            "changing frame properties on the fly is not supported by all filters"
 150            messages with some content.  Although I'm not sure; needs checking.
 151         */
 152         AVDictionary* opts = 0;
 153         av_dict_set (&opts, "threads", "1", 0);
 154
 155         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 156                 throw DecodeError ("could not open video decoder");
 157         }
 158 }
 159
 160 void
 161 FFmpegDecoder::setup_audio ()
 162 {
 163         if (!_audio_stream) {
 164                 return;
 165         }
 166
 167         _audio_codec_context = _format_context->streams[_audio_stream.get().id()]->codec;
 168         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 169
 170         if (_audio_codec == 0) {
 171                 throw DecodeError ("could not find audio decoder");
 172         }
 173
 174         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 175                 throw DecodeError ("could not open audio decoder");
 176         }
 177
 178         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 179            so bodge it here.  No idea why we should have to do this.
 180         */
 181
 182         if (_audio_codec_context->channel_layout == 0) {
 183                 _audio_codec_context->channel_layout = av_get_default_channel_layout (_audio_stream.get().channels());
 184         }
 185 }
 186
 187 void
 188 FFmpegDecoder::setup_subtitle ()
 189 {
 190         if (!_subtitle_stream) {
 191                 return;
 192         }
 193
 194         _subtitle_codec_context = _format_context->streams[_subtitle_stream.get().id()]->codec;
 195         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 196
 197         if (_subtitle_codec == 0) {
 198                 throw DecodeError ("could not find subtitle decoder");
 199         }
 200
 201         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 202                 throw DecodeError ("could not open subtitle decoder");
 203         }
 204 }
 205
 206
 207 bool
 208 FFmpegDecoder::pass ()
 209 {
 210         int r = av_read_frame (_format_context, &_packet);
 211
 212         if (r < 0) {
 213                 if (r != AVERROR_EOF) {
 214                         /* Maybe we should fail here, but for now we'll just finish off instead */
 215                         char buf[256];
 216                         av_strerror (r, buf, sizeof(buf));
 217                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 218                 }
 219
 220                 /* Get any remaining frames */
 221
 222                 _packet.data = 0;
 223                 _packet.size = 0;
 224
 225                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 226
 227                 int frame_finished;
 228
 229                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 230                         filter_and_emit_video (_frame);
 231                 }
 232
 233                 if (_audio_stream && _opt->decode_audio && _film->use_content_audio()) {
 234                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 235                                 int const data_size = av_samples_get_buffer_size (
 236                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 237                                         );
 238
 239                                 assert (_audio_codec_context->channels == _film->audio_channels());
 240                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 241                         }
 242                 }
 243
 244                 return true;
 245         }
 246
 247         avcodec_get_frame_defaults (_frame);
 248
 249         if (_packet.stream_index == _video_stream) {
 250
 251                 int frame_finished;
 252                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 253                 if (r >= 0 && frame_finished) {
 254
 255                         if (r != _packet.size) {
 256                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 257                         }
 258
 259                         /* Where we are in the output, in seconds */
 260                         double const out_pts_seconds = video_frame() / frames_per_second();
 261
 262                         /* Where we are in the source, in seconds */
 263                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 264                                 * av_frame_get_best_effort_timestamp(_frame);
 265
 266                         if (!_first_video) {
 267                                 _first_video = source_pts_seconds;
 268                         }
 269
 270                         /* Difference between where we are and where we should be */
 271                         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 272                         double const one_frame = 1 / frames_per_second();
 273
 274                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 275                         if (delta > one_frame) {
 276                                 int const extra = rint (delta / one_frame);
 277                                 for (int i = 0; i < extra; ++i) {
 278                                         repeat_last_video ();
 279                                         _film->log()->log (
 280                                                 String::compose (
 281                                                         "Extra frame inserted at %1s; source frame %2, source PTS %3",
 282                                                         out_pts_seconds, video_frame(), source_pts_seconds
 283                                                         )
 284                                                 );
 285                                 }
 286                         }
 287
 288                         if (delta > -one_frame) {
 289                                 /* Process this frame */
 290                                 filter_and_emit_video (_frame);
 291                         } else {
 292                                 /* Otherwise we are omitting a frame to keep things right */
 293                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 294                         }
 295                 }
 296
 297         } else if (_audio_stream && _packet.stream_index == _audio_stream.get().id() && _opt->decode_audio && _film->use_content_audio()) {
 298
 299                 int frame_finished;
 300                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 301
 302                         /* Where we are in the source, in seconds */
 303                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 304                                 * av_frame_get_best_effort_timestamp(_frame);
 305
 306                         /* We only decode audio if we've had our first video packet through, and if it
 307                            was before this packet.  Until then audio is thrown away.
 308                         */
 309
 310                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 311
 312                                 if (!_first_audio) {
 313                                         _first_audio = source_pts_seconds;
 314
 315                                         /* This is our first audio frame, and if we've arrived here we must have had our
 316                                            first video frame.  Push some silence to make up any gap between our first
 317                                            video frame and our first audio.
 318                                         */
 319
 320                                         /* frames of silence that we must push */
 321                                         int const s = rint ((_first_audio.get() - _first_video.get()) * _audio_stream.get().sample_rate ());
 322
 323                                         _film->log()->log (
 324                                                 String::compose (
 325                                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 326                                                         _first_video.get(), _first_audio.get(), s, _audio_stream.get().channels(), bytes_per_audio_sample()
 327                                                         )
 328                                                 );
 329
 330                                         if (s) {
 331                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), s));
 332                                                 audio->make_silent ();
 333                                                 Audio (audio);
 334                                         }
 335                                 }
 336
 337                                 int const data_size = av_samples_get_buffer_size (
 338                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 339                                         );
 340
 341                                 assert (_audio_codec_context->channels == _film->audio_channels());
 342                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 343                         }
 344                 }
 345
 346         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream.get().id() && _opt->decode_subtitles && _first_video) {
 347
 348                 int got_subtitle;
 349                 AVSubtitle sub;
 350                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 351                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 352                            indicate that the previous subtitle should stop.
 353                         */
 354                         if (sub.num_rects > 0) {
 355                                 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 356                         } else {
 357                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 358                         }
 359                         avsubtitle_free (&sub);
 360                 }
 361         }
 362
 363         av_free_packet (&_packet);
 364         return false;
 365 }
 366
 367 shared_ptr<AudioBuffers>
 368 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 369 {
 370         assert (_film->audio_channels());
 371         assert (bytes_per_audio_sample());
 372
 373         /* Deinterleave and convert to float */
 374
 375         assert ((size % (bytes_per_audio_sample() * _audio_stream.get().channels())) == 0);
 376
 377         int const total_samples = size / bytes_per_audio_sample();
 378         int const frames = total_samples / _film->audio_channels();
 379         shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), frames));
 380
 381         switch (audio_sample_format()) {
 382         case AV_SAMPLE_FMT_S16:
 383         {
 384                 int16_t* p = (int16_t *) data;
 385                 int sample = 0;
 386                 int channel = 0;
 387                 for (int i = 0; i < total_samples; ++i) {
 388                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 389
 390                         ++channel;
 391                         if (channel == _film->audio_channels()) {
 392                                 channel = 0;
 393                                 ++sample;
 394                         }
 395                 }
 396         }
 397         break;
 398
 399         case AV_SAMPLE_FMT_S32:
 400         {
 401                 int32_t* p = (int32_t *) data;
 402                 int sample = 0;
 403                 int channel = 0;
 404                 for (int i = 0; i < total_samples; ++i) {
 405                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 406
 407                         ++channel;
 408                         if (channel == _film->audio_channels()) {
 409                                 channel = 0;
 410                                 ++sample;
 411                         }
 412                 }
 413         }
 414
 415         case AV_SAMPLE_FMT_FLTP:
 416         {
 417                 float* p = reinterpret_cast<float*> (data);
 418                 for (int i = 0; i < _film->audio_channels(); ++i) {
 419                         memcpy (audio->data(i), p, frames * sizeof(float));
 420                         p += frames;
 421                 }
 422         }
 423         break;
 424
 425         default:
 426                 assert (false);
 427         }
 428
 429         return audio;
 430 }
 431
 432 float
 433 FFmpegDecoder::frames_per_second () const
 434 {
 435         AVStream* s = _format_context->streams[_video_stream];
 436
 437         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 438                 return av_q2d (s->avg_frame_rate);
 439         }
 440
 441         return av_q2d (s->r_frame_rate);
 442 }
 443
 444 AVSampleFormat
 445 FFmpegDecoder::audio_sample_format () const
 446 {
 447         if (_audio_codec_context == 0) {
 448                 return (AVSampleFormat) 0;
 449         }
 450
 451         return _audio_codec_context->sample_fmt;
 452 }
 453
 454 Size
 455 FFmpegDecoder::native_size () const
 456 {
 457         return Size (_video_codec_context->width, _video_codec_context->height);
 458 }
 459
 460 PixelFormat
 461 FFmpegDecoder::pixel_format () const
 462 {
 463         return _video_codec_context->pix_fmt;
 464 }
 465
 466 int
 467 FFmpegDecoder::time_base_numerator () const
 468 {
 469         return _video_codec_context->time_base.num;
 470 }
 471
 472 int
 473 FFmpegDecoder::time_base_denominator () const
 474 {
 475         return _video_codec_context->time_base.den;
 476 }
 477
 478 int
 479 FFmpegDecoder::sample_aspect_ratio_numerator () const
 480 {
 481         return _video_codec_context->sample_aspect_ratio.num;
 482 }
 483
 484 int
 485 FFmpegDecoder::sample_aspect_ratio_denominator () const
 486 {
 487         return _video_codec_context->sample_aspect_ratio.den;
 488 }
 489
 490 string
 491 FFmpegDecoder::stream_name (AVStream* s) const
 492 {
 493         stringstream n;
 494
 495         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 496         if (lang) {
 497                 n << lang->value;
 498         }
 499
 500         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 501         if (title) {
 502                 if (!n.str().empty()) {
 503                         n << " ";
 504                 }
 505                 n << title->value;
 506         }
 507
 508         if (n.str().empty()) {
 509                 n << "unknown";
 510         }
 511
 512         return n.str ();
 513 }
 514
 515 int
 516 FFmpegDecoder::bytes_per_audio_sample () const
 517 {
 518         return av_get_bytes_per_sample (audio_sample_format ());
 519 }
 520
 521 void
 522 FFmpegDecoder::set_audio_stream (optional<AudioStream> s)
 523 {
 524         AudioDecoder::set_audio_stream (s);
 525         setup_audio ();
 526 }
 527
 528 void
 529 FFmpegDecoder::set_subtitle_stream (optional<SubtitleStream> s)
 530 {
 531         VideoDecoder::set_subtitle_stream (s);
 532         setup_subtitle ();
 533 }
 534
 535 void
 536 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 537 {
 538         shared_ptr<FilterGraph> graph;
 539
 540         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 541         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 542                 ++i;
 543         }
 544
 545         if (i == _filter_graphs.end ()) {
 546                 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 547                 _filter_graphs.push_back (graph);
 548                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 549         } else {
 550                 graph = *i;
 551         }
 552
 553         list<shared_ptr<Image> > images = graph->process (frame);
 554
 555         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 556                 emit_video (*i);
 557         }
 558 }