src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60 using boost::dynamic_pointer_cast;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  63         : Decoder (f, o, j)
  64         , VideoDecoder (f, o, j)
  65         , AudioDecoder (f, o, j)
  66         , _format_context (0)
  67         , _video_stream (-1)
  68         , _frame (0)
  69         , _video_codec_context (0)
  70         , _video_codec (0)
  71         , _audio_codec_context (0)
  72         , _audio_codec (0)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         setup_general ();
  77         setup_video ();
  78         setup_audio ();
  79         setup_subtitle ();
  80 }
  81
  82 FFmpegDecoder::~FFmpegDecoder ()
  83 {
  84         if (_audio_codec_context) {
  85                 avcodec_close (_audio_codec_context);
  86         }
  87
  88         if (_video_codec_context) {
  89                 avcodec_close (_video_codec_context);
  90         }
  91
  92         if (_subtitle_codec_context) {
  93                 avcodec_close (_subtitle_codec_context);
  94         }
  95
  96         av_free (_frame);
  97
  98         avformat_close_input (&_format_context);
  99 }
 100
 101 void
 102 FFmpegDecoder::setup_general ()
 103 {
 104         int r;
 105
 106         av_register_all ();
 107
 108         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 109                 throw OpenFileError (_film->content_path ());
 110         }
 111
 112         if (avformat_find_stream_info (_format_context, 0) < 0) {
 113                 throw DecodeError ("could not find stream information");
 114         }
 115
 116         /* Find video, audio and subtitle streams and choose the first of each */
 117
 118         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 119                 AVStream* s = _format_context->streams[i];
 120                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 121                         _video_stream = i;
 122                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 123
 124                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 125                            so bodge it here.  No idea why we should have to do this.
 126                         */
 127
 128                         if (s->codec->channel_layout == 0) {
 129                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 130                         }
 131
 132                         _audio_streams.push_back (
 133                                 shared_ptr<AudioStream> (
 134                                         new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 135                                         )
 136                                 );
 137
 138                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 139                         _subtitle_streams.push_back (
 140                                 shared_ptr<SubtitleStream> (
 141                                         new SubtitleStream (stream_name (s), i)
 142                                         )
 143                                 );
 144                 }
 145         }
 146
 147         if (_video_stream < 0) {
 148                 throw DecodeError ("could not find video stream");
 149         }
 150
 151         _frame = avcodec_alloc_frame ();
 152         if (_frame == 0) {
 153                 throw DecodeError ("could not allocate frame");
 154         }
 155 }
 156
 157 void
 158 FFmpegDecoder::setup_video ()
 159 {
 160         _video_codec_context = _format_context->streams[_video_stream]->codec;
 161         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 162
 163         if (_video_codec == 0) {
 164                 throw DecodeError ("could not find video decoder");
 165         }
 166
 167         /* I think this prevents problems with green hash on decodes and
 168            "changing frame properties on the fly is not supported by all filters"
 169            messages with some content.  Although I'm not sure; needs checking.
 170         */
 171         AVDictionary* opts = 0;
 172         av_dict_set (&opts, "threads", "1", 0);
 173
 174         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 175                 throw DecodeError ("could not open video decoder");
 176         }
 177 }
 178
 179 void
 180 FFmpegDecoder::setup_audio ()
 181 {
 182         if (!_audio_stream) {
 183                 return;
 184         }
 185
 186         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 187         assert (ffa);
 188
 189         _audio_codec_context = _format_context->streams[ffa->id()]->codec;
 190         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 191
 192         if (_audio_codec == 0) {
 193                 throw DecodeError ("could not find audio decoder");
 194         }
 195
 196         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 197                 throw DecodeError ("could not open audio decoder");
 198         }
 199 }
 200
 201 void
 202 FFmpegDecoder::setup_subtitle ()
 203 {
 204         if (!_subtitle_stream) {
 205                 return;
 206         }
 207
 208         _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
 209         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 210
 211         if (_subtitle_codec == 0) {
 212                 throw DecodeError ("could not find subtitle decoder");
 213         }
 214
 215         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 216                 throw DecodeError ("could not open subtitle decoder");
 217         }
 218 }
 219
 220
 221 bool
 222 FFmpegDecoder::pass ()
 223 {
 224         int r = av_read_frame (_format_context, &_packet);
 225
 226         if (r < 0) {
 227                 if (r != AVERROR_EOF) {
 228                         /* Maybe we should fail here, but for now we'll just finish off instead */
 229                         char buf[256];
 230                         av_strerror (r, buf, sizeof(buf));
 231                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 232                 }
 233
 234                 /* Get any remaining frames */
 235
 236                 _packet.data = 0;
 237                 _packet.size = 0;
 238
 239                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 240
 241                 int frame_finished;
 242
 243                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 244                         filter_and_emit_video (_frame);
 245                 }
 246
 247                 if (_audio_stream && _opt->decode_audio) {
 248                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 249                                 int const data_size = av_samples_get_buffer_size (
 250                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 251                                         );
 252
 253                                 assert (_audio_codec_context->channels == _film->audio_channels());
 254                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 255                         }
 256                 }
 257
 258                 return true;
 259         }
 260
 261         avcodec_get_frame_defaults (_frame);
 262
 263         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 264
 265         if (_packet.stream_index == _video_stream) {
 266
 267                 int frame_finished;
 268                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 269                 if (r >= 0 && frame_finished) {
 270
 271                         if (r != _packet.size) {
 272                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 273                         }
 274
 275                         /* Where we are in the output, in seconds */
 276                         double const out_pts_seconds = video_frame() / frames_per_second();
 277
 278                         /* Where we are in the source, in seconds */
 279                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 280                                 * av_frame_get_best_effort_timestamp(_frame);
 281
 282                         _film->log()->log (
 283                                 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
 284                                 Log::VERBOSE
 285                                 );
 286
 287                         if (!_first_video) {
 288                                 _first_video = source_pts_seconds;
 289                         }
 290
 291                         /* Difference between where we are and where we should be */
 292                         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 293                         double const one_frame = 1 / frames_per_second();
 294
 295                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 296                         if (delta > one_frame) {
 297                                 int const extra = rint (delta / one_frame);
 298                                 for (int i = 0; i < extra; ++i) {
 299                                         repeat_last_video ();
 300                                         _film->log()->log (
 301                                                 String::compose (
 302                                                         "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
 303                                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 304                                                         )
 305                                                 );
 306                                 }
 307                         }
 308
 309                         if (delta > -one_frame) {
 310                                 /* Process this frame */
 311                                 filter_and_emit_video (_frame);
 312                         } else {
 313                                 /* Otherwise we are omitting a frame to keep things right */
 314                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 315                         }
 316                 }
 317
 318         } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
 319
 320                 int frame_finished;
 321                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 322
 323                         /* Where we are in the source, in seconds */
 324                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 325                                 * av_frame_get_best_effort_timestamp(_frame);
 326
 327                         /* We only decode audio if we've had our first video packet through, and if it
 328                            was before this packet.  Until then audio is thrown away.
 329                         */
 330
 331                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 332
 333                                 if (!_first_audio) {
 334                                         _first_audio = source_pts_seconds;
 335
 336                                         /* This is our first audio frame, and if we've arrived here we must have had our
 337                                            first video frame.  Push some silence to make up any gap between our first
 338                                            video frame and our first audio.
 339                                         */
 340
 341                                         /* frames of silence that we must push */
 342                                         int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
 343
 344                                         _film->log()->log (
 345                                                 String::compose (
 346                                                         "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
 347                                                         _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
 348                                                         )
 349                                                 );
 350
 351                                         if (s) {
 352                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
 353                                                 audio->make_silent ();
 354                                                 Audio (audio);
 355                                         }
 356                                 }
 357
 358                                 int const data_size = av_samples_get_buffer_size (
 359                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 360                                         );
 361
 362                                 assert (_audio_codec_context->channels == _film->audio_channels());
 363                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 364                         }
 365                 }
 366
 367         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
 368
 369                 int got_subtitle;
 370                 AVSubtitle sub;
 371                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 372                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 373                            indicate that the previous subtitle should stop.
 374                         */
 375                         if (sub.num_rects > 0) {
 376                                 shared_ptr<TimedSubtitle> ts;
 377                                 try {
 378                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 379                                 } catch (...) {
 380                                         /* some problem with the subtitle; we probably didn't understand it */
 381                                 }
 382                         } else {
 383                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 384                         }
 385                         avsubtitle_free (&sub);
 386                 }
 387         }
 388
 389         av_free_packet (&_packet);
 390         return false;
 391 }
 392
 393 shared_ptr<AudioBuffers>
 394 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 395 {
 396         assert (_film->audio_channels());
 397         assert (bytes_per_audio_sample());
 398
 399         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 400         assert (ffa);
 401
 402         /* Deinterleave and convert to float */
 403
 404         assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
 405
 406         int const total_samples = size / bytes_per_audio_sample();
 407         int const frames = total_samples / _film->audio_channels();
 408         shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
 409
 410         switch (audio_sample_format()) {
 411         case AV_SAMPLE_FMT_S16:
 412         {
 413                 int16_t* p = (int16_t *) data;
 414                 int sample = 0;
 415                 int channel = 0;
 416                 for (int i = 0; i < total_samples; ++i) {
 417                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 418
 419                         ++channel;
 420                         if (channel == _film->audio_channels()) {
 421                                 channel = 0;
 422                                 ++sample;
 423                         }
 424                 }
 425         }
 426         break;
 427
 428         case AV_SAMPLE_FMT_S32:
 429         {
 430                 int32_t* p = (int32_t *) data;
 431                 int sample = 0;
 432                 int channel = 0;
 433                 for (int i = 0; i < total_samples; ++i) {
 434                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 435
 436                         ++channel;
 437                         if (channel == _film->audio_channels()) {
 438                                 channel = 0;
 439                                 ++sample;
 440                         }
 441                 }
 442         }
 443
 444         case AV_SAMPLE_FMT_FLTP:
 445         {
 446                 float* p = reinterpret_cast<float*> (data);
 447                 for (int i = 0; i < _film->audio_channels(); ++i) {
 448                         memcpy (audio->data(i), p, frames * sizeof(float));
 449                         p += frames;
 450                 }
 451         }
 452         break;
 453
 454         default:
 455                 assert (false);
 456         }
 457
 458         return audio;
 459 }
 460
 461 float
 462 FFmpegDecoder::frames_per_second () const
 463 {
 464         AVStream* s = _format_context->streams[_video_stream];
 465
 466         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 467                 return av_q2d (s->avg_frame_rate);
 468         }
 469
 470         return av_q2d (s->r_frame_rate);
 471 }
 472
 473 AVSampleFormat
 474 FFmpegDecoder::audio_sample_format () const
 475 {
 476         if (_audio_codec_context == 0) {
 477                 return (AVSampleFormat) 0;
 478         }
 479
 480         return _audio_codec_context->sample_fmt;
 481 }
 482
 483 Size
 484 FFmpegDecoder::native_size () const
 485 {
 486         return Size (_video_codec_context->width, _video_codec_context->height);
 487 }
 488
 489 PixelFormat
 490 FFmpegDecoder::pixel_format () const
 491 {
 492         return _video_codec_context->pix_fmt;
 493 }
 494
 495 int
 496 FFmpegDecoder::time_base_numerator () const
 497 {
 498         return _video_codec_context->time_base.num;
 499 }
 500
 501 int
 502 FFmpegDecoder::time_base_denominator () const
 503 {
 504         return _video_codec_context->time_base.den;
 505 }
 506
 507 int
 508 FFmpegDecoder::sample_aspect_ratio_numerator () const
 509 {
 510         return _video_codec_context->sample_aspect_ratio.num;
 511 }
 512
 513 int
 514 FFmpegDecoder::sample_aspect_ratio_denominator () const
 515 {
 516         return _video_codec_context->sample_aspect_ratio.den;
 517 }
 518
 519 string
 520 FFmpegDecoder::stream_name (AVStream* s) const
 521 {
 522         stringstream n;
 523
 524         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 525         if (lang) {
 526                 n << lang->value;
 527         }
 528
 529         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 530         if (title) {
 531                 if (!n.str().empty()) {
 532                         n << " ";
 533                 }
 534                 n << title->value;
 535         }
 536
 537         if (n.str().empty()) {
 538                 n << "unknown";
 539         }
 540
 541         return n.str ();
 542 }
 543
 544 int
 545 FFmpegDecoder::bytes_per_audio_sample () const
 546 {
 547         return av_get_bytes_per_sample (audio_sample_format ());
 548 }
 549
 550 void
 551 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
 552 {
 553         AudioDecoder::set_audio_stream (s);
 554         setup_audio ();
 555 }
 556
 557 void
 558 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
 559 {
 560         VideoDecoder::set_subtitle_stream (s);
 561         setup_subtitle ();
 562 }
 563
 564 void
 565 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 566 {
 567         shared_ptr<FilterGraph> graph;
 568
 569         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 570         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 571                 ++i;
 572         }
 573
 574         if (i == _filter_graphs.end ()) {
 575                 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 576                 _filter_graphs.push_back (graph);
 577                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 578         } else {
 579                 graph = *i;
 580         }
 581
 582         list<shared_ptr<Image> > images = graph->process (frame);
 583
 584         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 585                 emit_video (*i);
 586         }
 587 }
 588
 589 shared_ptr<FFmpegAudioStream>
 590 FFmpegAudioStream::create (string t, optional<int> v)
 591 {
 592         if (!v) {
 593                 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
 594                 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 595         }
 596
 597         stringstream s (t);
 598         string type;
 599         s >> type;
 600         if (type != "ffmpeg") {
 601                 return shared_ptr<FFmpegAudioStream> ();
 602         }
 603
 604         return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 605 }
 606
 607 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
 608 {
 609         stringstream n (t);
 610
 611         int name_index = 4;
 612         if (!version) {
 613                 name_index = 2;
 614                 int channels;
 615                 n >> _id >> channels;
 616                 _channel_layout = av_get_default_channel_layout (channels);
 617                 _sample_rate = 0;
 618         } else {
 619                 string type;
 620                 /* Current (marked version 1) */
 621                 n >> type >> _id >> _sample_rate >> _channel_layout;
 622                 assert (type == "ffmpeg");
 623         }
 624
 625         for (int i = 0; i < name_index; ++i) {
 626                 size_t const s = t.find (' ');
 627                 if (s != string::npos) {
 628                         t = t.substr (s + 1);
 629                 }
 630         }
 631
 632         _name = t;
 633 }
 634
 635 string
 636 FFmpegAudioStream::to_string () const
 637 {
 638         return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
 639 }
 640