src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 #include <sndfile.h>
  32 extern "C" {
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 }
  36 #include "filter.h"
  37 #include "exceptions.h"
  38 #include "image.h"
  39 #include "util.h"
  40 #include "log.h"
  41 #include "ffmpeg_decoder.h"
  42 #include "filter_graph.h"
  43 #include "audio_buffers.h"
  44 #include "ffmpeg_content.h"
  45
  46 #include "i18n.h"
  47
  48 using std::cout;
  49 using std::string;
  50 using std::vector;
  51 using std::stringstream;
  52 using std::list;
  53 using std::min;
  54 using std::pair;
  55 using boost::shared_ptr;
  56 using boost::optional;
  57 using boost::dynamic_pointer_cast;
  58 using dcp::Size;
  59
  60 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log, bool video, bool audio, bool subtitles)
  61         : VideoDecoder (c)
  62         , AudioDecoder (c)
  63         , FFmpeg (c)
  64         , _log (log)
  65         , _subtitle_codec_context (0)
  66         , _subtitle_codec (0)
  67         , _decode_video (video)
  68         , _decode_audio (audio)
  69         , _decode_subtitles (subtitles)
  70         , _pts_offset (0)
  71 {
  72         setup_subtitle ();
  73
  74         /* Audio and video frame PTS values may not start with 0.  We want
  75            to fiddle them so that:
  76
  77            1.  One of them starts at time 0.
  78            2.  The first video PTS value ends up on a frame boundary.
  79
  80            Then we remove big initial gaps in PTS and we allow our
  81            insertion of black frames to work.
  82
  83            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  84         */
  85
  86         bool const have_video = video && c->first_video();
  87         bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
  88
  89         /* First, make one of them start at 0 */
  90
  91         if (have_audio && have_video) {
  92                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  93         } else if (have_video) {
  94                 _pts_offset = - c->first_video().get();
  95         } else if (have_audio) {
  96                 _pts_offset = - c->audio_stream()->first_audio.get();
  97         }
  98
  99         /* Now adjust both so that the video pts starts on a frame */
 100         if (have_video && have_audio) {
 101                 ContentTime first_video = c->first_video().get() + _pts_offset;
 102                 ContentTime const old_first_video = first_video;
 103                 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
 104         }
 105 }
 106
 107 FFmpegDecoder::~FFmpegDecoder ()
 108 {
 109         boost::mutex::scoped_lock lm (_mutex);
 110
 111         if (_subtitle_codec_context) {
 112                 avcodec_close (_subtitle_codec_context);
 113         }
 114 }
 115
 116 void
 117 FFmpegDecoder::flush ()
 118 {
 119         /* Get any remaining frames */
 120
 121         _packet.data = 0;
 122         _packet.size = 0;
 123
 124         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 125
 126         if (_decode_video) {
 127                 while (decode_video_packet ()) {}
 128         }
 129
 130         if (_ffmpeg_content->audio_stream() && _decode_audio) {
 131                 decode_audio_packet ();
 132                 AudioDecoder::flush ();
 133         }
 134 }
 135
 136 bool
 137 FFmpegDecoder::pass ()
 138 {
 139         int r = av_read_frame (_format_context, &_packet);
 140
 141         if (r < 0) {
 142                 if (r != AVERROR_EOF) {
 143                         /* Maybe we should fail here, but for now we'll just finish off instead */
 144                         char buf[256];
 145                         av_strerror (r, buf, sizeof(buf));
 146                         _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 147                 }
 148
 149                 flush ();
 150                 return true;
 151         }
 152
 153         int const si = _packet.stream_index;
 154
 155         if (si == _video_stream && _decode_video) {
 156                 decode_video_packet ();
 157         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
 158                 decode_audio_packet ();
 159         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && _decode_subtitles) {
 160                 decode_subtitle_packet ();
 161         }
 162
 163         av_free_packet (&_packet);
 164         return false;
 165 }
 166
 167 /** @param data pointer to array of pointers to buffers.
 168  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 169  */
 170 shared_ptr<AudioBuffers>
 171 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 172 {
 173         assert (_ffmpeg_content->audio_channels());
 174         assert (bytes_per_audio_sample());
 175
 176         /* Deinterleave and convert to float */
 177
 178         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 179
 180         int const total_samples = size / bytes_per_audio_sample();
 181         int const frames = total_samples / _ffmpeg_content->audio_channels();
 182         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 183
 184         switch (audio_sample_format()) {
 185         case AV_SAMPLE_FMT_S16:
 186         {
 187                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 188                 int sample = 0;
 189                 int channel = 0;
 190                 for (int i = 0; i < total_samples; ++i) {
 191                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 192
 193                         ++channel;
 194                         if (channel == _ffmpeg_content->audio_channels()) {
 195                                 channel = 0;
 196                                 ++sample;
 197                         }
 198                 }
 199         }
 200         break;
 201
 202         case AV_SAMPLE_FMT_S16P:
 203         {
 204                 int16_t** p = reinterpret_cast<int16_t **> (data);
 205                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 206                         for (int j = 0; j < frames; ++j) {
 207                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 208                         }
 209                 }
 210         }
 211         break;
 212
 213         case AV_SAMPLE_FMT_S32:
 214         {
 215                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 216                 int sample = 0;
 217                 int channel = 0;
 218                 for (int i = 0; i < total_samples; ++i) {
 219                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 220
 221                         ++channel;
 222                         if (channel == _ffmpeg_content->audio_channels()) {
 223                                 channel = 0;
 224                                 ++sample;
 225                         }
 226                 }
 227         }
 228         break;
 229
 230         case AV_SAMPLE_FMT_FLT:
 231         {
 232                 float* p = reinterpret_cast<float*> (data[0]);
 233                 int sample = 0;
 234                 int channel = 0;
 235                 for (int i = 0; i < total_samples; ++i) {
 236                         audio->data(channel)[sample] = *p++;
 237
 238                         ++channel;
 239                         if (channel == _ffmpeg_content->audio_channels()) {
 240                                 channel = 0;
 241                                 ++sample;
 242                         }
 243                 }
 244         }
 245         break;
 246
 247         case AV_SAMPLE_FMT_FLTP:
 248         {
 249                 float** p = reinterpret_cast<float**> (data);
 250                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 251                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 252                 }
 253         }
 254         break;
 255
 256         default:
 257                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 258         }
 259
 260         return audio;
 261 }
 262
 263 AVSampleFormat
 264 FFmpegDecoder::audio_sample_format () const
 265 {
 266         if (!_ffmpeg_content->audio_stream()) {
 267                 return (AVSampleFormat) 0;
 268         }
 269
 270         return audio_codec_context()->sample_fmt;
 271 }
 272
 273 int
 274 FFmpegDecoder::bytes_per_audio_sample () const
 275 {
 276         return av_get_bytes_per_sample (audio_sample_format ());
 277 }
 278
 279 int
 280 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 281 {
 282         int frames_read = 0;
 283         optional<ContentTime> last_video;
 284         optional<ContentTime> last_audio;
 285
 286         while (!finished (last_video, last_audio, frames_read)) {
 287                 int r = av_read_frame (_format_context, &_packet);
 288                 if (r < 0) {
 289                         /* We should flush our decoders here, possibly yielding a few more frames,
 290                            but the consequence of having to do that is too hideous to contemplate.
 291                            Instead we give up and say that you can't seek too close to the end
 292                            of a file.
 293                         */
 294                         return frames_read;
 295                 }
 296
 297                 ++frames_read;
 298
 299                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 300
 301                 if (_packet.stream_index == _video_stream) {
 302
 303                         avcodec_get_frame_defaults (_frame);
 304
 305                         int finished = 0;
 306                         r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
 307                         if (r >= 0 && finished) {
 308                                 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 309                         }
 310
 311                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 312                         AVPacket copy_packet = _packet;
 313                         while (copy_packet.size > 0) {
 314
 315                                 int finished;
 316                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
 317                                 if (r >= 0 && finished) {
 318                                         last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 319                                 }
 320
 321                                 copy_packet.data += r;
 322                                 copy_packet.size -= r;
 323                         }
 324                 }
 325
 326                 av_free_packet (&_packet);
 327         }
 328
 329         return frames_read;
 330 }
 331
 332 bool
 333 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 334 {
 335         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 336 }
 337
 338 bool
 339 FFmpegDecoder::seek_final_finished (int n, int done) const
 340 {
 341         return n == done;
 342 }
 343
 344 void
 345 FFmpegDecoder::seek_and_flush (ContentTime t)
 346 {
 347         ContentTime const u = t - _pts_offset;
 348         int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
 349
 350         if (_ffmpeg_content->audio_stream ()) {
 351                 s = min (
 352                         s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
 353                         );
 354         }
 355
 356         /* Ridiculous empirical hack */
 357         s--;
 358         if (s < 0) {
 359                 s = 0;
 360         }
 361
 362         av_seek_frame (_format_context, _video_stream, s, 0);
 363
 364         avcodec_flush_buffers (video_codec_context());
 365         if (audio_codec_context ()) {
 366                 avcodec_flush_buffers (audio_codec_context ());
 367         }
 368         if (_subtitle_codec_context) {
 369                 avcodec_flush_buffers (_subtitle_codec_context);
 370         }
 371 }
 372
 373 void
 374 FFmpegDecoder::seek (ContentTime time, bool accurate)
 375 {
 376         Decoder::seek (time, accurate);
 377         if (_decode_audio) {
 378                 AudioDecoder::seek (time, accurate);
 379         }
 380
 381         /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
 382            a number plucked from the air) earlier than we want to end up.  The loop below
 383            will hopefully then step through to where we want to be.
 384         */
 385
 386         ContentTime pre_roll = accurate ? ContentTime::from_seconds (0.2) : ContentTime (0);
 387         ContentTime initial_seek = time - pre_roll;
 388         if (initial_seek < ContentTime (0)) {
 389                 initial_seek = ContentTime (0);
 390         }
 391
 392         /* Initial seek time in the video stream's timebase */
 393
 394         seek_and_flush (initial_seek);
 395
 396         if (!accurate) {
 397                 /* That'll do */
 398                 return;
 399         }
 400
 401         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 402
 403         seek_and_flush (initial_seek);
 404         if (N > 0) {
 405                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 406         }
 407 }
 408
 409 void
 410 FFmpegDecoder::decode_audio_packet ()
 411 {
 412         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 413            several times.
 414         */
 415
 416         AVPacket copy_packet = _packet;
 417
 418         while (copy_packet.size > 0) {
 419
 420                 int frame_finished;
 421                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 422
 423                 if (decode_result < 0) {
 424                         _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 425                         return;
 426                 }
 427
 428                 if (frame_finished) {
 429                         ContentTime const ct = ContentTime::from_seconds (
 430                                 av_frame_get_best_effort_timestamp (_frame) *
 431                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
 432                                 + _pts_offset;
 433
 434                         int const data_size = av_samples_get_buffer_size (
 435                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 436                                 );
 437
 438                         audio (deinterleave_audio (_frame->data, data_size), ct);
 439                 }
 440
 441                 copy_packet.data += decode_result;
 442                 copy_packet.size -= decode_result;
 443         }
 444 }
 445
 446 bool
 447 FFmpegDecoder::decode_video_packet ()
 448 {
 449         int frame_finished;
 450         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 451                 return false;
 452         }
 453
 454         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 455
 456         shared_ptr<FilterGraph> graph;
 457
 458         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 459         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 460                 ++i;
 461         }
 462
 463         if (i == _filter_graphs.end ()) {
 464                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 465                 _filter_graphs.push_back (graph);
 466                 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 467         } else {
 468                 graph = *i;
 469         }
 470
 471         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 472
 473         string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
 474
 475         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 476
 477                 shared_ptr<Image> image = i->first;
 478                 if (!post_process.empty ()) {
 479                         image = image->post_process (post_process, true);
 480                 }
 481
 482                 if (i->second != AV_NOPTS_VALUE) {
 483                         video (image, false, ContentTime::from_seconds (i->second * av_q2d (_format_context->streams[_video_stream]->time_base)) + _pts_offset);
 484                 } else {
 485                         _log->log ("Dropping frame without PTS");
 486                 }
 487         }
 488
 489         return true;
 490 }
 491
 492
 493 void
 494 FFmpegDecoder::setup_subtitle ()
 495 {
 496         boost::mutex::scoped_lock lm (_mutex);
 497
 498         if (!_ffmpeg_content->subtitle_stream()) {
 499                 return;
 500         }
 501
 502         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 503         if (_subtitle_codec_context == 0) {
 504                 throw DecodeError (N_("could not find subtitle stream"));
 505         }
 506
 507         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 508
 509         if (_subtitle_codec == 0) {
 510                 throw DecodeError (N_("could not find subtitle decoder"));
 511         }
 512
 513         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 514                 throw DecodeError (N_("could not open subtitle decoder"));
 515         }
 516 }
 517
 518 void
 519 FFmpegDecoder::decode_subtitle_packet ()
 520 {
 521         int got_subtitle;
 522         AVSubtitle sub;
 523         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 524                 return;
 525         }
 526
 527         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 528            indicate that the previous subtitle should stop.
 529         */
 530         if (sub.num_rects <= 0) {
 531                 image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), ContentTime (), ContentTime ());
 532                 return;
 533         } else if (sub.num_rects > 1) {
 534                 throw DecodeError (_("multi-part subtitles not yet supported"));
 535         }
 536
 537         /* Subtitle PTS (within the source, not taking into account any of the
 538            source that we may have chopped off for the DCP)
 539         */
 540         ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
 541
 542         /* hence start time for this sub */
 543         ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
 544         ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
 545
 546         AVSubtitleRect const * rect = sub.rects[0];
 547
 548         if (rect->type != SUBTITLE_BITMAP) {
 549                 throw DecodeError (_("non-bitmap subtitles not yet supported"));
 550         }
 551
 552         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 553            G, third B, fourth A.
 554         */
 555         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 556
 557         /* Start of the first line in the subtitle */
 558         uint8_t* sub_p = rect->pict.data[0];
 559         /* sub_p looks up into a BGRA palette which is here
 560            (i.e. first byte B, second G, third R, fourth A)
 561         */
 562         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 563         /* Start of the output data */
 564         uint32_t* out_p = (uint32_t *) image->data()[0];
 565
 566         for (int y = 0; y < rect->h; ++y) {
 567                 uint8_t* sub_line_p = sub_p;
 568                 uint32_t* out_line_p = out_p;
 569                 for (int x = 0; x < rect->w; ++x) {
 570                         uint32_t const p = palette[*sub_line_p++];
 571                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 572                 }
 573                 sub_p += rect->pict.linesize[0];
 574                 out_p += image->stride()[0] / sizeof (uint32_t);
 575         }
 576
 577         dcp::Size const vs = _ffmpeg_content->video_size ();
 578
 579         image_subtitle (
 580                 image,
 581                 dcpomatic::Rect<double> (
 582                         static_cast<double> (rect->x) / vs.width,
 583                         static_cast<double> (rect->y) / vs.height,
 584                         static_cast<double> (rect->w) / vs.width,
 585                         static_cast<double> (rect->h) / vs.height
 586                         ),
 587                 from,
 588                 to
 589                 );
 590
 591
 592         avsubtitle_free (&sub);
 593 }