src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <sndfile.h>
  31 extern "C" {
  32 #include <libavcodec/avcodec.h>
  33 #include <libavformat/avformat.h>
  34 }
  35 #include "filter.h"
  36 #include "exceptions.h"
  37 #include "image.h"
  38 #include "util.h"
  39 #include "log.h"
  40 #include "ffmpeg_decoder.h"
  41 #include "ffmpeg_audio_stream.h"
  42 #include "ffmpeg_subtitle_stream.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46 #include "image_proxy.h"
  47 #include "film.h"
  48
  49 #include "i18n.h"
  50
/* Logging helpers.  Each one expands to a log() call on the Log obtained via
   _video_content->film(), so they can only be used where _video_content is in
   scope.  Note that every expansion already ends with a semicolon.
*/
/** Log a message built with String::compose as Log::TYPE_GENERAL */
#define LOG_GENERAL(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_GENERAL);
/** Log a message built with String::compose as Log::TYPE_ERROR */
#define LOG_ERROR(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_ERROR);
/** Log a message as Log::TYPE_WARNING; the arguments are passed straight to log() without String::compose */
#define LOG_WARNING(...) _video_content->film()->log()->log (__VA_ARGS__, Log::TYPE_WARNING);
  54
  55 using std::cout;
  56 using std::string;
  57 using std::vector;
  58 using std::stringstream;
  59 using std::list;
  60 using std::min;
  61 using std::pair;
  62 using boost::shared_ptr;
  63 using boost::optional;
  64 using boost::dynamic_pointer_cast;
  65 using dcp::Size;
  66
  67 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
  68         : VideoDecoder (c)
  69         , AudioDecoder (c)
  70         , SubtitleDecoder (c)
  71         , FFmpeg (c)
  72         , _log (log)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         /* Audio and video frame PTS values may not start with 0.  We want
  77            to fiddle them so that:
  78
  79            1.  One of them starts at time 0.
  80            2.  The first video PTS value ends up on a frame boundary.
  81
  82            Then we remove big initial gaps in PTS and we allow our
  83            insertion of black frames to work.
  84
  85            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  86         */
  87
  88         bool const have_video = c->first_video();
  89         bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
  90
  91         /* First, make one of them start at 0 */
  92
  93         if (have_audio && have_video) {
  94                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  95         } else if (have_video) {
  96                 _pts_offset = - c->first_video().get();
  97         } else if (have_audio) {
  98                 _pts_offset = - c->audio_stream()->first_audio.get();
  99         }
 100
 101         /* Now adjust both so that the video pts starts on a frame */
 102         if (have_video && have_audio) {
 103                 ContentTime first_video = c->first_video().get() + _pts_offset;
 104                 ContentTime const old_first_video = first_video;
 105                 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
 106         }
 107 }
 108
 109 FFmpegDecoder::~FFmpegDecoder ()
 110 {
 111         boost::mutex::scoped_lock lm (_mutex);
 112
 113         if (_subtitle_codec_context) {
 114                 avcodec_close (_subtitle_codec_context);
 115         }
 116 }
 117
 118 void
 119 FFmpegDecoder::flush ()
 120 {
 121         /* Get any remaining frames */
 122
 123         _packet.data = 0;
 124         _packet.size = 0;
 125
 126         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 127
 128         while (decode_video_packet ()) {}
 129
 130         if (_ffmpeg_content->audio_stream()) {
 131                 decode_audio_packet ();
 132                 AudioDecoder::flush ();
 133         }
 134 }
 135
 136 bool
 137 FFmpegDecoder::pass ()
 138 {
 139         int r = av_read_frame (_format_context, &_packet);
 140
 141         if (r < 0) {
 142                 if (r != AVERROR_EOF) {
 143                         /* Maybe we should fail here, but for now we'll just finish off instead */
 144                         char buf[256];
 145                         av_strerror (r, buf, sizeof(buf));
 146                         LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), buf, r);
 147                 }
 148
 149                 flush ();
 150                 return true;
 151         }
 152
 153         int const si = _packet.stream_index;
 154
 155         if (si == _video_stream) {
 156                 decode_video_packet ();
 157         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
 158                 decode_audio_packet ();
 159         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
 160                 decode_subtitle_packet ();
 161         }
 162
 163         av_free_packet (&_packet);
 164         return false;
 165 }
 166
 167 /** @param data pointer to array of pointers to buffers.
 168  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 169  */
 170 shared_ptr<AudioBuffers>
 171 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 172 {
 173         assert (_ffmpeg_content->audio_channels());
 174         assert (bytes_per_audio_sample());
 175
 176         /* Deinterleave and convert to float */
 177
 178         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 179
 180         int const total_samples = size / bytes_per_audio_sample();
 181         int const frames = total_samples / _ffmpeg_content->audio_channels();
 182         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 183
 184         switch (audio_sample_format()) {
 185         case AV_SAMPLE_FMT_U8:
 186         {
 187                 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
 188                 int sample = 0;
 189                 int channel = 0;
 190                 for (int i = 0; i < total_samples; ++i) {
 191                         audio->data(channel)[sample] = float(*p++) / (1 << 23);
 192
 193                         ++channel;
 194                         if (channel == _ffmpeg_content->audio_channels()) {
 195                                 channel = 0;
 196                                 ++sample;
 197                         }
 198                 }
 199         }
 200         break;
 201
 202         case AV_SAMPLE_FMT_S16:
 203         {
 204                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 205                 int sample = 0;
 206                 int channel = 0;
 207                 for (int i = 0; i < total_samples; ++i) {
 208                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 209
 210                         ++channel;
 211                         if (channel == _ffmpeg_content->audio_channels()) {
 212                                 channel = 0;
 213                                 ++sample;
 214                         }
 215                 }
 216         }
 217         break;
 218
 219         case AV_SAMPLE_FMT_S16P:
 220         {
 221                 int16_t** p = reinterpret_cast<int16_t **> (data);
 222                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 223                         for (int j = 0; j < frames; ++j) {
 224                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 225                         }
 226                 }
 227         }
 228         break;
 229
 230         case AV_SAMPLE_FMT_S32:
 231         {
 232                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 233                 int sample = 0;
 234                 int channel = 0;
 235                 for (int i = 0; i < total_samples; ++i) {
 236                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 237
 238                         ++channel;
 239                         if (channel == _ffmpeg_content->audio_channels()) {
 240                                 channel = 0;
 241                                 ++sample;
 242                         }
 243                 }
 244         }
 245         break;
 246
 247         case AV_SAMPLE_FMT_FLT:
 248         {
 249                 float* p = reinterpret_cast<float*> (data[0]);
 250                 int sample = 0;
 251                 int channel = 0;
 252                 for (int i = 0; i < total_samples; ++i) {
 253                         audio->data(channel)[sample] = *p++;
 254
 255                         ++channel;
 256                         if (channel == _ffmpeg_content->audio_channels()) {
 257                                 channel = 0;
 258                                 ++sample;
 259                         }
 260                 }
 261         }
 262         break;
 263
 264         case AV_SAMPLE_FMT_FLTP:
 265         {
 266                 float** p = reinterpret_cast<float**> (data);
 267                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 268                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 269                 }
 270         }
 271         break;
 272
 273         default:
 274                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 275         }
 276
 277         return audio;
 278 }
 279
 280 AVSampleFormat
 281 FFmpegDecoder::audio_sample_format () const
 282 {
 283         if (!_ffmpeg_content->audio_stream()) {
 284                 return (AVSampleFormat) 0;
 285         }
 286
 287         return audio_codec_context()->sample_fmt;
 288 }
 289
 290 int
 291 FFmpegDecoder::bytes_per_audio_sample () const
 292 {
 293         return av_get_bytes_per_sample (audio_sample_format ());
 294 }
 295
 296 int
 297 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 298 {
 299         int frames_read = 0;
 300         optional<ContentTime> last_video;
 301         optional<ContentTime> last_audio;
 302
 303         while (!finished (last_video, last_audio, frames_read)) {
 304                 int r = av_read_frame (_format_context, &_packet);
 305                 if (r < 0) {
 306                         /* We should flush our decoders here, possibly yielding a few more frames,
 307                            but the consequence of having to do that is too hideous to contemplate.
 308                            Instead we give up and say that you can't seek too close to the end
 309                            of a file.
 310                         */
 311                         return frames_read;
 312                 }
 313
 314                 ++frames_read;
 315
 316                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 317
 318                 if (_packet.stream_index == _video_stream) {
 319
 320                         av_frame_unref (_frame);
 321
 322                         int got_picture = 0;
 323                         r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
 324                         if (r >= 0 && got_picture) {
 325                                 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 326                         }
 327
 328                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 329                         AVPacket copy_packet = _packet;
 330                         while (copy_packet.size > 0) {
 331
 332                                 int got_frame;
 333                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
 334                                 if (r >= 0 && got_frame) {
 335                                         last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 336                                 }
 337
 338                                 copy_packet.data += r;
 339                                 copy_packet.size -= r;
 340                         }
 341                 }
 342
 343                 av_free_packet (&_packet);
 344         }
 345
 346         return frames_read;
 347 }
 348
 349 bool
 350 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 351 {
 352         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 353 }
 354
 355 bool
 356 FFmpegDecoder::seek_final_finished (int n, int done) const
 357 {
 358         return n == done;
 359 }
 360
 361 void
 362 FFmpegDecoder::seek_and_flush (ContentTime t)
 363 {
 364         ContentTime const u = t - _pts_offset;
 365         int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
 366
 367         if (_ffmpeg_content->audio_stream ()) {
 368                 s = min (
 369                         s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
 370                         );
 371         }
 372
 373         /* Ridiculous empirical hack */
 374         s--;
 375         if (s < 0) {
 376                 s = 0;
 377         }
 378
 379         av_seek_frame (_format_context, _video_stream, s, 0);
 380
 381         avcodec_flush_buffers (video_codec_context());
 382         if (audio_codec_context ()) {
 383                 avcodec_flush_buffers (audio_codec_context ());
 384         }
 385         if (_subtitle_codec_context) {
 386                 avcodec_flush_buffers (_subtitle_codec_context);
 387         }
 388 }
 389
 390 void
 391 FFmpegDecoder::seek (ContentTime time, bool accurate)
 392 {
 393         VideoDecoder::seek (time, accurate);
 394         AudioDecoder::seek (time, accurate);
 395
 396         /* If we are doing an accurate seek, our initial shot will be 2s (2 being
 397            a number plucked from the air) earlier than we want to end up.  The loop below
 398            will hopefully then step through to where we want to be.
 399         */
 400
 401         ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
 402         ContentTime initial_seek = time - pre_roll;
 403         if (initial_seek < ContentTime (0)) {
 404                 initial_seek = ContentTime (0);
 405         }
 406
 407         /* Initial seek time in the video stream's timebase */
 408
 409         seek_and_flush (initial_seek);
 410
 411         if (!accurate) {
 412                 /* That'll do */
 413                 return;
 414         }
 415
 416         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 417
 418         seek_and_flush (initial_seek);
 419         if (N > 0) {
 420                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 421         }
 422 }
 423
 424 void
 425 FFmpegDecoder::decode_audio_packet ()
 426 {
 427         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 428            several times.
 429         */
 430
 431         AVPacket copy_packet = _packet;
 432
 433         while (copy_packet.size > 0) {
 434
 435                 int frame_finished;
 436                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 437
 438                 if (decode_result < 0) {
 439                         LOG_ERROR ("avcodec_decode_audio4 failed (%1)", decode_result);
 440                         return;
 441                 }
 442
 443                 if (frame_finished) {
 444                         ContentTime const ct = ContentTime::from_seconds (
 445                                 av_frame_get_best_effort_timestamp (_frame) *
 446                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
 447                                 + _pts_offset;
 448
 449                         int const data_size = av_samples_get_buffer_size (
 450                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 451                                 );
 452
 453                         audio (deinterleave_audio (_frame->data, data_size), ct);
 454                 }
 455
 456                 copy_packet.data += decode_result;
 457                 copy_packet.size -= decode_result;
 458         }
 459 }
 460
 461 bool
 462 FFmpegDecoder::decode_video_packet ()
 463 {
 464         int frame_finished;
 465         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 466                 return false;
 467         }
 468
 469         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 470
 471         shared_ptr<FilterGraph> graph;
 472
 473         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 474         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 475                 ++i;
 476         }
 477
 478         if (i == _filter_graphs.end ()) {
 479                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 480                 _filter_graphs.push_back (graph);
 481                 LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
 482         } else {
 483                 graph = *i;
 484         }
 485
 486         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 487
 488         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 489
 490                 shared_ptr<Image> image = i->first;
 491
 492                 if (i->second != AV_NOPTS_VALUE) {
 493                         double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
 494                         video (
 495                                 shared_ptr<ImageProxy> (new RawImageProxy (image, _video_content->film()->log())),
 496                                 rint (pts * _ffmpeg_content->video_frame_rate ())
 497                                 );
 498                 } else {
 499                         LOG_WARNING ("Dropping frame without PTS");
 500                 }
 501         }
 502
 503         return true;
 504 }
 505
 506 void
 507 FFmpegDecoder::decode_subtitle_packet ()
 508 {
 509         int got_subtitle;
 510         AVSubtitle sub;
 511         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 512                 return;
 513         }
 514
 515         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 516            indicate that the previous subtitle should stop.
 517         */
 518         if (sub.num_rects <= 0) {
 519                 image_subtitle (ContentTimePeriod (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
 520                 return;
 521         } else if (sub.num_rects > 1) {
 522                 throw DecodeError (_("multi-part subtitles not yet supported"));
 523         }
 524
 525         /* Subtitle PTS (within the source, not taking into account any of the
 526            source that we may have chopped off for the DCP)
 527         */
 528         ContentTimePeriod period = subtitle_period (sub) + _pts_offset;
 529
 530         AVSubtitleRect const * rect = sub.rects[0];
 531
 532         if (rect->type != SUBTITLE_BITMAP) {
 533                 /* XXX */
 534                 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
 535                 return;
 536         }
 537
 538         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 539            G, third B, fourth A.
 540         */
 541         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 542
 543         /* Start of the first line in the subtitle */
 544         uint8_t* sub_p = rect->pict.data[0];
 545         /* sub_p looks up into a BGRA palette which is here
 546            (i.e. first byte B, second G, third R, fourth A)
 547         */
 548         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 549         /* Start of the output data */
 550         uint32_t* out_p = (uint32_t *) image->data()[0];
 551
 552         for (int y = 0; y < rect->h; ++y) {
 553                 uint8_t* sub_line_p = sub_p;
 554                 uint32_t* out_line_p = out_p;
 555                 for (int x = 0; x < rect->w; ++x) {
 556                         uint32_t const p = palette[*sub_line_p++];
 557                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 558                 }
 559                 sub_p += rect->pict.linesize[0];
 560                 out_p += image->stride()[0] / sizeof (uint32_t);
 561         }
 562
 563         dcp::Size const vs = _ffmpeg_content->video_size ();
 564
 565         image_subtitle (
 566                 period,
 567                 image,
 568                 dcpomatic::Rect<double> (
 569                         static_cast<double> (rect->x) / vs.width,
 570                         static_cast<double> (rect->y) / vs.height,
 571                         static_cast<double> (rect->w) / vs.width,
 572                         static_cast<double> (rect->h) / vs.height
 573                         )
 574                 );
 575
 576         avsubtitle_free (&sub);
 577 }
 578
 579 bool
 580 FFmpegDecoder::has_subtitle_during (ContentTimePeriod p) const
 581 {
 582         return _ffmpeg_content->has_subtitle_during (p);
 583 }