src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <sndfile.h>
  31 extern "C" {
  32 #include <libavcodec/avcodec.h>
  33 #include <libavformat/avformat.h>
  34 }
  35 #include "filter.h"
  36 #include "exceptions.h"
  37 #include "image.h"
  38 #include "util.h"
  39 #include "log.h"
  40 #include "ffmpeg_decoder.h"
  41 #include "filter_graph.h"
  42 #include "audio_buffers.h"
  43 #include "ffmpeg_content.h"
  44
  45 #include "i18n.h"
  46
  47 using std::cout;
  48 using std::string;
  49 using std::vector;
  50 using std::stringstream;
  51 using std::list;
  52 using std::min;
  53 using std::pair;
  54 using boost::shared_ptr;
  55 using boost::optional;
  56 using boost::dynamic_pointer_cast;
  57 using dcp::Size;
  58
  59 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
  60         : VideoDecoder (c)
  61         , AudioDecoder (c)
  62         , FFmpeg (c)
  63         , _log (log)
  64         , _subtitle_codec_context (0)
  65         , _subtitle_codec (0)
  66 {
  67         setup_subtitle ();
  68
  69         /* Audio and video frame PTS values may not start with 0.  We want
  70            to fiddle them so that:
  71
  72            1.  One of them starts at time 0.
  73            2.  The first video PTS value ends up on a frame boundary.
  74
  75            Then we remove big initial gaps in PTS and we allow our
  76            insertion of black frames to work.
  77
  78            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  79         */
  80
  81         bool const have_video = c->first_video();
  82         bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
  83
  84         /* First, make one of them start at 0 */
  85
  86         if (have_audio && have_video) {
  87                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  88         } else if (have_video) {
  89                 _pts_offset = - c->first_video().get();
  90         } else if (have_audio) {
  91                 _pts_offset = - c->audio_stream()->first_audio.get();
  92         }
  93
  94         /* Now adjust both so that the video pts starts on a frame */
  95         if (have_video && have_audio) {
  96                 ContentTime first_video = c->first_video().get() + _pts_offset;
  97                 ContentTime const old_first_video = first_video;
  98                 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
  99         }
 100 }
 101
 102 FFmpegDecoder::~FFmpegDecoder ()
 103 {
 104         boost::mutex::scoped_lock lm (_mutex);
 105
 106         if (_subtitle_codec_context) {
 107                 avcodec_close (_subtitle_codec_context);
 108         }
 109 }
 110
 111 void
 112 FFmpegDecoder::flush ()
 113 {
 114         /* Get any remaining frames */
 115
 116         _packet.data = 0;
 117         _packet.size = 0;
 118
 119         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 120
 121         while (decode_video_packet ()) {}
 122
 123         if (_ffmpeg_content->audio_stream()) {
 124                 decode_audio_packet ();
 125                 AudioDecoder::flush ();
 126         }
 127 }
 128
 129 bool
 130 FFmpegDecoder::pass ()
 131 {
 132         int r = av_read_frame (_format_context, &_packet);
 133
 134         if (r < 0) {
 135                 if (r != AVERROR_EOF) {
 136                         /* Maybe we should fail here, but for now we'll just finish off instead */
 137                         char buf[256];
 138                         av_strerror (r, buf, sizeof(buf));
 139                         _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 140                 }
 141
 142                 flush ();
 143                 return true;
 144         }
 145
 146         int const si = _packet.stream_index;
 147
 148         if (si == _video_stream) {
 149                 decode_video_packet ();
 150         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
 151                 decode_audio_packet ();
 152         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
 153                 decode_subtitle_packet ();
 154         }
 155
 156         av_free_packet (&_packet);
 157         return false;
 158 }
 159
 160 /** @param data pointer to array of pointers to buffers.
 161  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 162  */
 163 shared_ptr<AudioBuffers>
 164 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 165 {
 166         assert (_ffmpeg_content->audio_channels());
 167         assert (bytes_per_audio_sample());
 168
 169         /* Deinterleave and convert to float */
 170
 171         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 172
 173         int const total_samples = size / bytes_per_audio_sample();
 174         int const frames = total_samples / _ffmpeg_content->audio_channels();
 175         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 176
 177         switch (audio_sample_format()) {
 178         case AV_SAMPLE_FMT_U8:
 179         {
 180                 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
 181                 int sample = 0;
 182                 int channel = 0;
 183                 for (int i = 0; i < total_samples; ++i) {
 184                         audio->data(channel)[sample] = float(*p++) / (1 << 23);
 185
 186                         ++channel;
 187                         if (channel == _ffmpeg_content->audio_channels()) {
 188                                 channel = 0;
 189                                 ++sample;
 190                         }
 191                 }
 192         }
 193         break;
 194
 195         case AV_SAMPLE_FMT_S16:
 196         {
 197                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 198                 int sample = 0;
 199                 int channel = 0;
 200                 for (int i = 0; i < total_samples; ++i) {
 201                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 202
 203                         ++channel;
 204                         if (channel == _ffmpeg_content->audio_channels()) {
 205                                 channel = 0;
 206                                 ++sample;
 207                         }
 208                 }
 209         }
 210         break;
 211
 212         case AV_SAMPLE_FMT_S16P:
 213         {
 214                 int16_t** p = reinterpret_cast<int16_t **> (data);
 215                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 216                         for (int j = 0; j < frames; ++j) {
 217                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 218                         }
 219                 }
 220         }
 221         break;
 222
 223         case AV_SAMPLE_FMT_S32:
 224         {
 225                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 226                 int sample = 0;
 227                 int channel = 0;
 228                 for (int i = 0; i < total_samples; ++i) {
 229                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 230
 231                         ++channel;
 232                         if (channel == _ffmpeg_content->audio_channels()) {
 233                                 channel = 0;
 234                                 ++sample;
 235                         }
 236                 }
 237         }
 238         break;
 239
 240         case AV_SAMPLE_FMT_FLT:
 241         {
 242                 float* p = reinterpret_cast<float*> (data[0]);
 243                 int sample = 0;
 244                 int channel = 0;
 245                 for (int i = 0; i < total_samples; ++i) {
 246                         audio->data(channel)[sample] = *p++;
 247
 248                         ++channel;
 249                         if (channel == _ffmpeg_content->audio_channels()) {
 250                                 channel = 0;
 251                                 ++sample;
 252                         }
 253                 }
 254         }
 255         break;
 256
 257         case AV_SAMPLE_FMT_FLTP:
 258         {
 259                 float** p = reinterpret_cast<float**> (data);
 260                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 261                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 262                 }
 263         }
 264         break;
 265
 266         default:
 267                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 268         }
 269
 270         return audio;
 271 }
 272
 273 AVSampleFormat
 274 FFmpegDecoder::audio_sample_format () const
 275 {
 276         if (!_ffmpeg_content->audio_stream()) {
 277                 return (AVSampleFormat) 0;
 278         }
 279
 280         return audio_codec_context()->sample_fmt;
 281 }
 282
 283 int
 284 FFmpegDecoder::bytes_per_audio_sample () const
 285 {
 286         return av_get_bytes_per_sample (audio_sample_format ());
 287 }
 288
 289 int
 290 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 291 {
 292         int frames_read = 0;
 293         optional<ContentTime> last_video;
 294         optional<ContentTime> last_audio;
 295
 296         while (!finished (last_video, last_audio, frames_read)) {
 297                 int r = av_read_frame (_format_context, &_packet);
 298                 if (r < 0) {
 299                         /* We should flush our decoders here, possibly yielding a few more frames,
 300                            but the consequence of having to do that is too hideous to contemplate.
 301                            Instead we give up and say that you can't seek too close to the end
 302                            of a file.
 303                         */
 304                         return frames_read;
 305                 }
 306
 307                 ++frames_read;
 308
 309                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 310
 311                 if (_packet.stream_index == _video_stream) {
 312
 313                         avcodec_get_frame_defaults (_frame);
 314
 315                         int got_picture = 0;
 316                         r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
 317                         if (r >= 0 && got_picture) {
 318                                 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 319                         }
 320
 321                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 322                         AVPacket copy_packet = _packet;
 323                         while (copy_packet.size > 0) {
 324
 325                                 int got_frame;
 326                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
 327                                 if (r >= 0 && got_frame) {
 328                                         last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 329                                 }
 330
 331                                 copy_packet.data += r;
 332                                 copy_packet.size -= r;
 333                         }
 334                 }
 335
 336                 av_free_packet (&_packet);
 337         }
 338
 339         return frames_read;
 340 }
 341
 342 bool
 343 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 344 {
 345         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 346 }
 347
 348 bool
 349 FFmpegDecoder::seek_final_finished (int n, int done) const
 350 {
 351         return n == done;
 352 }
 353
 354 void
 355 FFmpegDecoder::seek_and_flush (ContentTime t)
 356 {
 357         ContentTime const u = t - _pts_offset;
 358         int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
 359
 360         if (_ffmpeg_content->audio_stream ()) {
 361                 s = min (
 362                         s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
 363                         );
 364         }
 365
 366         /* Ridiculous empirical hack */
 367         s--;
 368         if (s < 0) {
 369                 s = 0;
 370         }
 371
 372         av_seek_frame (_format_context, _video_stream, s, 0);
 373
 374         avcodec_flush_buffers (video_codec_context());
 375         if (audio_codec_context ()) {
 376                 avcodec_flush_buffers (audio_codec_context ());
 377         }
 378         if (_subtitle_codec_context) {
 379                 avcodec_flush_buffers (_subtitle_codec_context);
 380         }
 381 }
 382
 383 void
 384 FFmpegDecoder::seek (ContentTime time, bool accurate)
 385 {
 386         VideoDecoder::seek (time, accurate);
 387         AudioDecoder::seek (time, accurate);
 388
 389         /* If we are doing an accurate seek, our initial shot will be 2s (2 being
 390            a number plucked from the air) earlier than we want to end up.  The loop below
 391            will hopefully then step through to where we want to be.
 392         */
 393
 394         ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
 395         ContentTime initial_seek = time - pre_roll;
 396         if (initial_seek < ContentTime (0)) {
 397                 initial_seek = ContentTime (0);
 398         }
 399
 400         /* Initial seek time in the video stream's timebase */
 401
 402         seek_and_flush (initial_seek);
 403
 404         if (!accurate) {
 405                 /* That'll do */
 406                 return;
 407         }
 408
 409         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 410
 411         seek_and_flush (initial_seek);
 412         if (N > 0) {
 413                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 414         }
 415 }
 416
 417 void
 418 FFmpegDecoder::decode_audio_packet ()
 419 {
 420         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 421            several times.
 422         */
 423
 424         AVPacket copy_packet = _packet;
 425
 426         while (copy_packet.size > 0) {
 427
 428                 int frame_finished;
 429                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 430
 431                 if (decode_result < 0) {
 432                         _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 433                         return;
 434                 }
 435
 436                 if (frame_finished) {
 437                         ContentTime const ct = ContentTime::from_seconds (
 438                                 av_frame_get_best_effort_timestamp (_frame) *
 439                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
 440                                 + _pts_offset;
 441
 442                         int const data_size = av_samples_get_buffer_size (
 443                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 444                                 );
 445
 446                         audio (deinterleave_audio (_frame->data, data_size), ct);
 447                 }
 448
 449                 copy_packet.data += decode_result;
 450                 copy_packet.size -= decode_result;
 451         }
 452 }
 453
 454 bool
 455 FFmpegDecoder::decode_video_packet ()
 456 {
 457         int frame_finished;
 458         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 459                 return false;
 460         }
 461
 462         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 463
 464         shared_ptr<FilterGraph> graph;
 465
 466         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 467         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 468                 ++i;
 469         }
 470
 471         if (i == _filter_graphs.end ()) {
 472                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 473                 _filter_graphs.push_back (graph);
 474                 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 475         } else {
 476                 graph = *i;
 477         }
 478
 479         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 480
 481         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 482
 483                 shared_ptr<Image> image = i->first;
 484
 485                 if (i->second != AV_NOPTS_VALUE) {
 486                         double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
 487                         video (image, rint (pts * _ffmpeg_content->video_frame_rate ()));
 488                 } else {
 489                         _log->log ("Dropping frame without PTS");
 490                 }
 491         }
 492
 493         return true;
 494 }
 495
 496
 497 void
 498 FFmpegDecoder::setup_subtitle ()
 499 {
 500         boost::mutex::scoped_lock lm (_mutex);
 501
 502         if (!_ffmpeg_content->subtitle_stream()) {
 503                 return;
 504         }
 505
 506         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 507         if (_subtitle_codec_context == 0) {
 508                 throw DecodeError (N_("could not find subtitle stream"));
 509         }
 510
 511         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 512
 513         if (_subtitle_codec == 0) {
 514                 throw DecodeError (N_("could not find subtitle decoder"));
 515         }
 516
 517         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 518                 throw DecodeError (N_("could not open subtitle decoder"));
 519         }
 520 }
 521
 522 void
 523 FFmpegDecoder::decode_subtitle_packet ()
 524 {
 525         int got_subtitle;
 526         AVSubtitle sub;
 527         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 528                 return;
 529         }
 530
 531         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 532            indicate that the previous subtitle should stop.
 533         */
 534         if (sub.num_rects <= 0) {
 535                 image_subtitle (ContentTime (), ContentTime (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
 536                 return;
 537         } else if (sub.num_rects > 1) {
 538                 throw DecodeError (_("multi-part subtitles not yet supported"));
 539         }
 540
 541         /* Subtitle PTS (within the source, not taking into account any of the
 542            source that we may have chopped off for the DCP)
 543         */
 544         ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
 545
 546         /* hence start time for this sub */
 547         ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
 548         ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
 549
 550         AVSubtitleRect const * rect = sub.rects[0];
 551
 552         if (rect->type != SUBTITLE_BITMAP) {
 553                 /* XXX */
 554                 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
 555                 return;
 556         }
 557
 558         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 559            G, third B, fourth A.
 560         */
 561         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 562
 563         /* Start of the first line in the subtitle */
 564         uint8_t* sub_p = rect->pict.data[0];
 565         /* sub_p looks up into a BGRA palette which is here
 566            (i.e. first byte B, second G, third R, fourth A)
 567         */
 568         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 569         /* Start of the output data */
 570         uint32_t* out_p = (uint32_t *) image->data()[0];
 571
 572         for (int y = 0; y < rect->h; ++y) {
 573                 uint8_t* sub_line_p = sub_p;
 574                 uint32_t* out_line_p = out_p;
 575                 for (int x = 0; x < rect->w; ++x) {
 576                         uint32_t const p = palette[*sub_line_p++];
 577                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 578                 }
 579                 sub_p += rect->pict.linesize[0];
 580                 out_p += image->stride()[0] / sizeof (uint32_t);
 581         }
 582
 583         dcp::Size const vs = _ffmpeg_content->video_size ();
 584
 585         image_subtitle (
 586                 from,
 587                 to,
 588                 image,
 589                 dcpomatic::Rect<double> (
 590                         static_cast<double> (rect->x) / vs.width,
 591                         static_cast<double> (rect->y) / vs.height,
 592                         static_cast<double> (rect->w) / vs.width,
 593                         static_cast<double> (rect->h) / vs.height
 594                         )
 595                 );
 596
 597         avsubtitle_free (&sub);
 598 }