src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 #include <sndfile.h>
  32 extern "C" {
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 }
  36 #include "filter.h"
  37 #include "exceptions.h"
  38 #include "image.h"
  39 #include "util.h"
  40 #include "log.h"
  41 #include "ffmpeg_decoder.h"
  42 #include "filter_graph.h"
  43 #include "audio_buffers.h"
  44 #include "ffmpeg_content.h"
  45
  46 #include "i18n.h"
  47
  48 using std::cout;
  49 using std::string;
  50 using std::vector;
  51 using std::stringstream;
  52 using std::list;
  53 using std::min;
  54 using std::pair;
  55 using boost::shared_ptr;
  56 using boost::optional;
  57 using boost::dynamic_pointer_cast;
  58 using dcp::Size;
  59
  60 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
  61         : VideoDecoder (c)
  62         , AudioDecoder (c)
  63         , FFmpeg (c)
  64         , _log (log)
  65         , _subtitle_codec_context (0)
  66         , _subtitle_codec (0)
  67 {
  68         setup_subtitle ();
  69
  70         /* Audio and video frame PTS values may not start with 0.  We want
  71            to fiddle them so that:
  72
  73            1.  One of them starts at time 0.
  74            2.  The first video PTS value ends up on a frame boundary.
  75
  76            Then we remove big initial gaps in PTS and we allow our
  77            insertion of black frames to work.
  78
  79            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  80         */
  81
  82         bool const have_video = c->first_video();
  83         bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
  84
  85         /* First, make one of them start at 0 */
  86
  87         if (have_audio && have_video) {
  88                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  89         } else if (have_video) {
  90                 _pts_offset = - c->first_video().get();
  91         } else if (have_audio) {
  92                 _pts_offset = - c->audio_stream()->first_audio.get();
  93         }
  94
  95         /* Now adjust both so that the video pts starts on a frame */
  96         if (have_video && have_audio) {
  97                 ContentTime first_video = c->first_video().get() + _pts_offset;
  98                 ContentTime const old_first_video = first_video;
  99                 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
 100         }
 101 }
 102
 103 FFmpegDecoder::~FFmpegDecoder ()
 104 {
 105         boost::mutex::scoped_lock lm (_mutex);
 106
 107         if (_subtitle_codec_context) {
 108                 avcodec_close (_subtitle_codec_context);
 109         }
 110 }
 111
 112 void
 113 FFmpegDecoder::flush ()
 114 {
 115         /* Get any remaining frames */
 116
 117         _packet.data = 0;
 118         _packet.size = 0;
 119
 120         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 121
 122         while (decode_video_packet ()) {}
 123
 124         if (_ffmpeg_content->audio_stream()) {
 125                 decode_audio_packet ();
 126                 AudioDecoder::flush ();
 127         }
 128 }
 129
 130 bool
 131 FFmpegDecoder::pass ()
 132 {
 133         int r = av_read_frame (_format_context, &_packet);
 134
 135         if (r < 0) {
 136                 if (r != AVERROR_EOF) {
 137                         /* Maybe we should fail here, but for now we'll just finish off instead */
 138                         char buf[256];
 139                         av_strerror (r, buf, sizeof(buf));
 140                         _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 141                 }
 142
 143                 flush ();
 144                 return true;
 145         }
 146
 147         int const si = _packet.stream_index;
 148
 149         if (si == _video_stream) {
 150                 decode_video_packet ();
 151         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
 152                 decode_audio_packet ();
 153         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
 154                 decode_subtitle_packet ();
 155         }
 156
 157         av_free_packet (&_packet);
 158         return false;
 159 }
 160
 161 /** @param data pointer to array of pointers to buffers.
 162  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 163  */
 164 shared_ptr<AudioBuffers>
 165 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 166 {
 167         assert (_ffmpeg_content->audio_channels());
 168         assert (bytes_per_audio_sample());
 169
 170         /* Deinterleave and convert to float */
 171
 172         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 173
 174         int const total_samples = size / bytes_per_audio_sample();
 175         int const frames = total_samples / _ffmpeg_content->audio_channels();
 176         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 177
 178         switch (audio_sample_format()) {
 179         case AV_SAMPLE_FMT_U8:
 180         {
 181                 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
 182                 int sample = 0;
 183                 int channel = 0;
 184                 for (int i = 0; i < total_samples; ++i) {
 185                         audio->data(channel)[sample] = float(*p++) / (1 << 23);
 186
 187                         ++channel;
 188                         if (channel == _ffmpeg_content->audio_channels()) {
 189                                 channel = 0;
 190                                 ++sample;
 191                         }
 192                 }
 193         }
 194         break;
 195
 196         case AV_SAMPLE_FMT_S16:
 197         {
 198                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 199                 int sample = 0;
 200                 int channel = 0;
 201                 for (int i = 0; i < total_samples; ++i) {
 202                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 203
 204                         ++channel;
 205                         if (channel == _ffmpeg_content->audio_channels()) {
 206                                 channel = 0;
 207                                 ++sample;
 208                         }
 209                 }
 210         }
 211         break;
 212
 213         case AV_SAMPLE_FMT_S16P:
 214         {
 215                 int16_t** p = reinterpret_cast<int16_t **> (data);
 216                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 217                         for (int j = 0; j < frames; ++j) {
 218                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 219                         }
 220                 }
 221         }
 222         break;
 223
 224         case AV_SAMPLE_FMT_S32:
 225         {
 226                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 227                 int sample = 0;
 228                 int channel = 0;
 229                 for (int i = 0; i < total_samples; ++i) {
 230                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 231
 232                         ++channel;
 233                         if (channel == _ffmpeg_content->audio_channels()) {
 234                                 channel = 0;
 235                                 ++sample;
 236                         }
 237                 }
 238         }
 239         break;
 240
 241         case AV_SAMPLE_FMT_FLT:
 242         {
 243                 float* p = reinterpret_cast<float*> (data[0]);
 244                 int sample = 0;
 245                 int channel = 0;
 246                 for (int i = 0; i < total_samples; ++i) {
 247                         audio->data(channel)[sample] = *p++;
 248
 249                         ++channel;
 250                         if (channel == _ffmpeg_content->audio_channels()) {
 251                                 channel = 0;
 252                                 ++sample;
 253                         }
 254                 }
 255         }
 256         break;
 257
 258         case AV_SAMPLE_FMT_FLTP:
 259         {
 260                 float** p = reinterpret_cast<float**> (data);
 261                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 262                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 263                 }
 264         }
 265         break;
 266
 267         default:
 268                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 269         }
 270
 271         return audio;
 272 }
 273
 274 AVSampleFormat
 275 FFmpegDecoder::audio_sample_format () const
 276 {
 277         if (!_ffmpeg_content->audio_stream()) {
 278                 return (AVSampleFormat) 0;
 279         }
 280
 281         return audio_codec_context()->sample_fmt;
 282 }
 283
 284 int
 285 FFmpegDecoder::bytes_per_audio_sample () const
 286 {
 287         return av_get_bytes_per_sample (audio_sample_format ());
 288 }
 289
 290 int
 291 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 292 {
 293         int frames_read = 0;
 294         optional<ContentTime> last_video;
 295         optional<ContentTime> last_audio;
 296
 297         while (!finished (last_video, last_audio, frames_read)) {
 298                 int r = av_read_frame (_format_context, &_packet);
 299                 if (r < 0) {
 300                         /* We should flush our decoders here, possibly yielding a few more frames,
 301                            but the consequence of having to do that is too hideous to contemplate.
 302                            Instead we give up and say that you can't seek too close to the end
 303                            of a file.
 304                         */
 305                         return frames_read;
 306                 }
 307
 308                 ++frames_read;
 309
 310                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 311
 312                 if (_packet.stream_index == _video_stream) {
 313
 314                         avcodec_get_frame_defaults (_frame);
 315
 316                         int finished = 0;
 317                         r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
 318                         if (r >= 0 && finished) {
 319                                 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 320                         }
 321
 322                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 323                         AVPacket copy_packet = _packet;
 324                         while (copy_packet.size > 0) {
 325
 326                                 int finished;
 327                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
 328                                 if (r >= 0 && finished) {
 329                                         last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 330                                 }
 331
 332                                 copy_packet.data += r;
 333                                 copy_packet.size -= r;
 334                         }
 335                 }
 336
 337                 av_free_packet (&_packet);
 338         }
 339
 340         return frames_read;
 341 }
 342
 343 bool
 344 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 345 {
 346         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 347 }
 348
 349 bool
 350 FFmpegDecoder::seek_final_finished (int n, int done) const
 351 {
 352         return n == done;
 353 }
 354
 355 void
 356 FFmpegDecoder::seek_and_flush (ContentTime t)
 357 {
 358         ContentTime const u = t - _pts_offset;
 359         int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
 360
 361         if (_ffmpeg_content->audio_stream ()) {
 362                 s = min (
 363                         s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
 364                         );
 365         }
 366
 367         /* Ridiculous empirical hack */
 368         s--;
 369         if (s < 0) {
 370                 s = 0;
 371         }
 372
 373         av_seek_frame (_format_context, _video_stream, s, 0);
 374
 375         avcodec_flush_buffers (video_codec_context());
 376         if (audio_codec_context ()) {
 377                 avcodec_flush_buffers (audio_codec_context ());
 378         }
 379         if (_subtitle_codec_context) {
 380                 avcodec_flush_buffers (_subtitle_codec_context);
 381         }
 382 }
 383
 384 void
 385 FFmpegDecoder::seek (ContentTime time, bool accurate)
 386 {
 387         VideoDecoder::seek (time, accurate);
 388         AudioDecoder::seek (time, accurate);
 389
 390         /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
 391            a number plucked from the air) earlier than we want to end up.  The loop below
 392            will hopefully then step through to where we want to be.
 393         */
 394
 395         ContentTime pre_roll = accurate ? ContentTime::from_seconds (0.2) : ContentTime (0);
 396         ContentTime initial_seek = time - pre_roll;
 397         if (initial_seek < ContentTime (0)) {
 398                 initial_seek = ContentTime (0);
 399         }
 400
 401         /* Initial seek time in the video stream's timebase */
 402
 403         seek_and_flush (initial_seek);
 404
 405         if (!accurate) {
 406                 /* That'll do */
 407                 return;
 408         }
 409
 410         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 411
 412         seek_and_flush (initial_seek);
 413         if (N > 0) {
 414                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 415         }
 416 }
 417
 418 void
 419 FFmpegDecoder::decode_audio_packet ()
 420 {
 421         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 422            several times.
 423         */
 424
 425         AVPacket copy_packet = _packet;
 426
 427         while (copy_packet.size > 0) {
 428
 429                 int frame_finished;
 430                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 431
 432                 if (decode_result < 0) {
 433                         _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 434                         return;
 435                 }
 436
 437                 if (frame_finished) {
 438                         ContentTime const ct = ContentTime::from_seconds (
 439                                 av_frame_get_best_effort_timestamp (_frame) *
 440                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
 441                                 + _pts_offset;
 442
 443                         int const data_size = av_samples_get_buffer_size (
 444                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 445                                 );
 446
 447                         audio (deinterleave_audio (_frame->data, data_size), ct);
 448                 }
 449
 450                 copy_packet.data += decode_result;
 451                 copy_packet.size -= decode_result;
 452         }
 453 }
 454
 455 bool
 456 FFmpegDecoder::decode_video_packet ()
 457 {
 458         int frame_finished;
 459         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 460                 return false;
 461         }
 462
 463         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 464
 465         shared_ptr<FilterGraph> graph;
 466
 467         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 468         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 469                 ++i;
 470         }
 471
 472         if (i == _filter_graphs.end ()) {
 473                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 474                 _filter_graphs.push_back (graph);
 475                 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 476         } else {
 477                 graph = *i;
 478         }
 479
 480         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 481
 482         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 483
 484                 shared_ptr<Image> image = i->first;
 485
 486                 if (i->second != AV_NOPTS_VALUE) {
 487                         double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
 488                         video (image, rint (pts * _ffmpeg_content->video_frame_rate ()));
 489                 } else {
 490                         _log->log ("Dropping frame without PTS");
 491                 }
 492         }
 493
 494         return true;
 495 }
 496
 497
 498 void
 499 FFmpegDecoder::setup_subtitle ()
 500 {
 501         boost::mutex::scoped_lock lm (_mutex);
 502
 503         if (!_ffmpeg_content->subtitle_stream()) {
 504                 return;
 505         }
 506
 507         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 508         if (_subtitle_codec_context == 0) {
 509                 throw DecodeError (N_("could not find subtitle stream"));
 510         }
 511
 512         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 513
 514         if (_subtitle_codec == 0) {
 515                 throw DecodeError (N_("could not find subtitle decoder"));
 516         }
 517
 518         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 519                 throw DecodeError (N_("could not open subtitle decoder"));
 520         }
 521 }
 522
 523 void
 524 FFmpegDecoder::decode_subtitle_packet ()
 525 {
 526         int got_subtitle;
 527         AVSubtitle sub;
 528         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 529                 return;
 530         }
 531
 532         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 533            indicate that the previous subtitle should stop.
 534         */
 535         if (sub.num_rects <= 0) {
 536                 image_subtitle (ContentTime (), ContentTime (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
 537                 return;
 538         } else if (sub.num_rects > 1) {
 539                 throw DecodeError (_("multi-part subtitles not yet supported"));
 540         }
 541
 542         /* Subtitle PTS (within the source, not taking into account any of the
 543            source that we may have chopped off for the DCP)
 544         */
 545         ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
 546
 547         /* hence start time for this sub */
 548         ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
 549         ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
 550
 551         AVSubtitleRect const * rect = sub.rects[0];
 552
 553         if (rect->type != SUBTITLE_BITMAP) {
 554                 throw DecodeError (_("non-bitmap subtitles not yet supported"));
 555         }
 556
 557         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 558            G, third B, fourth A.
 559         */
 560         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 561
 562         /* Start of the first line in the subtitle */
 563         uint8_t* sub_p = rect->pict.data[0];
 564         /* sub_p looks up into a BGRA palette which is here
 565            (i.e. first byte B, second G, third R, fourth A)
 566         */
 567         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 568         /* Start of the output data */
 569         uint32_t* out_p = (uint32_t *) image->data()[0];
 570
 571         for (int y = 0; y < rect->h; ++y) {
 572                 uint8_t* sub_line_p = sub_p;
 573                 uint32_t* out_line_p = out_p;
 574                 for (int x = 0; x < rect->w; ++x) {
 575                         uint32_t const p = palette[*sub_line_p++];
 576                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 577                 }
 578                 sub_p += rect->pict.linesize[0];
 579                 out_p += image->stride()[0] / sizeof (uint32_t);
 580         }
 581
 582         dcp::Size const vs = _ffmpeg_content->video_size ();
 583
 584         image_subtitle (
 585                 from,
 586                 to,
 587                 image,
 588                 dcpomatic::Rect<double> (
 589                         static_cast<double> (rect->x) / vs.width,
 590                         static_cast<double> (rect->y) / vs.height,
 591                         static_cast<double> (rect->w) / vs.width,
 592                         static_cast<double> (rect->h) / vs.height
 593                         )
 594                 );
 595
 596         avsubtitle_free (&sub);
 597 }