src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <sndfile.h>
  31 extern "C" {
  32 #include <libavcodec/avcodec.h>
  33 #include <libavformat/avformat.h>
  34 }
  35 #include "filter.h"
  36 #include "exceptions.h"
  37 #include "image.h"
  38 #include "util.h"
  39 #include "log.h"
  40 #include "ffmpeg_decoder.h"
  41 #include "ffmpeg_audio_stream.h"
  42 #include "ffmpeg_subtitle_stream.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46 #include "image_proxy.h"
  47
  48 #include "i18n.h"
  49
  50 using std::cout;
  51 using std::string;
  52 using std::vector;
  53 using std::stringstream;
  54 using std::list;
  55 using std::min;
  56 using std::pair;
  57 using boost::shared_ptr;
  58 using boost::optional;
  59 using boost::dynamic_pointer_cast;
  60 using dcp::Size;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
  63         : VideoDecoder (c)
  64         , AudioDecoder (c)
  65         , SubtitleDecoder (c)
  66         , FFmpeg (c)
  67         , _log (log)
  68         , _subtitle_codec_context (0)
  69         , _subtitle_codec (0)
  70 {
  71         /* Audio and video frame PTS values may not start with 0.  We want
  72            to fiddle them so that:
  73
  74            1.  One of them starts at time 0.
  75            2.  The first video PTS value ends up on a frame boundary.
  76
  77            Then we remove big initial gaps in PTS and we allow our
  78            insertion of black frames to work.
  79
  80            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  81         */
  82
  83         bool const have_video = c->first_video();
  84         bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
  85
  86         /* First, make one of them start at 0 */
  87
  88         if (have_audio && have_video) {
  89                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  90         } else if (have_video) {
  91                 _pts_offset = - c->first_video().get();
  92         } else if (have_audio) {
  93                 _pts_offset = - c->audio_stream()->first_audio.get();
  94         }
  95
  96         /* Now adjust both so that the video pts starts on a frame */
  97         if (have_video && have_audio) {
  98                 ContentTime first_video = c->first_video().get() + _pts_offset;
  99                 ContentTime const old_first_video = first_video;
 100                 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
 101         }
 102 }
 103
 104 FFmpegDecoder::~FFmpegDecoder ()
 105 {
 106         boost::mutex::scoped_lock lm (_mutex);
 107
 108         if (_subtitle_codec_context) {
 109                 avcodec_close (_subtitle_codec_context);
 110         }
 111 }
 112
 113 void
 114 FFmpegDecoder::flush ()
 115 {
 116         /* Get any remaining frames */
 117
 118         _packet.data = 0;
 119         _packet.size = 0;
 120
 121         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 122
 123         while (decode_video_packet ()) {}
 124
 125         if (_ffmpeg_content->audio_stream()) {
 126                 decode_audio_packet ();
 127                 AudioDecoder::flush ();
 128         }
 129 }
 130
 131 bool
 132 FFmpegDecoder::pass ()
 133 {
 134         int r = av_read_frame (_format_context, &_packet);
 135
 136         if (r < 0) {
 137                 if (r != AVERROR_EOF) {
 138                         /* Maybe we should fail here, but for now we'll just finish off instead */
 139                         char buf[256];
 140                         av_strerror (r, buf, sizeof(buf));
 141                         _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 142                 }
 143
 144                 flush ();
 145                 return true;
 146         }
 147
 148         int const si = _packet.stream_index;
 149
 150         if (si == _video_stream) {
 151                 decode_video_packet ();
 152         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
 153                 decode_audio_packet ();
 154         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
 155                 decode_subtitle_packet ();
 156         }
 157
 158         av_free_packet (&_packet);
 159         return false;
 160 }
 161
 162 /** @param data pointer to array of pointers to buffers.
 163  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 164  */
 165 shared_ptr<AudioBuffers>
 166 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 167 {
 168         assert (_ffmpeg_content->audio_channels());
 169         assert (bytes_per_audio_sample());
 170
 171         /* Deinterleave and convert to float */
 172
 173         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 174
 175         int const total_samples = size / bytes_per_audio_sample();
 176         int const frames = total_samples / _ffmpeg_content->audio_channels();
 177         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 178
 179         switch (audio_sample_format()) {
 180         case AV_SAMPLE_FMT_U8:
 181         {
 182                 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
 183                 int sample = 0;
 184                 int channel = 0;
 185                 for (int i = 0; i < total_samples; ++i) {
 186                         audio->data(channel)[sample] = float(*p++) / (1 << 23);
 187
 188                         ++channel;
 189                         if (channel == _ffmpeg_content->audio_channels()) {
 190                                 channel = 0;
 191                                 ++sample;
 192                         }
 193                 }
 194         }
 195         break;
 196
 197         case AV_SAMPLE_FMT_S16:
 198         {
 199                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 200                 int sample = 0;
 201                 int channel = 0;
 202                 for (int i = 0; i < total_samples; ++i) {
 203                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 204
 205                         ++channel;
 206                         if (channel == _ffmpeg_content->audio_channels()) {
 207                                 channel = 0;
 208                                 ++sample;
 209                         }
 210                 }
 211         }
 212         break;
 213
 214         case AV_SAMPLE_FMT_S16P:
 215         {
 216                 int16_t** p = reinterpret_cast<int16_t **> (data);
 217                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 218                         for (int j = 0; j < frames; ++j) {
 219                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 220                         }
 221                 }
 222         }
 223         break;
 224
 225         case AV_SAMPLE_FMT_S32:
 226         {
 227                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 228                 int sample = 0;
 229                 int channel = 0;
 230                 for (int i = 0; i < total_samples; ++i) {
 231                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 232
 233                         ++channel;
 234                         if (channel == _ffmpeg_content->audio_channels()) {
 235                                 channel = 0;
 236                                 ++sample;
 237                         }
 238                 }
 239         }
 240         break;
 241
 242         case AV_SAMPLE_FMT_FLT:
 243         {
 244                 float* p = reinterpret_cast<float*> (data[0]);
 245                 int sample = 0;
 246                 int channel = 0;
 247                 for (int i = 0; i < total_samples; ++i) {
 248                         audio->data(channel)[sample] = *p++;
 249
 250                         ++channel;
 251                         if (channel == _ffmpeg_content->audio_channels()) {
 252                                 channel = 0;
 253                                 ++sample;
 254                         }
 255                 }
 256         }
 257         break;
 258
 259         case AV_SAMPLE_FMT_FLTP:
 260         {
 261                 float** p = reinterpret_cast<float**> (data);
 262                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 263                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 264                 }
 265         }
 266         break;
 267
 268         default:
 269                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 270         }
 271
 272         return audio;
 273 }
 274
 275 AVSampleFormat
 276 FFmpegDecoder::audio_sample_format () const
 277 {
 278         if (!_ffmpeg_content->audio_stream()) {
 279                 return (AVSampleFormat) 0;
 280         }
 281
 282         return audio_codec_context()->sample_fmt;
 283 }
 284
 285 int
 286 FFmpegDecoder::bytes_per_audio_sample () const
 287 {
 288         return av_get_bytes_per_sample (audio_sample_format ());
 289 }
 290
 291 int
 292 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 293 {
 294         int frames_read = 0;
 295         optional<ContentTime> last_video;
 296         optional<ContentTime> last_audio;
 297
 298         while (!finished (last_video, last_audio, frames_read)) {
 299                 int r = av_read_frame (_format_context, &_packet);
 300                 if (r < 0) {
 301                         /* We should flush our decoders here, possibly yielding a few more frames,
 302                            but the consequence of having to do that is too hideous to contemplate.
 303                            Instead we give up and say that you can't seek too close to the end
 304                            of a file.
 305                         */
 306                         return frames_read;
 307                 }
 308
 309                 ++frames_read;
 310
 311                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 312
 313                 if (_packet.stream_index == _video_stream) {
 314
 315                         avcodec_get_frame_defaults (_frame);
 316
 317                         int got_picture = 0;
 318                         r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
 319                         if (r >= 0 && got_picture) {
 320                                 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 321                         }
 322
 323                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 324                         AVPacket copy_packet = _packet;
 325                         while (copy_packet.size > 0) {
 326
 327                                 int got_frame;
 328                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
 329                                 if (r >= 0 && got_frame) {
 330                                         last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 331                                 }
 332
 333                                 copy_packet.data += r;
 334                                 copy_packet.size -= r;
 335                         }
 336                 }
 337
 338                 av_free_packet (&_packet);
 339         }
 340
 341         return frames_read;
 342 }
 343
 344 bool
 345 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 346 {
 347         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 348 }
 349
 350 bool
 351 FFmpegDecoder::seek_final_finished (int n, int done) const
 352 {
 353         return n == done;
 354 }
 355
 356 void
 357 FFmpegDecoder::seek_and_flush (ContentTime t)
 358 {
 359         ContentTime const u = t - _pts_offset;
 360         int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
 361
 362         if (_ffmpeg_content->audio_stream ()) {
 363                 s = min (
 364                         s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
 365                         );
 366         }
 367
 368         /* Ridiculous empirical hack */
 369         s--;
 370         if (s < 0) {
 371                 s = 0;
 372         }
 373
 374         av_seek_frame (_format_context, _video_stream, s, 0);
 375
 376         avcodec_flush_buffers (video_codec_context());
 377         if (audio_codec_context ()) {
 378                 avcodec_flush_buffers (audio_codec_context ());
 379         }
 380         if (_subtitle_codec_context) {
 381                 avcodec_flush_buffers (_subtitle_codec_context);
 382         }
 383 }
 384
 385 void
 386 FFmpegDecoder::seek (ContentTime time, bool accurate)
 387 {
 388         VideoDecoder::seek (time, accurate);
 389         AudioDecoder::seek (time, accurate);
 390
 391         /* If we are doing an accurate seek, our initial shot will be 2s (2 being
 392            a number plucked from the air) earlier than we want to end up.  The loop below
 393            will hopefully then step through to where we want to be.
 394         */
 395
 396         ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
 397         ContentTime initial_seek = time - pre_roll;
 398         if (initial_seek < ContentTime (0)) {
 399                 initial_seek = ContentTime (0);
 400         }
 401
 402         /* Initial seek time in the video stream's timebase */
 403
 404         seek_and_flush (initial_seek);
 405
 406         if (!accurate) {
 407                 /* That'll do */
 408                 return;
 409         }
 410
 411         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 412
 413         seek_and_flush (initial_seek);
 414         if (N > 0) {
 415                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 416         }
 417 }
 418
 419 void
 420 FFmpegDecoder::decode_audio_packet ()
 421 {
 422         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 423            several times.
 424         */
 425
 426         AVPacket copy_packet = _packet;
 427
 428         while (copy_packet.size > 0) {
 429
 430                 int frame_finished;
 431                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 432
 433                 if (decode_result < 0) {
 434                         _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 435                         return;
 436                 }
 437
 438                 if (frame_finished) {
 439                         ContentTime const ct = ContentTime::from_seconds (
 440                                 av_frame_get_best_effort_timestamp (_frame) *
 441                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
 442                                 + _pts_offset;
 443
 444                         int const data_size = av_samples_get_buffer_size (
 445                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 446                                 );
 447
 448                         audio (deinterleave_audio (_frame->data, data_size), ct);
 449                 }
 450
 451                 copy_packet.data += decode_result;
 452                 copy_packet.size -= decode_result;
 453         }
 454 }
 455
 456 bool
 457 FFmpegDecoder::decode_video_packet ()
 458 {
 459         int frame_finished;
 460         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 461                 return false;
 462         }
 463
 464         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 465
 466         shared_ptr<FilterGraph> graph;
 467
 468         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 469         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 470                 ++i;
 471         }
 472
 473         if (i == _filter_graphs.end ()) {
 474                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 475                 _filter_graphs.push_back (graph);
 476                 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 477         } else {
 478                 graph = *i;
 479         }
 480
 481         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 482
 483         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 484
 485                 shared_ptr<Image> image = i->first;
 486
 487                 if (i->second != AV_NOPTS_VALUE) {
 488                         double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
 489                         video (shared_ptr<ImageProxy> (new RawImageProxy (image)), rint (pts * _ffmpeg_content->video_frame_rate ()));
 490                 } else {
 491                         _log->log ("Dropping frame without PTS");
 492                 }
 493         }
 494
 495         return true;
 496 }
 497
 498 void
 499 FFmpegDecoder::decode_subtitle_packet ()
 500 {
 501         int got_subtitle;
 502         AVSubtitle sub;
 503         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 504                 return;
 505         }
 506
 507         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 508            indicate that the previous subtitle should stop.
 509         */
 510         if (sub.num_rects <= 0) {
 511                 image_subtitle (ContentTimePeriod (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
 512                 return;
 513         } else if (sub.num_rects > 1) {
 514                 throw DecodeError (_("multi-part subtitles not yet supported"));
 515         }
 516
 517         /* Subtitle PTS (within the source, not taking into account any of the
 518            source that we may have chopped off for the DCP)
 519         */
 520         ContentTimePeriod period = subtitle_period (sub) + _pts_offset;
 521
 522         AVSubtitleRect const * rect = sub.rects[0];
 523
 524         if (rect->type != SUBTITLE_BITMAP) {
 525                 /* XXX */
 526                 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
 527                 return;
 528         }
 529
 530         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 531            G, third B, fourth A.
 532         */
 533         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 534
 535         /* Start of the first line in the subtitle */
 536         uint8_t* sub_p = rect->pict.data[0];
 537         /* sub_p looks up into a BGRA palette which is here
 538            (i.e. first byte B, second G, third R, fourth A)
 539         */
 540         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 541         /* Start of the output data */
 542         uint32_t* out_p = (uint32_t *) image->data()[0];
 543
 544         for (int y = 0; y < rect->h; ++y) {
 545                 uint8_t* sub_line_p = sub_p;
 546                 uint32_t* out_line_p = out_p;
 547                 for (int x = 0; x < rect->w; ++x) {
 548                         uint32_t const p = palette[*sub_line_p++];
 549                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 550                 }
 551                 sub_p += rect->pict.linesize[0];
 552                 out_p += image->stride()[0] / sizeof (uint32_t);
 553         }
 554
 555         dcp::Size const vs = _ffmpeg_content->video_size ();
 556
 557         image_subtitle (
 558                 period,
 559                 image,
 560                 dcpomatic::Rect<double> (
 561                         static_cast<double> (rect->x) / vs.width,
 562                         static_cast<double> (rect->y) / vs.height,
 563                         static_cast<double> (rect->w) / vs.width,
 564                         static_cast<double> (rect->h) / vs.height
 565                         )
 566                 );
 567
 568         avsubtitle_free (&sub);
 569 }