src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <sndfile.h>
  31 extern "C" {
  32 #include <libavcodec/avcodec.h>
  33 #include <libavformat/avformat.h>
  34 }
  35 #include "filter.h"
  36 #include "exceptions.h"
  37 #include "image.h"
  38 #include "util.h"
  39 #include "log.h"
  40 #include "ffmpeg_decoder.h"
  41 #include "ffmpeg_audio_stream.h"
  42 #include "ffmpeg_subtitle_stream.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46 #include "image_proxy.h"
  47 #include "film.h"
  48
  49 #include "i18n.h"
  50
  51 #define LOG_GENERAL(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_GENERAL);
  52 #define LOG_ERROR(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_ERROR);
  53 #define LOG_WARNING(...) _video_content->film()->log()->log (__VA_ARGS__, Log::TYPE_WARNING);
  54
  55 using std::cout;
  56 using std::string;
  57 using std::vector;
  58 using std::stringstream;
  59 using std::list;
  60 using std::min;
  61 using std::pair;
  62 using boost::shared_ptr;
  63 using boost::optional;
  64 using boost::dynamic_pointer_cast;
  65 using dcp::Size;
  66
  67 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
  68         : VideoDecoder (c)
  69         , AudioDecoder (c)
  70         , SubtitleDecoder (c)
  71         , FFmpeg (c)
  72         , _log (log)
  73 {
  74         /* Audio and video frame PTS values may not start with 0.  We want
  75            to fiddle them so that:
  76
  77            1.  One of them starts at time 0.
  78            2.  The first video PTS value ends up on a frame boundary.
  79
  80            Then we remove big initial gaps in PTS and we allow our
  81            insertion of black frames to work.
  82
  83            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  84         */
  85
  86         bool const have_video = c->first_video();
  87         bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
  88
  89         /* First, make one of them start at 0 */
  90
  91         if (have_audio && have_video) {
  92                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  93         } else if (have_video) {
  94                 _pts_offset = - c->first_video().get();
  95         } else if (have_audio) {
  96                 _pts_offset = - c->audio_stream()->first_audio.get();
  97         }
  98
  99         /* Now adjust both so that the video pts starts on a frame */
 100         if (have_video && have_audio) {
 101                 ContentTime first_video = c->first_video().get() + _pts_offset;
 102                 ContentTime const old_first_video = first_video;
 103                 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
 104         }
 105 }
 106
 107 void
 108 FFmpegDecoder::flush ()
 109 {
 110         /* Get any remaining frames */
 111
 112         _packet.data = 0;
 113         _packet.size = 0;
 114
 115         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 116
 117         while (decode_video_packet ()) {}
 118
 119         if (_ffmpeg_content->audio_stream()) {
 120                 decode_audio_packet ();
 121                 AudioDecoder::flush ();
 122         }
 123 }
 124
 125 bool
 126 FFmpegDecoder::pass ()
 127 {
 128         int r = av_read_frame (_format_context, &_packet);
 129
 130         if (r < 0) {
 131                 if (r != AVERROR_EOF) {
 132                         /* Maybe we should fail here, but for now we'll just finish off instead */
 133                         char buf[256];
 134                         av_strerror (r, buf, sizeof(buf));
 135                         LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), buf, r);
 136                 }
 137
 138                 flush ();
 139                 return true;
 140         }
 141
 142         int const si = _packet.stream_index;
 143
 144         if (si == _video_stream) {
 145                 decode_video_packet ();
 146         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
 147                 decode_audio_packet ();
 148         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
 149                 decode_subtitle_packet ();
 150         }
 151
 152         av_free_packet (&_packet);
 153         return false;
 154 }
 155
 156 /** @param data pointer to array of pointers to buffers.
 157  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 158  */
 159 shared_ptr<AudioBuffers>
 160 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 161 {
 162         assert (_ffmpeg_content->audio_channels());
 163         assert (bytes_per_audio_sample());
 164
 165         /* Deinterleave and convert to float */
 166
 167         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 168
 169         int const total_samples = size / bytes_per_audio_sample();
 170         int const frames = total_samples / _ffmpeg_content->audio_channels();
 171         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 172
 173         switch (audio_sample_format()) {
 174         case AV_SAMPLE_FMT_U8:
 175         {
 176                 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
 177                 int sample = 0;
 178                 int channel = 0;
 179                 for (int i = 0; i < total_samples; ++i) {
 180                         audio->data(channel)[sample] = float(*p++) / (1 << 23);
 181
 182                         ++channel;
 183                         if (channel == _ffmpeg_content->audio_channels()) {
 184                                 channel = 0;
 185                                 ++sample;
 186                         }
 187                 }
 188         }
 189         break;
 190
 191         case AV_SAMPLE_FMT_S16:
 192         {
 193                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 194                 int sample = 0;
 195                 int channel = 0;
 196                 for (int i = 0; i < total_samples; ++i) {
 197                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 198
 199                         ++channel;
 200                         if (channel == _ffmpeg_content->audio_channels()) {
 201                                 channel = 0;
 202                                 ++sample;
 203                         }
 204                 }
 205         }
 206         break;
 207
 208         case AV_SAMPLE_FMT_S16P:
 209         {
 210                 int16_t** p = reinterpret_cast<int16_t **> (data);
 211                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 212                         for (int j = 0; j < frames; ++j) {
 213                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 214                         }
 215                 }
 216         }
 217         break;
 218
 219         case AV_SAMPLE_FMT_S32:
 220         {
 221                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 222                 int sample = 0;
 223                 int channel = 0;
 224                 for (int i = 0; i < total_samples; ++i) {
 225                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 226
 227                         ++channel;
 228                         if (channel == _ffmpeg_content->audio_channels()) {
 229                                 channel = 0;
 230                                 ++sample;
 231                         }
 232                 }
 233         }
 234         break;
 235
 236         case AV_SAMPLE_FMT_FLT:
 237         {
 238                 float* p = reinterpret_cast<float*> (data[0]);
 239                 int sample = 0;
 240                 int channel = 0;
 241                 for (int i = 0; i < total_samples; ++i) {
 242                         audio->data(channel)[sample] = *p++;
 243
 244                         ++channel;
 245                         if (channel == _ffmpeg_content->audio_channels()) {
 246                                 channel = 0;
 247                                 ++sample;
 248                         }
 249                 }
 250         }
 251         break;
 252
 253         case AV_SAMPLE_FMT_FLTP:
 254         {
 255                 float** p = reinterpret_cast<float**> (data);
 256                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 257                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 258                 }
 259         }
 260         break;
 261
 262         default:
 263                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 264         }
 265
 266         return audio;
 267 }
 268
 269 AVSampleFormat
 270 FFmpegDecoder::audio_sample_format () const
 271 {
 272         if (!_ffmpeg_content->audio_stream()) {
 273                 return (AVSampleFormat) 0;
 274         }
 275
 276         return audio_codec_context()->sample_fmt;
 277 }
 278
 279 int
 280 FFmpegDecoder::bytes_per_audio_sample () const
 281 {
 282         return av_get_bytes_per_sample (audio_sample_format ());
 283 }
 284
 285 int
 286 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 287 {
 288         int frames_read = 0;
 289         optional<ContentTime> last_video;
 290         optional<ContentTime> last_audio;
 291
 292         while (!finished (last_video, last_audio, frames_read)) {
 293                 int r = av_read_frame (_format_context, &_packet);
 294                 if (r < 0) {
 295                         /* We should flush our decoders here, possibly yielding a few more frames,
 296                            but the consequence of having to do that is too hideous to contemplate.
 297                            Instead we give up and say that you can't seek too close to the end
 298                            of a file.
 299                         */
 300                         return frames_read;
 301                 }
 302
 303                 ++frames_read;
 304
 305                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 306
 307                 if (_packet.stream_index == _video_stream) {
 308
 309                         av_frame_unref (_frame);
 310
 311                         int got_picture = 0;
 312                         r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
 313                         if (r >= 0 && got_picture) {
 314                                 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 315                         }
 316
 317                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 318                         AVPacket copy_packet = _packet;
 319                         while (copy_packet.size > 0) {
 320
 321                                 int got_frame;
 322                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
 323                                 if (r >= 0 && got_frame) {
 324                                         last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 325                                 }
 326
 327                                 copy_packet.data += r;
 328                                 copy_packet.size -= r;
 329                         }
 330                 }
 331
 332                 av_free_packet (&_packet);
 333         }
 334
 335         return frames_read;
 336 }
 337
 338 bool
 339 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 340 {
 341         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 342 }
 343
 344 bool
 345 FFmpegDecoder::seek_final_finished (int n, int done) const
 346 {
 347         return n == done;
 348 }
 349
 350 void
 351 FFmpegDecoder::seek_and_flush (ContentTime t)
 352 {
 353         ContentTime const u = t - _pts_offset;
 354         int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
 355
 356         if (_ffmpeg_content->audio_stream ()) {
 357                 s = min (
 358                         s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
 359                         );
 360         }
 361
 362         /* Ridiculous empirical hack */
 363         s--;
 364         if (s < 0) {
 365                 s = 0;
 366         }
 367
 368         av_seek_frame (_format_context, _video_stream, s, 0);
 369
 370         avcodec_flush_buffers (video_codec_context());
 371         if (audio_codec_context ()) {
 372                 avcodec_flush_buffers (audio_codec_context ());
 373         }
 374         if (subtitle_codec_context ()) {
 375                 avcodec_flush_buffers (subtitle_codec_context ());
 376         }
 377 }
 378
 379 void
 380 FFmpegDecoder::seek (ContentTime time, bool accurate)
 381 {
 382         VideoDecoder::seek (time, accurate);
 383         AudioDecoder::seek (time, accurate);
 384
 385         /* If we are doing an accurate seek, our initial shot will be 2s (2 being
 386            a number plucked from the air) earlier than we want to end up.  The loop below
 387            will hopefully then step through to where we want to be.
 388         */
 389
 390         ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
 391         ContentTime initial_seek = time - pre_roll;
 392         if (initial_seek < ContentTime (0)) {
 393                 initial_seek = ContentTime (0);
 394         }
 395
 396         /* Initial seek time in the video stream's timebase */
 397
 398         seek_and_flush (initial_seek);
 399
 400         if (!accurate) {
 401                 /* That'll do */
 402                 return;
 403         }
 404
 405         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 406
 407         seek_and_flush (initial_seek);
 408         if (N > 0) {
 409                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 410         }
 411 }
 412
 413 void
 414 FFmpegDecoder::decode_audio_packet ()
 415 {
 416         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 417            several times.
 418         */
 419
 420         AVPacket copy_packet = _packet;
 421
 422         while (copy_packet.size > 0) {
 423
 424                 int frame_finished;
 425                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 426
 427                 if (decode_result < 0) {
 428                         LOG_ERROR ("avcodec_decode_audio4 failed (%1)", decode_result);
 429                         return;
 430                 }
 431
 432                 if (frame_finished) {
 433                         ContentTime const ct = ContentTime::from_seconds (
 434                                 av_frame_get_best_effort_timestamp (_frame) *
 435                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
 436                                 + _pts_offset;
 437
 438                         int const data_size = av_samples_get_buffer_size (
 439                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 440                                 );
 441
 442                         audio (deinterleave_audio (_frame->data, data_size), ct);
 443                 }
 444
 445                 copy_packet.data += decode_result;
 446                 copy_packet.size -= decode_result;
 447         }
 448 }
 449
 450 bool
 451 FFmpegDecoder::decode_video_packet ()
 452 {
 453         int frame_finished;
 454         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 455                 return false;
 456         }
 457
 458         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 459
 460         shared_ptr<FilterGraph> graph;
 461
 462         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 463         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 464                 ++i;
 465         }
 466
 467         if (i == _filter_graphs.end ()) {
 468                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 469                 _filter_graphs.push_back (graph);
 470                 LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
 471         } else {
 472                 graph = *i;
 473         }
 474
 475         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 476
 477         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 478
 479                 shared_ptr<Image> image = i->first;
 480
 481                 if (i->second != AV_NOPTS_VALUE) {
 482                         double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
 483                         video (
 484                                 shared_ptr<ImageProxy> (new RawImageProxy (image, _video_content->film()->log())),
 485                                 rint (pts * _ffmpeg_content->video_frame_rate ())
 486                                 );
 487                 } else {
 488                         LOG_WARNING ("Dropping frame without PTS");
 489                 }
 490         }
 491
 492         return true;
 493 }
 494
 495 void
 496 FFmpegDecoder::decode_subtitle_packet ()
 497 {
 498         int got_subtitle;
 499         AVSubtitle sub;
 500         if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 501                 return;
 502         }
 503
 504         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 505            indicate that the previous subtitle should stop.
 506         */
 507         if (sub.num_rects <= 0) {
 508                 image_subtitle (ContentTimePeriod (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
 509                 return;
 510         } else if (sub.num_rects > 1) {
 511                 throw DecodeError (_("multi-part subtitles not yet supported"));
 512         }
 513
 514         /* Subtitle PTS (within the source, not taking into account any of the
 515            source that we may have chopped off for the DCP)
 516         */
 517         ContentTimePeriod period = subtitle_period (sub) + _pts_offset;
 518
 519         AVSubtitleRect const * rect = sub.rects[0];
 520
 521         if (rect->type != SUBTITLE_BITMAP) {
 522                 /* XXX */
 523                 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
 524                 return;
 525         }
 526
 527         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 528            G, third B, fourth A.
 529         */
 530         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 531
 532         /* Start of the first line in the subtitle */
 533         uint8_t* sub_p = rect->pict.data[0];
 534         /* sub_p looks up into a BGRA palette which is here
 535            (i.e. first byte B, second G, third R, fourth A)
 536         */
 537         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 538         /* Start of the output data */
 539         uint32_t* out_p = (uint32_t *) image->data()[0];
 540
 541         for (int y = 0; y < rect->h; ++y) {
 542                 uint8_t* sub_line_p = sub_p;
 543                 uint32_t* out_line_p = out_p;
 544                 for (int x = 0; x < rect->w; ++x) {
 545                         uint32_t const p = palette[*sub_line_p++];
 546                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 547                 }
 548                 sub_p += rect->pict.linesize[0];
 549                 out_p += image->stride()[0] / sizeof (uint32_t);
 550         }
 551
 552         dcp::Size const vs = _ffmpeg_content->video_size ();
 553
 554         image_subtitle (
 555                 period,
 556                 image,
 557                 dcpomatic::Rect<double> (
 558                         static_cast<double> (rect->x) / vs.width,
 559                         static_cast<double> (rect->y) / vs.height,
 560                         static_cast<double> (rect->w) / vs.width,
 561                         static_cast<double> (rect->h) / vs.height
 562                         )
 563                 );
 564
 565         avsubtitle_free (&sub);
 566 }
 567
 568 bool
 569 FFmpegDecoder::has_subtitle_during (ContentTimePeriod p) const
 570 {
 571         return _ffmpeg_content->has_subtitle_during (p);
 572 }