src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 #include <sndfile.h>
  32 extern "C" {
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 }
  36 #include "film.h"
  37 #include "filter.h"
  38 #include "exceptions.h"
  39 #include "image.h"
  40 #include "util.h"
  41 #include "log.h"
  42 #include "ffmpeg_decoder.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46
  47 #include "i18n.h"
  48
  49 using std::cout;
  50 using std::string;
  51 using std::vector;
  52 using std::stringstream;
  53 using std::list;
  54 using std::min;
  55 using std::pair;
  56 using boost::shared_ptr;
  57 using boost::optional;
  58 using boost::dynamic_pointer_cast;
  59 using libdcp::Size;
  60
  61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
  62         : Decoder (f)
  63         , VideoDecoder (f, c)
  64         , AudioDecoder (f, c)
  65         , SubtitleDecoder (f)
  66         , FFmpeg (c)
  67         , _subtitle_codec_context (0)
  68         , _subtitle_codec (0)
  69         , _decode_video (video)
  70         , _decode_audio (audio)
  71         , _pts_offset (0)
  72 {
  73         setup_subtitle ();
  74
  75         /* Audio and video frame PTS values may not start with 0.  We want
  76            to fiddle them so that:
  77
  78            1.  One of them starts at time 0.
  79            2.  The first video PTS value ends up on a frame boundary.
  80
  81            Then we remove big initial gaps in PTS and we allow our
  82            insertion of black frames to work.
  83
  84            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  85         */
  86
  87         bool const have_video = video && c->first_video();
  88         bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
  89
  90         /* First, make one of them start at 0 */
  91
  92         if (have_audio && have_video) {
  93                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  94         } else if (have_video) {
  95                 _pts_offset = - c->first_video().get();
  96         } else if (have_audio) {
  97                 _pts_offset = - c->audio_stream()->first_audio.get();
  98         }
  99
 100         /* Now adjust both so that the video pts starts on a frame */
 101         if (have_video && have_audio) {
 102                 double first_video = c->first_video().get() + _pts_offset;
 103                 double const old_first_video = first_video;
 104
 105                 /* Round the first video up to a frame boundary */
 106                 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
 107                         first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
 108                 }
 109
 110                 _pts_offset += first_video - old_first_video;
 111         }
 112 }
 113
 114 FFmpegDecoder::~FFmpegDecoder ()
 115 {
 116         boost::mutex::scoped_lock lm (_mutex);
 117
 118         if (_subtitle_codec_context) {
 119                 avcodec_close (_subtitle_codec_context);
 120         }
 121 }
 122
 123 void
 124 FFmpegDecoder::flush ()
 125 {
 126         /* Get any remaining frames */
 127
 128         _packet.data = 0;
 129         _packet.size = 0;
 130
 131         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 132
 133         if (_decode_video) {
 134                 while (decode_video_packet ()) {}
 135         }
 136
 137         if (_ffmpeg_content->audio_stream() && _decode_audio) {
 138                 decode_audio_packet ();
 139         }
 140
 141         AudioDecoder::flush ();
 142 }
 143
 144 bool
 145 FFmpegDecoder::pass ()
 146 {
 147         int r = av_read_frame (_format_context, &_packet);
 148
 149         if (r < 0) {
 150                 if (r != AVERROR_EOF) {
 151                         /* Maybe we should fail here, but for now we'll just finish off instead */
 152                         char buf[256];
 153                         av_strerror (r, buf, sizeof(buf));
 154                         shared_ptr<const Film> film = _film.lock ();
 155                         assert (film);
 156                         film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 157                 }
 158
 159                 flush ();
 160                 return true;
 161         }
 162
 163         shared_ptr<const Film> film = _film.lock ();
 164         assert (film);
 165
 166         int const si = _packet.stream_index;
 167
 168         if (si == _video_stream && _decode_video) {
 169                 decode_video_packet ();
 170         } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
 171                 decode_audio_packet ();
 172         } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
 173                 decode_subtitle_packet ();
 174         }
 175
 176         av_free_packet (&_packet);
 177         return false;
 178 }
 179
 180 /** @param data pointer to array of pointers to buffers.
 181  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 182  */
 183 shared_ptr<AudioBuffers>
 184 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 185 {
 186         assert (_ffmpeg_content->audio_channels());
 187         assert (bytes_per_audio_sample());
 188
 189         /* Deinterleave and convert to float */
 190
 191         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 192
 193         int const total_samples = size / bytes_per_audio_sample();
 194         int const frames = total_samples / _ffmpeg_content->audio_channels();
 195         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 196
 197         switch (audio_sample_format()) {
 198         case AV_SAMPLE_FMT_S16:
 199         {
 200                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 201                 int sample = 0;
 202                 int channel = 0;
 203                 for (int i = 0; i < total_samples; ++i) {
 204                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 205
 206                         ++channel;
 207                         if (channel == _ffmpeg_content->audio_channels()) {
 208                                 channel = 0;
 209                                 ++sample;
 210                         }
 211                 }
 212         }
 213         break;
 214
 215         case AV_SAMPLE_FMT_S16P:
 216         {
 217                 int16_t** p = reinterpret_cast<int16_t **> (data);
 218                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 219                         for (int j = 0; j < frames; ++j) {
 220                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 221                         }
 222                 }
 223         }
 224         break;
 225
 226         case AV_SAMPLE_FMT_S32:
 227         {
 228                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 229                 int sample = 0;
 230                 int channel = 0;
 231                 for (int i = 0; i < total_samples; ++i) {
 232                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 233
 234                         ++channel;
 235                         if (channel == _ffmpeg_content->audio_channels()) {
 236                                 channel = 0;
 237                                 ++sample;
 238                         }
 239                 }
 240         }
 241         break;
 242
 243         case AV_SAMPLE_FMT_FLT:
 244         {
 245                 float* p = reinterpret_cast<float*> (data[0]);
 246                 int sample = 0;
 247                 int channel = 0;
 248                 for (int i = 0; i < total_samples; ++i) {
 249                         audio->data(channel)[sample] = *p++;
 250
 251                         ++channel;
 252                         if (channel == _ffmpeg_content->audio_channels()) {
 253                                 channel = 0;
 254                                 ++sample;
 255                         }
 256                 }
 257         }
 258         break;
 259
 260         case AV_SAMPLE_FMT_FLTP:
 261         {
 262                 float** p = reinterpret_cast<float**> (data);
 263                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 264                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 265                 }
 266         }
 267         break;
 268
 269         default:
 270                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 271         }
 272
 273         return audio;
 274 }
 275
 276 AVSampleFormat
 277 FFmpegDecoder::audio_sample_format () const
 278 {
 279         if (!_ffmpeg_content->audio_stream()) {
 280                 return (AVSampleFormat) 0;
 281         }
 282
 283         return audio_codec_context()->sample_fmt;
 284 }
 285
 286 int
 287 FFmpegDecoder::bytes_per_audio_sample () const
 288 {
 289         return av_get_bytes_per_sample (audio_sample_format ());
 290 }
 291
 292 int
 293 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 294 {
 295         int frames_read = 0;
 296         optional<ContentTime> last_video;
 297         optional<ContentTime> last_audio;
 298
 299         while (!finished (last_video, last_audio, frames_read)) {
 300                 int r = av_read_frame (_format_context, &_packet);
 301                 if (r < 0) {
 302                         /* We should flush our decoders here, possibly yielding a few more frames,
 303                            but the consequence of having to do that is too hideous to contemplate.
 304                            Instead we give up and say that you can't seek too close to the end
 305                            of a file.
 306                         */
 307                         return frames_read;
 308                 }
 309
 310                 ++frames_read;
 311
 312                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 313
 314                 if (_packet.stream_index == _video_stream) {
 315
 316                         avcodec_get_frame_defaults (_frame);
 317
 318                         int finished = 0;
 319                         r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
 320                         if (r >= 0 && finished) {
 321                                 last_video = rint (
 322                                         (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
 323                                         );
 324                         }
 325
 326                 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
 327                         AVPacket copy_packet = _packet;
 328                         while (copy_packet.size > 0) {
 329
 330                                 int finished;
 331                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
 332                                 if (r >= 0 && finished) {
 333                                         last_audio = rint (
 334                                                 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
 335                                                 );
 336                                 }
 337
 338                                 copy_packet.data += r;
 339                                 copy_packet.size -= r;
 340                         }
 341                 }
 342
 343                 av_free_packet (&_packet);
 344         }
 345
 346         return frames_read;
 347 }
 348
 349 bool
 350 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 351 {
 352         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 353 }
 354
 355 bool
 356 FFmpegDecoder::seek_final_finished (int n, int done) const
 357 {
 358         return n == done;
 359 }
 360
 361 void
 362 FFmpegDecoder::seek_and_flush (ContentTime t)
 363 {
 364         int64_t s = ((double (t) / TIME_HZ) - _pts_offset) /
 365                 av_q2d (_format_context->streams[_video_stream]->time_base);
 366
 367         if (_ffmpeg_content->audio_stream ()) {
 368                 s = min (
 369                         s, int64_t (
 370                                 ((double (t) / TIME_HZ) - _pts_offset) /
 371                                 av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base)
 372                                 )
 373                         );
 374         }
 375
 376         /* Ridiculous empirical hack */
 377         s--;
 378
 379         av_seek_frame (_format_context, _video_stream, s, AVSEEK_FLAG_BACKWARD);
 380
 381         avcodec_flush_buffers (video_codec_context());
 382         if (audio_codec_context ()) {
 383                 avcodec_flush_buffers (audio_codec_context ());
 384         }
 385         if (_subtitle_codec_context) {
 386                 avcodec_flush_buffers (_subtitle_codec_context);
 387         }
 388 }
 389
 390 void
 391 FFmpegDecoder::seek (ContentTime time, bool accurate)
 392 {
 393         Decoder::seek (time, accurate);
 394         AudioDecoder::seek (time, accurate);
 395
 396         /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
 397            a number plucked from the air) earlier than we want to end up.  The loop below
 398            will hopefully then step through to where we want to be.
 399         */
 400
 401         ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
 402         ContentTime initial_seek = time - pre_roll;
 403         if (initial_seek < 0) {
 404                 initial_seek = 0;
 405         }
 406
 407         /* Initial seek time in the video stream's timebase */
 408
 409         seek_and_flush (initial_seek);
 410
 411         if (!accurate) {
 412                 /* That'll do */
 413                 return;
 414         }
 415
 416         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 417
 418         seek_and_flush (initial_seek);
 419         if (N > 0) {
 420                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 421         }
 422 }
 423
 424 void
 425 FFmpegDecoder::decode_audio_packet ()
 426 {
 427         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 428            several times.
 429         */
 430
 431         AVPacket copy_packet = _packet;
 432
 433         while (copy_packet.size > 0) {
 434
 435                 int frame_finished;
 436                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 437
 438                 if (decode_result < 0) {
 439                         shared_ptr<const Film> film = _film.lock ();
 440                         assert (film);
 441                         film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 442                         return;
 443                 }
 444
 445                 if (frame_finished) {
 446                         ContentTime const ct = (
 447                                 av_frame_get_best_effort_timestamp (_frame) *
 448                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base)
 449                                 + _pts_offset
 450                                 ) * TIME_HZ;
 451
 452                         int const data_size = av_samples_get_buffer_size (
 453                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 454                                 );
 455
 456                         audio (deinterleave_audio (_frame->data, data_size), ct);
 457                 }
 458
 459                 copy_packet.data += decode_result;
 460                 copy_packet.size -= decode_result;
 461         }
 462 }
 463
 464 bool
 465 FFmpegDecoder::decode_video_packet ()
 466 {
 467         int frame_finished;
 468         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 469                 return false;
 470         }
 471
 472         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 473
 474         shared_ptr<FilterGraph> graph;
 475
 476         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 477         while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 478                 ++i;
 479         }
 480
 481         if (i == _filter_graphs.end ()) {
 482                 shared_ptr<const Film> film = _film.lock ();
 483                 assert (film);
 484
 485                 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 486                 _filter_graphs.push_back (graph);
 487
 488                 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 489         } else {
 490                 graph = *i;
 491         }
 492
 493         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 494
 495         string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
 496
 497         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 498
 499                 shared_ptr<Image> image = i->first;
 500                 if (!post_process.empty ()) {
 501                         image = image->post_process (post_process, true);
 502                 }
 503
 504                 if (i->second != AV_NOPTS_VALUE) {
 505                         double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset;
 506                         VideoFrame const f = rint (pts * _ffmpeg_content->video_frame_rate ());
 507                         video (image, false, f);
 508                 } else {
 509                         shared_ptr<const Film> film = _film.lock ();
 510                         assert (film);
 511                         film->log()->log ("Dropping frame without PTS");
 512                 }
 513         }
 514
 515         return true;
 516 }
 517
 518
 519 void
 520 FFmpegDecoder::setup_subtitle ()
 521 {
 522         boost::mutex::scoped_lock lm (_mutex);
 523
 524         if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
 525                 return;
 526         }
 527
 528         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 529         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 530
 531         if (_subtitle_codec == 0) {
 532                 throw DecodeError (_("could not find subtitle decoder"));
 533         }
 534
 535         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 536                 throw DecodeError (N_("could not open subtitle decoder"));
 537         }
 538 }
 539
 540 void
 541 FFmpegDecoder::decode_subtitle_packet ()
 542 {
 543         int got_subtitle;
 544         AVSubtitle sub;
 545         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 546                 return;
 547         }
 548
 549         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 550            indicate that the previous subtitle should stop.
 551         */
 552         if (sub.num_rects <= 0) {
 553                 image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
 554                 return;
 555         } else if (sub.num_rects > 1) {
 556                 throw DecodeError (_("multi-part subtitles not yet supported"));
 557         }
 558
 559         /* Subtitle PTS in seconds (within the source, not taking into account any of the
 560            source that we may have chopped off for the DCP)
 561         */
 562         double const packet_time = (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
 563
 564         /* hence start time for this sub */
 565         ContentTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
 566         ContentTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
 567
 568         AVSubtitleRect const * rect = sub.rects[0];
 569
 570         if (rect->type != SUBTITLE_BITMAP) {
 571                 throw DecodeError (_("non-bitmap subtitles not yet supported"));
 572         }
 573
 574         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 575            G, third B, fourth A.
 576         */
 577         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
 578
 579         /* Start of the first line in the subtitle */
 580         uint8_t* sub_p = rect->pict.data[0];
 581         /* sub_p looks up into a BGRA palette which is here
 582            (i.e. first byte B, second G, third R, fourth A)
 583         */
 584         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 585         /* Start of the output data */
 586         uint32_t* out_p = (uint32_t *) image->data()[0];
 587
 588         for (int y = 0; y < rect->h; ++y) {
 589                 uint8_t* sub_line_p = sub_p;
 590                 uint32_t* out_line_p = out_p;
 591                 for (int x = 0; x < rect->w; ++x) {
 592                         uint32_t const p = palette[*sub_line_p++];
 593                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 594                 }
 595                 sub_p += rect->pict.linesize[0];
 596                 out_p += image->stride()[0] / sizeof (uint32_t);
 597         }
 598
 599         libdcp::Size const vs = _ffmpeg_content->video_size ();
 600
 601         image_subtitle (
 602                 image,
 603                 dcpomatic::Rect<double> (
 604                         static_cast<double> (rect->x) / vs.width,
 605                         static_cast<double> (rect->y) / vs.height,
 606                         static_cast<double> (rect->w) / vs.width,
 607                         static_cast<double> (rect->h) / vs.height
 608                         ),
 609                 from,
 610                 to
 611                 );
 612
 613
 614         avsubtitle_free (&sub);
 615 }