2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/* Construct an FFmpegDecoder.
 * @param f Film that this content is part of (weakly held for logging).
 * @param c FFmpeg content to decode.
 * @param video true to decode the video stream.
 * @param audio true to decode the audio stream.
 *
 * The body computes _pts_offset, which is later added to every PTS that
 * FFmpeg gives us so that playback starts at time 0 and the first video
 * frame falls exactly on a frame boundary.
 *
 * NOTE(review): this listing is missing lines (parts of the initialiser
 * list and some closing braces); comments describe only the visible code.
 */
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
75 /* Audio and video frame PTS values may not start with 0. We want
76 to fiddle them so that:
78 1. One of them starts at time 0.
79 2. The first video PTS value ends up on a frame boundary.
81 Then we remove big initial gaps in PTS and we allow our
82 insertion of black frames to work.
84 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
/* A stream only "counts" if we were asked to decode it AND the content
   reports a first PTS for it. */
87 bool const have_video = video && c->first_video();
88 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
90 /* First, make one of them start at 0 */
92 if (have_audio && have_video) {
/* Shift by the earlier of the two first-PTS values so neither goes negative */
93 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
94 } else if (have_video) {
95 _pts_offset = - c->first_video().get();
96 } else if (have_audio) {
97 _pts_offset = - c->audio_stream()->first_audio.get();
100 /* Now adjust both so that the video pts starts on a frame */
101 if (have_video && have_audio) {
102 double first_video = c->first_video().get() + _pts_offset;
103 double const old_first_video = first_video;
105 /* Round the first video up to a frame boundary */
/* 1e-6 tolerance: only snap if we are not already (numerically) on a boundary */
106 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
107 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Apply the extra shift needed by the rounding to both streams */
110 _pts_offset += first_video - old_first_video;
/* Destructor: close the subtitle codec context, if one was opened by
 * setup_subtitle().  Takes _mutex to serialise against other users of
 * the codec contexts.
 */
114 FFmpegDecoder::~FFmpegDecoder ()
116 boost::mutex::scoped_lock lm (_mutex);
/* _subtitle_codec_context is 0 unless setup_subtitle() succeeded */
118 if (_subtitle_codec_context) {
119 avcodec_close (_subtitle_codec_context);
/* Drain any frames still buffered inside the video/audio decoders at
 * end-of-stream, then flush the base AudioDecoder.
 */
124 FFmpegDecoder::flush ()
126 /* Get any remaining frames */
131 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Keep decoding until the video decoder has no more buffered frames */
134 while (decode_video_packet ()) {}
/* Only drain audio if we have an audio stream and were asked to decode it */
137 if (_ffmpeg_content->audio_stream() && _decode_audio) {
138 decode_audio_packet ();
141 AudioDecoder::flush ();
/* Read the next packet from the container and dispatch it to the
 * appropriate decode_*_packet() method based on its stream index.
 * On read failure (including EOF) the error is logged (if not EOF)
 * and the decoder finishes off via flush -- see comment below.
 */
145 FFmpegDecoder::pass ()
147 int r = av_read_frame (_format_context, &_packet);
/* r < 0 handling: EOF is expected; anything else gets logged */
150 if (r != AVERROR_EOF) {
151 /* Maybe we should fail here, but for now we'll just finish off instead */
153 av_strerror (r, buf, sizeof(buf));
154 shared_ptr<const Film> film = _film.lock ();
156 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
/* Reset _frame before handing it to a decoder */
163 avcodec_get_frame_defaults (_frame);
165 shared_ptr<const Film> film = _film.lock ();
168 int const si = _packet.stream_index;
/* Route the packet: video, audio or subtitle, each gated on whether we
   are meant to be decoding that kind of content. */
170 if (si == _video_stream && _decode_video) {
171 decode_video_packet ();
172 } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
173 decode_audio_packet ();
174 } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
175 decode_subtitle_packet ();
/* We own the packet's payload after av_read_frame; free it */
178 av_free_packet (&_packet);
182 /** @param data pointer to array of pointers to buffers.
183 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 * @param size total size of the audio data, in bytes, across all channels.
 * @return AudioBuffers containing the same samples converted to float in
 * the range [-1, 1), one plane per channel.
 * @throws DecodeError if the sample format is not one we handle.
185 shared_ptr<AudioBuffers>
186 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
188 assert (_ffmpeg_content->audio_channels());
189 assert (bytes_per_audio_sample());
191 /* Deinterleave and convert to float */
/* size must be a whole number of frames (all channels, whole samples) */
193 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
195 int const total_samples = size / bytes_per_audio_sample();
196 int const frames = total_samples / _ffmpeg_content->audio_channels();
197 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
199 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: walk the single buffer, splitting samples
   out into per-channel planes; divide by 2^15 to normalise. */
200 case AV_SAMPLE_FMT_S16:
202 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
205 for (int i = 0; i < total_samples; ++i) {
206 audio->data(channel)[sample] = float(*p++) / (1 << 15);
209 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar signed 16-bit: one input buffer per channel already */
217 case AV_SAMPLE_FMT_S16P:
219 int16_t** p = reinterpret_cast<int16_t **> (data);
220 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
221 for (int j = 0; j < frames; ++j) {
222 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit; normalise by 2^31 */
228 case AV_SAMPLE_FMT_S32:
230 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
233 for (int i = 0; i < total_samples; ++i) {
234 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
237 if (channel == _ffmpeg_content->audio_channels()) {
/* Interleaved float: already normalised, just de-interleave */
245 case AV_SAMPLE_FMT_FLT:
247 float* p = reinterpret_cast<float*> (data[0]);
250 for (int i = 0; i < total_samples; ++i) {
251 audio->data(channel)[sample] = *p++;
254 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: a straight per-channel memcpy suffices */
262 case AV_SAMPLE_FMT_FLTP:
264 float** p = reinterpret_cast<float**> (data);
265 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
266 memcpy (audio->data(i), p[i], frames * sizeof(float));
272 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* @return the sample format of our audio stream's codec context, or
 * AVSampleFormat 0 if the content has no audio stream.
 */
279 FFmpegDecoder::audio_sample_format () const
281 if (!_ffmpeg_content->audio_stream()) {
/* No audio: return a harmless default rather than dereferencing nothing */
282 return (AVSampleFormat) 0;
285 return audio_codec_context()->sample_fmt;
/* @return the size in bytes of one audio sample in our stream's format
 * (per channel, e.g. 2 for S16).
 */
289 FFmpegDecoder::bytes_per_audio_sample () const
291 return av_get_bytes_per_sample (audio_sample_format ());
/* Decode packets, discarding the results, until `finished' returns true.
 * Used during seeking to step through content without emitting it.
 * @param finished predicate given (last video time, last audio time,
 * number of frames read so far); decoding stops when it returns true.
 * The times passed are content times adjusted by _pts_offset.
 */
295 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
298 optional<ContentTime> last_video;
299 optional<ContentTime> last_audio;
301 while (!finished (last_video, last_audio, frames_read)) {
302 int r = av_read_frame (_format_context, &_packet);
304 /* We should flush our decoders here, possibly yielding a few more frames,
305 but the consequence of having to do that is too hideous to contemplate.
306 Instead we give up and say that you can't seek too close to the end
/* Convert this stream's timebase to seconds for the PTS arithmetic below */
314 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
316 if (_packet.stream_index == _video_stream) {
318 avcodec_get_frame_defaults (_frame);
321 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
322 if (r >= 0 && finished) {
/* Record the time of the frame we just decoded, scaled to TIME_HZ */
324 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
328 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
/* Audio packets may contain several frames, so loop over the packet */
329 AVPacket copy_packet = _packet;
330 while (copy_packet.size > 0) {
/* NOTE(review): this passes &_packet but advances copy_packet below --
   looks like it should pass &copy_packet; confirm against upstream. */
333 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
334 if (r >= 0 && finished) {
336 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
340 copy_packet.data += r;
341 copy_packet.size -= r;
345 av_free_packet (&_packet);
/* Predicate for minimal_run(): true once either stream has decoded a
 * frame at or beyond the seek target `seek'.
 */
352 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
354 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/* Predicate for minimal_run() during the final stage of an accurate seek;
 * presumably true once `done' frames of the required `n' have been read
 * -- body not visible in this listing, confirm against upstream.
 */
358 FFmpegDecoder::seek_final_finished (int n, int done) const
/* Seek the format context to content time `t' and flush all codec
 * buffers so that no stale frames are emitted after the seek.
 * The target is converted from TIME_HZ units to the video stream's
 * timebase, removing our _pts_offset fiddle first.
 */
364 FFmpegDecoder::seek_and_flush (ContentTime t)
366 int64_t s = ((double (t) / TIME_HZ) - _pts_offset) /
367 av_q2d (_format_context->streams[_video_stream]->time_base);
/* Also compute the target in the audio stream's timebase, if we have audio */
369 if (_ffmpeg_content->audio_stream ()) {
372 ((double (t) / TIME_HZ) - _pts_offset) /
373 av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base)
378 /* Ridiculous empirical hack */
/* BACKWARD flag: land at or before the target so we can decode forwards to it */
381 av_seek_frame (_format_context, _video_stream, s, AVSEEK_FLAG_BACKWARD);
383 avcodec_flush_buffers (video_codec_context());
384 if (audio_codec_context ()) {
385 avcodec_flush_buffers (audio_codec_context ());
387 if (_subtitle_codec_context) {
388 avcodec_flush_buffers (_subtitle_codec_context);
/* Seek to content time `time'.
 * @param time target time.
 * @param accurate if true, seek a little early and then decode forwards
 * (discarding) until exactly the right frame; if false, a single coarse
 * av_seek_frame is acceptable.
 */
393 FFmpegDecoder::seek (ContentTime time, bool accurate)
/* Let the base classes reset their own state first */
395 Decoder::seek (time, accurate);
396 AudioDecoder::seek (time, accurate);
398 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
399 a number plucked from the air) earlier than we want to end up. The loop below
400 will hopefully then step through to where we want to be.
403 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
404 ContentTime initial_seek = time - pre_roll;
/* Clamp: cannot seek before the start of the content */
405 if (initial_seek < 0) {
409 /* Initial seek time in the video stream's timebase */
411 seek_and_flush (initial_seek);
/* Count how many frames it takes to overrun the target... */
418 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then seek again and run up to (but not past) it */
420 seek_and_flush (initial_seek);
422 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
/* Decode the audio data in _packet and emit it via audio().
 * A single packet can hold several audio frames, so we iterate over a
 * copy of the packet, advancing by however many bytes each
 * avcodec_decode_audio4 call consumed.
 */
427 FFmpegDecoder::decode_audio_packet ()
429 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
/* Copy so that the original _packet (freed by the caller) is untouched */
433 AVPacket copy_packet = _packet;
435 while (copy_packet.size > 0) {
/* NOTE(review): "©_packet" is mojibake -- this should read
   "&copy_packet" (an HTML-entity mangling of the address-of); fix
   before this will compile. */
438 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
440 if (decode_result < 0) {
441 shared_ptr<const Film> film = _film.lock ();
443 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
447 if (frame_finished) {
/* Timestamp of this frame in content time, via the stream's timebase */
448 ContentTime const ct = (
449 av_frame_get_best_effort_timestamp (_frame) *
450 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base)
/* Byte size of the decoded samples, so deinterleave_audio knows the frame count */
454 int const data_size = av_samples_get_buffer_size (
455 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
458 audio (deinterleave_audio (_frame->data, data_size), ct);
/* Advance past the bytes this call consumed */
461 copy_packet.data += decode_result;
462 copy_packet.size -= decode_result;
/* Decode the video data in _packet, push the frame through a FilterGraph
 * (creating and caching one per size/pixel-format combination), apply any
 * post-process filters and emit the resulting images via video().
 * @return presumably false when no complete frame was produced (see the
 * early-out below) -- return statements are not visible in this listing.
 */
467 FFmpegDecoder::decode_video_packet ()
/* Bail out early if the decoder produced no complete frame */
470 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* _filter_graphs is shared state; hold the mutex while searching/modifying */
474 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
476 shared_ptr<FilterGraph> graph;
/* Look for an existing graph matching this frame's size and pixel format */
478 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
479 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
/* None found: build a new graph and cache it for subsequent frames */
483 if (i == _filter_graphs.end ()) {
484 shared_ptr<const Film> film = _film.lock ();
487 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
488 _filter_graphs.push_back (graph);
490 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* One input frame can yield several output images from the filter graph */
495 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
497 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
499 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
501 shared_ptr<Image> image = i->first;
502 if (!post_process.empty ()) {
503 image = image->post_process (post_process, true);
/* Only emit frames that have a usable PTS; otherwise log and drop */
506 if (i->second != AV_NOPTS_VALUE) {
507 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset;
/* Convert the PTS (seconds) to a frame index at the content's frame rate */
508 VideoFrame const f = rint (pts * _ffmpeg_content->video_frame_rate ());
509 video (image, false, f);
511 shared_ptr<const Film> film = _film.lock ();
513 film->log()->log ("Dropping frame without PTS");
/* Find and open a decoder for the content's subtitle stream, storing the
 * codec context in _subtitle_codec_context (closed in the destructor).
 * Does nothing if there is no subtitle stream or its index is invalid.
 * @throws DecodeError if no decoder can be found or opened.
 */
522 FFmpegDecoder::setup_subtitle ()
524 boost::mutex::scoped_lock lm (_mutex);
/* Guard against a stale stream index that is out of range for this container */
526 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
530 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
531 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
533 if (_subtitle_codec == 0) {
534 throw DecodeError (_("could not find subtitle decoder"));
537 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
538 throw DecodeError (N_("could not open subtitle decoder"));
/* Decode the subtitle data in _packet and emit it via subtitle().
 * Only single-rect bitmap subtitles are supported; the bitmap's BGRA
 * palette-indexed pixels are converted to an RGBA Image and its position
 * expressed as a proportion of the video size.
 * @throws DecodeError on multi-part or non-bitmap subtitles.
 */
543 FFmpegDecoder::decode_subtitle_packet ()
/* Nothing to do if no complete subtitle came out of this packet */
547 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
551 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
552 indicate that the previous subtitle should stop.
554 if (sub.num_rects <= 0) {
/* Emit a null image to clear the previous subtitle */
555 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
557 } else if (sub.num_rects > 1) {
558 throw DecodeError (_("multi-part subtitles not yet supported"));
561 /* Subtitle PTS in seconds (within the source, not taking into account any of the
562 source that we may have chopped off for the DCP)
564 double const packet_time = (static_cast<double> (sub.pts ) / AV_TIME_BASE) + _pts_offset;
566 /* hence start time for this sub */
/* display times are in milliseconds relative to packet_time */
567 ContentTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
568 ContentTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
570 AVSubtitleRect const * rect = sub.rects[0];
572 if (rect->type != SUBTITLE_BITMAP) {
573 throw DecodeError (_("non-bitmap subtitles not yet supported"));
576 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
577 G, third B, fourth A.
579 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
581 /* Start of the first line in the subtitle */
582 uint8_t* sub_p = rect->pict.data[0];
583 /* sub_p looks up into a BGRA palette which is here
584 (i.e. first byte B, second G, third R, fourth A)
586 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
587 /* Start of the output data */
588 uint32_t* out_p = (uint32_t *) image->data()[0];
/* For each pixel, look up its palette entry and swap B and R to turn
   BGRA into RGBA, keeping G and A in place. */
590 for (int y = 0; y < rect->h; ++y) {
591 uint8_t* sub_line_p = sub_p;
592 uint32_t* out_line_p = out_p;
593 for (int x = 0; x < rect->w; ++x) {
594 uint32_t const p = palette[*sub_line_p++];
595 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Advance to the next row; output stride is in bytes, so scale to words */
597 sub_p += rect->pict.linesize[0];
598 out_p += image->stride()[0] / sizeof (uint32_t);
/* Express the rect's position and size as fractions of the video frame */
601 libdcp::Size const vs = _ffmpeg_content->video_size ();
605 dcpomatic::Rect<double> (
606 static_cast<double> (rect->x) / vs.width,
607 static_cast<double> (rect->y) / vs.height,
608 static_cast<double> (rect->w) / vs.width,
609 static_cast<double> (rect->h) / vs.height
/* Free the AVSubtitle's rects/bitmaps now we have copied what we need */
616 avsubtitle_free (&sub);