src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 #include <sndfile.h>
  32 extern "C" {
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 }
  36 #include "film.h"
  37 #include "filter.h"
  38 #include "exceptions.h"
  39 #include "image.h"
  40 #include "util.h"
  41 #include "log.h"
  42 #include "ffmpeg_decoder.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46
  47 #include "i18n.h"
  48
  49 using std::cout;
  50 using std::string;
  51 using std::vector;
  52 using std::stringstream;
  53 using std::list;
  54 using std::min;
  55 using std::pair;
  56 using boost::shared_ptr;
  57 using boost::optional;
  58 using boost::dynamic_pointer_cast;
  59 using libdcp::Size;
  60
  61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
  62         : Decoder (f)
  63         , VideoDecoder (f, c)
  64         , AudioDecoder (f, c)
  65         , SubtitleDecoder (f)
  66         , FFmpeg (c)
  67         , _subtitle_codec_context (0)
  68         , _subtitle_codec (0)
  69         , _decode_video (video)
  70         , _decode_audio (audio)
  71         , _video_pts_offset (0)
  72         , _audio_pts_offset (0)
  73 {
  74         setup_subtitle ();
  75
  76         /* Audio and video frame PTS values may not start with 0.  We want
  77            to fiddle them so that:
  78
  79            1.  One of them starts at time 0.
  80            2.  The first video PTS value ends up on a frame boundary.
  81
  82            Then we remove big initial gaps in PTS and we allow our
  83            insertion of black frames to work.
  84
  85            We will do:
  86              audio_pts_to_use = audio_pts_from_ffmpeg + audio_pts_offset;
  87              video_pts_to_use = video_pts_from_ffmpeg + video_pts_offset;
  88         */
  89
  90         bool const have_video = video && c->first_video();
  91         bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
  92
  93         /* First, make one of them start at 0 */
  94
  95         if (have_audio && have_video) {
  96                 _video_pts_offset = _audio_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  97         } else if (have_video) {
  98                 _video_pts_offset = - c->first_video().get();
  99         } else if (have_audio) {
 100                 _audio_pts_offset = - c->audio_stream()->first_audio.get();
 101         }
 102
 103         /* Now adjust both so that the video pts starts on a frame */
 104         if (have_video && have_audio) {
 105                 double first_video = c->first_video().get() + _video_pts_offset;
 106                 double const old_first_video = first_video;
 107
 108                 /* Round the first video up to a frame boundary */
 109                 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
 110                         first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
 111                 }
 112
 113                 _video_pts_offset += first_video - old_first_video;
 114                 _audio_pts_offset += first_video - old_first_video;
 115         }
 116 }
 117
 118 FFmpegDecoder::~FFmpegDecoder ()
 119 {
 120         boost::mutex::scoped_lock lm (_mutex);
 121
 122         if (_subtitle_codec_context) {
 123                 avcodec_close (_subtitle_codec_context);
 124         }
 125 }
 126
 127 void
 128 FFmpegDecoder::flush ()
 129 {
 130         /* Get any remaining frames */
 131
 132         _packet.data = 0;
 133         _packet.size = 0;
 134
 135         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 136
 137         if (_decode_video) {
 138                 while (decode_video_packet ()) {}
 139         }
 140
 141         if (_ffmpeg_content->audio_stream() && _decode_audio) {
 142                 decode_audio_packet ();
 143         }
 144 }
 145
 146 bool
 147 FFmpegDecoder::pass ()
 148 {
 149         int r = av_read_frame (_format_context, &_packet);
 150
 151         if (r < 0) {
 152                 if (r != AVERROR_EOF) {
 153                         /* Maybe we should fail here, but for now we'll just finish off instead */
 154                         char buf[256];
 155                         av_strerror (r, buf, sizeof(buf));
 156                         shared_ptr<const Film> film = _film.lock ();
 157                         assert (film);
 158                         film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 159                 }
 160
 161                 flush ();
 162                 return true;
 163         }
 164
 165         avcodec_get_frame_defaults (_frame);
 166
 167         shared_ptr<const Film> film = _film.lock ();
 168         assert (film);
 169
 170         int const si = _packet.stream_index;
 171
 172         if (si == _video_stream && _decode_video) {
 173                 decode_video_packet ();
 174         } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
 175                 decode_audio_packet ();
 176         } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
 177                 decode_subtitle_packet ();
 178         }
 179
 180         av_free_packet (&_packet);
 181         return false;
 182 }
 183
 184 /** @param data pointer to array of pointers to buffers.
 185  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 186  */
 187 shared_ptr<AudioBuffers>
 188 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 189 {
 190         assert (_ffmpeg_content->audio_channels());
 191         assert (bytes_per_audio_sample());
 192
 193         /* Deinterleave and convert to float */
 194
 195         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 196
 197         int const total_samples = size / bytes_per_audio_sample();
 198         int const frames = total_samples / _ffmpeg_content->audio_channels();
 199         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 200
 201         switch (audio_sample_format()) {
 202         case AV_SAMPLE_FMT_S16:
 203         {
 204                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 205                 int sample = 0;
 206                 int channel = 0;
 207                 for (int i = 0; i < total_samples; ++i) {
 208                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 209
 210                         ++channel;
 211                         if (channel == _ffmpeg_content->audio_channels()) {
 212                                 channel = 0;
 213                                 ++sample;
 214                         }
 215                 }
 216         }
 217         break;
 218
 219         case AV_SAMPLE_FMT_S16P:
 220         {
 221                 int16_t** p = reinterpret_cast<int16_t **> (data);
 222                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 223                         for (int j = 0; j < frames; ++j) {
 224                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 225                         }
 226                 }
 227         }
 228         break;
 229
 230         case AV_SAMPLE_FMT_S32:
 231         {
 232                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 233                 int sample = 0;
 234                 int channel = 0;
 235                 for (int i = 0; i < total_samples; ++i) {
 236                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 237
 238                         ++channel;
 239                         if (channel == _ffmpeg_content->audio_channels()) {
 240                                 channel = 0;
 241                                 ++sample;
 242                         }
 243                 }
 244         }
 245         break;
 246
 247         case AV_SAMPLE_FMT_FLT:
 248         {
 249                 float* p = reinterpret_cast<float*> (data[0]);
 250                 int sample = 0;
 251                 int channel = 0;
 252                 for (int i = 0; i < total_samples; ++i) {
 253                         audio->data(channel)[sample] = *p++;
 254
 255                         ++channel;
 256                         if (channel == _ffmpeg_content->audio_channels()) {
 257                                 channel = 0;
 258                                 ++sample;
 259                         }
 260                 }
 261         }
 262         break;
 263
 264         case AV_SAMPLE_FMT_FLTP:
 265         {
 266                 float** p = reinterpret_cast<float**> (data);
 267                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 268                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 269                 }
 270         }
 271         break;
 272
 273         default:
 274                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 275         }
 276
 277         return audio;
 278 }
 279
 280 AVSampleFormat
 281 FFmpegDecoder::audio_sample_format () const
 282 {
 283         if (!_ffmpeg_content->audio_stream()) {
 284                 return (AVSampleFormat) 0;
 285         }
 286
 287         return audio_codec_context()->sample_fmt;
 288 }
 289
 290 int
 291 FFmpegDecoder::bytes_per_audio_sample () const
 292 {
 293         return av_get_bytes_per_sample (audio_sample_format ());
 294 }
 295
 296 int
 297 FFmpegDecoder::minimal_run (boost::function<bool (ContentTime, ContentTime, int)> finished)
 298 {
 299         int frames_read = 0;
 300         ContentTime last_video = 0;
 301         ContentTime last_audio = 0;
 302         bool flushing = false;
 303
 304         while (!finished (last_video, last_audio, frames_read)) {
 305                 int r = av_read_frame (_format_context, &_packet);
 306                 if (r < 0) {
 307                         /* We should flush our decoders here, possibly yielding a few more frames,
 308                            but the consequence of having to do that is too hideous to contemplate.
 309                            Instead we give up and say that you can't seek too close to the end
 310                            of a file.
 311                         */
 312                         return frames_read;
 313                 }
 314
 315                 ++frames_read;
 316
 317                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 318
 319                 if (_packet.stream_index == _video_stream) {
 320
 321                         avcodec_get_frame_defaults (_frame);
 322
 323                         int finished = 0;
 324                         r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
 325                         if (r >= 0 && finished) {
 326                                 last_video = rint (
 327                                         (av_frame_get_best_effort_timestamp (_frame) * time_base + _video_pts_offset) * TIME_HZ
 328                                         );
 329                         }
 330
 331                 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
 332                         AVPacket copy_packet = _packet;
 333                         while (copy_packet.size > 0) {
 334
 335                                 int finished;
 336                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
 337                                 if (r >= 0 && finished) {
 338                                         last_audio = rint (
 339                                                 (av_frame_get_best_effort_timestamp (_frame) * time_base + _audio_pts_offset) * TIME_HZ
 340                                                 );
 341                                 }
 342
 343                                 copy_packet.data += r;
 344                                 copy_packet.size -= r;
 345                         }
 346                 }
 347
 348                 av_free_packet (&_packet);
 349         }
 350
 351         return frames_read;
 352 }
 353
 354 bool
 355 FFmpegDecoder::seek_overrun_finished (ContentTime seek, ContentTime last_video, ContentTime last_audio) const
 356 {
 357         return last_video >= seek || last_audio >= seek;
 358 }
 359
 360 bool
 361 FFmpegDecoder::seek_final_finished (int n, int done) const
 362 {
 363         return n == done;
 364 }
 365
 366 void
 367 FFmpegDecoder::seek_and_flush (ContentTime t)
 368 {
 369         int64_t const initial_v = ((double (t) / TIME_HZ) - _video_pts_offset) /
 370                 av_q2d (_format_context->streams[_video_stream]->time_base);
 371
 372         av_seek_frame (_format_context, _video_stream, initial_v, AVSEEK_FLAG_BACKWARD);
 373
 374         shared_ptr<FFmpegAudioStream> as = _ffmpeg_content->audio_stream ();
 375         if (as) {
 376                 int64_t initial_a = ((double (t) / TIME_HZ) - _audio_pts_offset) /
 377                         av_q2d (as->stream(_format_context)->time_base);
 378
 379                 av_seek_frame (_format_context, as->index (_format_context), initial_a, AVSEEK_FLAG_BACKWARD);
 380         }
 381
 382         avcodec_flush_buffers (video_codec_context());
 383         if (audio_codec_context ()) {
 384                 avcodec_flush_buffers (audio_codec_context ());
 385         }
 386         if (_subtitle_codec_context) {
 387                 avcodec_flush_buffers (_subtitle_codec_context);
 388         }
 389 }
 390
 391 void
 392 FFmpegDecoder::seek (ContentTime time, bool accurate)
 393 {
 394         Decoder::seek (time, accurate);
 395
 396         /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
 397            a number plucked from the air) earlier than we want to end up.  The loop below
 398            will hopefully then step through to where we want to be.
 399         */
 400
 401         ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
 402         ContentTime initial_seek = time - pre_roll;
 403         if (initial_seek < 0) {
 404                 initial_seek = 0;
 405         }
 406
 407         /* Initial seek time in the video stream's timebase */
 408
 409         seek_and_flush (initial_seek);
 410
 411         if (time == 0 || !accurate) {
 412                 /* We're already there, or we're as close as we need to be */
 413                 return;
 414         }
 415
 416         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 417
 418         seek_and_flush (initial_seek);
 419         if (N > 0) {
 420                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 421         }
 422 }
 423
 424 void
 425 FFmpegDecoder::decode_audio_packet ()
 426 {
 427         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 428            several times.
 429         */
 430
 431         AVPacket copy_packet = _packet;
 432
 433         while (copy_packet.size > 0) {
 434
 435                 int frame_finished;
 436                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 437
 438                 if (decode_result < 0) {
 439                         shared_ptr<const Film> film = _film.lock ();
 440                         assert (film);
 441                         film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 442                         return;
 443                 }
 444
 445                 if (frame_finished) {
 446                         ContentTime const t = rint (
 447                                 (av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
 448                                  * av_frame_get_best_effort_timestamp(_frame) + _audio_pts_offset) * TIME_HZ
 449                                 );
 450
 451                         int const data_size = av_samples_get_buffer_size (
 452                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 453                                 );
 454
 455                         audio (deinterleave_audio (_frame->data, data_size), t);
 456                 }
 457
 458                 copy_packet.data += decode_result;
 459                 copy_packet.size -= decode_result;
 460         }
 461 }
 462
 463 bool
 464 FFmpegDecoder::decode_video_packet ()
 465 {
 466         int frame_finished;
 467         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 468                 return false;
 469         }
 470
 471         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 472
 473         shared_ptr<FilterGraph> graph;
 474
 475         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 476         while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 477                 ++i;
 478         }
 479
 480         if (i == _filter_graphs.end ()) {
 481                 shared_ptr<const Film> film = _film.lock ();
 482                 assert (film);
 483
 484                 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 485                 _filter_graphs.push_back (graph);
 486
 487                 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 488         } else {
 489                 graph = *i;
 490         }
 491
 492         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 493
 494         string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
 495
 496         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 497
 498                 shared_ptr<Image> image = i->first;
 499                 if (!post_process.empty ()) {
 500                         image = image->post_process (post_process, true);
 501                 }
 502
 503                 if (i->second != AV_NOPTS_VALUE) {
 504                         ContentTime const t = rint ((i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _video_pts_offset) * TIME_HZ);
 505                         video (image, false, t);
 506                 } else {
 507                         shared_ptr<const Film> film = _film.lock ();
 508                         assert (film);
 509                         film->log()->log ("Dropping frame without PTS");
 510                 }
 511         }
 512
 513         return true;
 514 }
 515
 516
 517 void
 518 FFmpegDecoder::setup_subtitle ()
 519 {
 520         boost::mutex::scoped_lock lm (_mutex);
 521
 522         if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
 523                 return;
 524         }
 525
 526         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 527         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 528
 529         if (_subtitle_codec == 0) {
 530                 throw DecodeError (_("could not find subtitle decoder"));
 531         }
 532
 533         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 534                 throw DecodeError (N_("could not open subtitle decoder"));
 535         }
 536 }
 537
 538 void
 539 FFmpegDecoder::decode_subtitle_packet ()
 540 {
 541         int got_subtitle;
 542         AVSubtitle sub;
 543         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 544                 return;
 545         }
 546
 547         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 548            indicate that the previous subtitle should stop.
 549         */
 550         if (sub.num_rects <= 0) {
 551                 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
 552                 return;
 553         } else if (sub.num_rects > 1) {
 554                 throw DecodeError (_("multi-part subtitles not yet supported"));
 555         }
 556
 557         /* Subtitle PTS in seconds (within the source, not taking into account any of the
 558            source that we may have chopped off for the DCP)
 559         */
 560         double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;
 561
 562         /* hence start time for this sub */
 563         DCPTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
 564         DCPTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
 565
 566         AVSubtitleRect const * rect = sub.rects[0];
 567
 568         if (rect->type != SUBTITLE_BITMAP) {
 569                 throw DecodeError (_("non-bitmap subtitles not yet supported"));
 570         }
 571
 572         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
 573
 574         /* Start of the first line in the subtitle */
 575         uint8_t* sub_p = rect->pict.data[0];
 576         /* sub_p looks up into a RGB palette which is here */
 577         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 578         /* Start of the output data */
 579         uint32_t* out_p = (uint32_t *) image->data()[0];
 580
 581         for (int y = 0; y < rect->h; ++y) {
 582                 uint8_t* sub_line_p = sub_p;
 583                 uint32_t* out_line_p = out_p;
 584                 for (int x = 0; x < rect->w; ++x) {
 585                         *out_line_p++ = palette[*sub_line_p++];
 586                 }
 587                 sub_p += rect->pict.linesize[0];
 588                 out_p += image->stride()[0] / sizeof (uint32_t);
 589         }
 590
 591         libdcp::Size const vs = _ffmpeg_content->video_size ();
 592
 593         subtitle (
 594                 image,
 595                 dcpomatic::Rect<double> (
 596                         static_cast<double> (rect->x) / vs.width,
 597                         static_cast<double> (rect->y) / vs.height,
 598                         static_cast<double> (rect->w) / vs.width,
 599                         static_cast<double> (rect->h) / vs.height
 600                         ),
 601                 from,
 602                 to
 603                 );
 604
 605
 606         avsubtitle_free (&sub);
 607 }