src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 #include <sndfile.h>
  32 extern "C" {
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 }
  36 #include "film.h"
  37 #include "filter.h"
  38 #include "exceptions.h"
  39 #include "image.h"
  40 #include "util.h"
  41 #include "log.h"
  42 #include "ffmpeg_decoder.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46
  47 #include "i18n.h"
  48
  49 using std::cout;
  50 using std::string;
  51 using std::vector;
  52 using std::stringstream;
  53 using std::list;
  54 using std::min;
  55 using std::pair;
  56 using boost::shared_ptr;
  57 using boost::optional;
  58 using boost::dynamic_pointer_cast;
  59 using libdcp::Size;
  60
  61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
  62         : Decoder (f)
  63         , VideoDecoder (f, c)
  64         , AudioDecoder (f, c)
  65         , SubtitleDecoder (f)
  66         , FFmpeg (c)
  67         , _subtitle_codec_context (0)
  68         , _subtitle_codec (0)
  69         , _decode_video (video)
  70         , _decode_audio (audio)
  71         , _video_pts_offset (0)
  72         , _audio_pts_offset (0)
  73 {
  74         setup_subtitle ();
  75
  76         /* Audio and video frame PTS values may not start with 0.  We want
  77            to fiddle them so that:
  78
  79            1.  One of them starts at time 0.
  80            2.  The first video PTS value ends up on a frame boundary.
  81
  82            Then we remove big initial gaps in PTS and we allow our
  83            insertion of black frames to work.
  84
  85            We will do:
  86              audio_pts_to_use = audio_pts_from_ffmpeg + audio_pts_offset;
  87              video_pts_to_use = video_pts_from_ffmpeg + video_pts_offset;
  88         */
  89
  90         bool const have_video = video && c->first_video();
  91         bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
  92
  93         /* First, make one of them start at 0 */
  94
  95         if (have_audio && have_video) {
  96                 _video_pts_offset = _audio_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  97         } else if (have_video) {
  98                 _video_pts_offset = - c->first_video().get();
  99         } else if (have_audio) {
 100                 _audio_pts_offset = - c->audio_stream()->first_audio.get();
 101         }
 102
 103         /* Now adjust both so that the video pts starts on a frame */
 104         if (have_video && have_audio) {
 105                 double first_video = c->first_video().get() + _video_pts_offset;
 106                 double const old_first_video = first_video;
 107
 108                 /* Round the first video up to a frame boundary */
 109                 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
 110                         first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
 111                 }
 112
 113                 _video_pts_offset += first_video - old_first_video;
 114                 _audio_pts_offset += first_video - old_first_video;
 115         }
 116 }
 117
 118 FFmpegDecoder::~FFmpegDecoder ()
 119 {
 120         boost::mutex::scoped_lock lm (_mutex);
 121
 122         if (_subtitle_codec_context) {
 123                 avcodec_close (_subtitle_codec_context);
 124         }
 125 }
 126
 127 void
 128 FFmpegDecoder::flush ()
 129 {
 130         /* Get any remaining frames */
 131
 132         _packet.data = 0;
 133         _packet.size = 0;
 134
 135         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 136
 137         if (_decode_video) {
 138                 while (decode_video_packet ()) {}
 139         }
 140
 141         if (_ffmpeg_content->audio_stream() && _decode_audio) {
 142                 decode_audio_packet ();
 143         }
 144
 145 #if 0
 146         /* XXX */
 147         /* Stop us being asked for any more data */
 148         _video_position = _ffmpeg_content->video_length ();
 149         _audio_position = _ffmpeg_content->audio_length ();
 150 #endif
 151 }
 152
 153 bool
 154 FFmpegDecoder::pass ()
 155 {
 156         int r = av_read_frame (_format_context, &_packet);
 157
 158         if (r < 0) {
 159                 if (r != AVERROR_EOF) {
 160                         /* Maybe we should fail here, but for now we'll just finish off instead */
 161                         char buf[256];
 162                         av_strerror (r, buf, sizeof(buf));
 163                         shared_ptr<const Film> film = _film.lock ();
 164                         assert (film);
 165                         film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 166                 }
 167
 168                 flush ();
 169                 return true;
 170         }
 171
 172         avcodec_get_frame_defaults (_frame);
 173
 174         shared_ptr<const Film> film = _film.lock ();
 175         assert (film);
 176
 177         int const si = _packet.stream_index;
 178
 179         if (si == _video_stream && _decode_video) {
 180                 decode_video_packet ();
 181         } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
 182                 decode_audio_packet ();
 183         } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
 184                 decode_subtitle_packet ();
 185         }
 186
 187         av_free_packet (&_packet);
 188         return false;
 189 }
 190
 191 /** @param data pointer to array of pointers to buffers.
 192  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 193  */
 194 shared_ptr<AudioBuffers>
 195 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 196 {
 197         assert (_ffmpeg_content->audio_channels());
 198         assert (bytes_per_audio_sample());
 199
 200         /* Deinterleave and convert to float */
 201
 202         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 203
 204         int const total_samples = size / bytes_per_audio_sample();
 205         int const frames = total_samples / _ffmpeg_content->audio_channels();
 206         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 207
 208         switch (audio_sample_format()) {
 209         case AV_SAMPLE_FMT_S16:
 210         {
 211                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 212                 int sample = 0;
 213                 int channel = 0;
 214                 for (int i = 0; i < total_samples; ++i) {
 215                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 216
 217                         ++channel;
 218                         if (channel == _ffmpeg_content->audio_channels()) {
 219                                 channel = 0;
 220                                 ++sample;
 221                         }
 222                 }
 223         }
 224         break;
 225
 226         case AV_SAMPLE_FMT_S16P:
 227         {
 228                 int16_t** p = reinterpret_cast<int16_t **> (data);
 229                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 230                         for (int j = 0; j < frames; ++j) {
 231                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 232                         }
 233                 }
 234         }
 235         break;
 236
 237         case AV_SAMPLE_FMT_S32:
 238         {
 239                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 240                 int sample = 0;
 241                 int channel = 0;
 242                 for (int i = 0; i < total_samples; ++i) {
 243                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 244
 245                         ++channel;
 246                         if (channel == _ffmpeg_content->audio_channels()) {
 247                                 channel = 0;
 248                                 ++sample;
 249                         }
 250                 }
 251         }
 252         break;
 253
 254         case AV_SAMPLE_FMT_FLT:
 255         {
 256                 float* p = reinterpret_cast<float*> (data[0]);
 257                 int sample = 0;
 258                 int channel = 0;
 259                 for (int i = 0; i < total_samples; ++i) {
 260                         audio->data(channel)[sample] = *p++;
 261
 262                         ++channel;
 263                         if (channel == _ffmpeg_content->audio_channels()) {
 264                                 channel = 0;
 265                                 ++sample;
 266                         }
 267                 }
 268         }
 269         break;
 270
 271         case AV_SAMPLE_FMT_FLTP:
 272         {
 273                 float** p = reinterpret_cast<float**> (data);
 274                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 275                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 276                 }
 277         }
 278         break;
 279
 280         default:
 281                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 282         }
 283
 284         return audio;
 285 }
 286
 287 AVSampleFormat
 288 FFmpegDecoder::audio_sample_format () const
 289 {
 290         if (!_ffmpeg_content->audio_stream()) {
 291                 return (AVSampleFormat) 0;
 292         }
 293
 294         return audio_codec_context()->sample_fmt;
 295 }
 296
 297 int
 298 FFmpegDecoder::bytes_per_audio_sample () const
 299 {
 300         return av_get_bytes_per_sample (audio_sample_format ());
 301 }
 302
 303 int
 304 FFmpegDecoder::minimal_run (boost::function<bool (ContentTime, ContentTime, int)> finished)
 305 {
 306         int frames_read = 0;
 307         ContentTime last_video = 0;
 308         ContentTime last_audio = 0;
 309         bool flushing = false;
 310
 311         while (!finished (last_video, last_audio, frames_read)) {
 312                 int r = av_read_frame (_format_context, &_packet);
 313                 if (r < 0) {
 314                         /* We should flush our decoders here, possibly yielding a few more frames,
 315                            but the consequence of having to do that is too hideous to contemplate.
 316                            Instead we give up and say that you can't seek too close to the end
 317                            of a file.
 318                         */
 319                         return frames_read;
 320                 }
 321
 322                 ++frames_read;
 323
 324                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 325
 326                 if (_packet.stream_index == _video_stream) {
 327
 328                         avcodec_get_frame_defaults (_frame);
 329
 330                         int finished = 0;
 331                         r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
 332                         if (r >= 0 && finished) {
 333                                 last_video = rint (
 334                                         (av_frame_get_best_effort_timestamp (_frame) * time_base + _video_pts_offset) * TIME_HZ
 335                                         );
 336                         }
 337
 338                 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
 339                         AVPacket copy_packet = _packet;
 340                         while (copy_packet.size > 0) {
 341
 342                                 int finished;
 343                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
 344                                 if (r >= 0 && finished) {
 345                                         last_audio = rint (
 346                                                 (av_frame_get_best_effort_timestamp (_frame) * time_base + _audio_pts_offset) * TIME_HZ
 347                                                 );
 348                                 }
 349
 350                                 copy_packet.data += r;
 351                                 copy_packet.size -= r;
 352                         }
 353                 }
 354
 355                 av_free_packet (&_packet);
 356         }
 357
 358         return frames_read;
 359 }
 360
 361 bool
 362 FFmpegDecoder::seek_overrun_finished (ContentTime seek, ContentTime last_video, ContentTime last_audio) const
 363 {
 364         return last_video >= seek || last_audio >= seek;
 365 }
 366
 367 bool
 368 FFmpegDecoder::seek_final_finished (int n, int done) const
 369 {
 370         return n == done;
 371 }
 372
 373 void
 374 FFmpegDecoder::seek_and_flush (ContentTime t)
 375 {
 376         int64_t const initial_v = ((double (t) / TIME_HZ) - _video_pts_offset) /
 377                 av_q2d (_format_context->streams[_video_stream]->time_base);
 378
 379         av_seek_frame (_format_context, _video_stream, initial_v, AVSEEK_FLAG_BACKWARD);
 380
 381         shared_ptr<FFmpegAudioStream> as = _ffmpeg_content->audio_stream ();
 382         if (as) {
 383                 int64_t initial_a = ((double (t) / TIME_HZ) - _audio_pts_offset) /
 384                         av_q2d (as->stream(_format_context)->time_base);
 385
 386                 av_seek_frame (_format_context, as->index (_format_context), initial_a, AVSEEK_FLAG_BACKWARD);
 387         }
 388
 389         avcodec_flush_buffers (video_codec_context());
 390         if (audio_codec_context ()) {
 391                 avcodec_flush_buffers (audio_codec_context ());
 392         }
 393         if (_subtitle_codec_context) {
 394                 avcodec_flush_buffers (_subtitle_codec_context);
 395         }
 396 }
 397
 398 void
 399 FFmpegDecoder::seek (ContentTime time, bool accurate)
 400 {
 401         Decoder::seek (time, accurate);
 402
 403         /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
 404            a number plucked from the air) earlier than we want to end up.  The loop below
 405            will hopefully then step through to where we want to be.
 406         */
 407
 408         ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
 409         ContentTime initial_seek = time - pre_roll;
 410         if (initial_seek < 0) {
 411                 initial_seek = 0;
 412         }
 413
 414         /* Initial seek time in the video stream's timebase */
 415
 416         seek_and_flush (initial_seek);
 417
 418         if (time == 0 || !accurate) {
 419                 /* We're already there, or we're as close as we need to be */
 420                 return;
 421         }
 422
 423         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 424
 425         seek_and_flush (initial_seek);
 426         if (N > 0) {
 427                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 428         }
 429 }
 430
 431 void
 432 FFmpegDecoder::decode_audio_packet ()
 433 {
 434         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 435            several times.
 436         */
 437
 438         AVPacket copy_packet = _packet;
 439
 440         while (copy_packet.size > 0) {
 441
 442                 int frame_finished;
 443                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 444
 445                 if (decode_result < 0) {
 446                         shared_ptr<const Film> film = _film.lock ();
 447                         assert (film);
 448                         film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 449                         return;
 450                 }
 451
 452                 if (frame_finished) {
 453                         ContentTime const t = rint (
 454                                 (av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
 455                                  * av_frame_get_best_effort_timestamp(_frame) + _audio_pts_offset) * TIME_HZ
 456                                 );
 457
 458                         int const data_size = av_samples_get_buffer_size (
 459                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 460                                 );
 461
 462                         audio (deinterleave_audio (_frame->data, data_size), t);
 463                 }
 464
 465                 copy_packet.data += decode_result;
 466                 copy_packet.size -= decode_result;
 467         }
 468 }
 469
 470 bool
 471 FFmpegDecoder::decode_video_packet ()
 472 {
 473         int frame_finished;
 474         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 475                 return false;
 476         }
 477
 478         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 479
 480         shared_ptr<FilterGraph> graph;
 481
 482         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 483         while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 484                 ++i;
 485         }
 486
 487         if (i == _filter_graphs.end ()) {
 488                 shared_ptr<const Film> film = _film.lock ();
 489                 assert (film);
 490
 491                 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 492                 _filter_graphs.push_back (graph);
 493
 494                 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 495         } else {
 496                 graph = *i;
 497         }
 498
 499         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 500
 501         string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
 502
 503         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 504
 505                 shared_ptr<Image> image = i->first;
 506                 if (!post_process.empty ()) {
 507                         image = image->post_process (post_process, true);
 508                 }
 509
 510                 if (i->second != AV_NOPTS_VALUE) {
 511                         ContentTime const t = rint ((i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _video_pts_offset) * TIME_HZ);
 512                         video (image, false, t);
 513                 } else {
 514                         shared_ptr<const Film> film = _film.lock ();
 515                         assert (film);
 516                         film->log()->log ("Dropping frame without PTS");
 517                 }
 518         }
 519
 520         return true;
 521 }
 522
 523
 524 void
 525 FFmpegDecoder::setup_subtitle ()
 526 {
 527         boost::mutex::scoped_lock lm (_mutex);
 528
 529         if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
 530                 return;
 531         }
 532
 533         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 534         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 535
 536         if (_subtitle_codec == 0) {
 537                 throw DecodeError (_("could not find subtitle decoder"));
 538         }
 539
 540         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 541                 throw DecodeError (N_("could not open subtitle decoder"));
 542         }
 543 }
 544
 545 void
 546 FFmpegDecoder::decode_subtitle_packet ()
 547 {
 548         int got_subtitle;
 549         AVSubtitle sub;
 550         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 551                 return;
 552         }
 553
 554         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 555            indicate that the previous subtitle should stop.
 556         */
 557         if (sub.num_rects <= 0) {
 558                 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
 559                 return;
 560         } else if (sub.num_rects > 1) {
 561                 throw DecodeError (_("multi-part subtitles not yet supported"));
 562         }
 563
 564         /* Subtitle PTS in seconds (within the source, not taking into account any of the
 565            source that we may have chopped off for the DCP)
 566         */
 567         double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;
 568
 569         /* hence start time for this sub */
 570         DCPTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
 571         DCPTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
 572
 573         AVSubtitleRect const * rect = sub.rects[0];
 574
 575         if (rect->type != SUBTITLE_BITMAP) {
 576                 throw DecodeError (_("non-bitmap subtitles not yet supported"));
 577         }
 578
 579         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
 580
 581         /* Start of the first line in the subtitle */
 582         uint8_t* sub_p = rect->pict.data[0];
 583         /* sub_p looks up into a RGB palette which is here */
 584         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 585         /* Start of the output data */
 586         uint32_t* out_p = (uint32_t *) image->data()[0];
 587
 588         for (int y = 0; y < rect->h; ++y) {
 589                 uint8_t* sub_line_p = sub_p;
 590                 uint32_t* out_line_p = out_p;
 591                 for (int x = 0; x < rect->w; ++x) {
 592                         *out_line_p++ = palette[*sub_line_p++];
 593                 }
 594                 sub_p += rect->pict.linesize[0];
 595                 out_p += image->stride()[0] / sizeof (uint32_t);
 596         }
 597
 598         libdcp::Size const vs = _ffmpeg_content->video_size ();
 599
 600         subtitle (
 601                 image,
 602                 dcpomatic::Rect<double> (
 603                         static_cast<double> (rect->x) / vs.width,
 604                         static_cast<double> (rect->y) / vs.height,
 605                         static_cast<double> (rect->w) / vs.width,
 606                         static_cast<double> (rect->h) / vs.height
 607                         ),
 608                 from,
 609                 to
 610                 );
 611
 612
 613         avsubtitle_free (&sub);
 614 }