src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 #include <sndfile.h>
  32 extern "C" {
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 }
  36 #include "film.h"
  37 #include "filter.h"
  38 #include "exceptions.h"
  39 #include "image.h"
  40 #include "util.h"
  41 #include "log.h"
  42 #include "ffmpeg_decoder.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46
  47 #include "i18n.h"
  48
  49 using std::cout;
  50 using std::string;
  51 using std::vector;
  52 using std::stringstream;
  53 using std::list;
  54 using std::min;
  55 using std::pair;
  56 using boost::shared_ptr;
  57 using boost::optional;
  58 using boost::dynamic_pointer_cast;
  59 using dcp::Size;
  60
  61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
  62         : Decoder (f)
  63         , VideoDecoder (f, c)
  64         , AudioDecoder (f, c)
  65         , SubtitleDecoder (f)
  66         , FFmpeg (c)
  67         , _subtitle_codec_context (0)
  68         , _subtitle_codec (0)
  69         , _decode_video (video)
  70         , _decode_audio (audio)
  71         , _pts_offset (0)
  72 {
  73         setup_subtitle ();
  74
  75         /* Audio and video frame PTS values may not start with 0.  We want
  76            to fiddle them so that:
  77
  78            1.  One of them starts at time 0.
  79            2.  The first video PTS value ends up on a frame boundary.
  80
  81            Then we remove big initial gaps in PTS and we allow our
  82            insertion of black frames to work.
  83
  84            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  85         */
  86
  87         bool const have_video = video && c->first_video();
  88         bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
  89
  90         /* First, make one of them start at 0 */
  91
  92         if (have_audio && have_video) {
  93                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  94         } else if (have_video) {
  95                 _pts_offset = - c->first_video().get();
  96         } else if (have_audio) {
  97                 _pts_offset = - c->audio_stream()->first_audio.get();
  98         }
  99
 100         /* Now adjust both so that the video pts starts on a frame */
 101         if (have_video && have_audio) {
 102                 double first_video = c->first_video().get() + _pts_offset;
 103                 double const old_first_video = first_video;
 104
 105                 /* Round the first video up to a frame boundary */
 106                 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
 107                         first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
 108                 }
 109
 110                 _pts_offset += first_video - old_first_video;
 111         }
 112 }
 113
 114 FFmpegDecoder::~FFmpegDecoder ()
 115 {
 116         boost::mutex::scoped_lock lm (_mutex);
 117
 118         if (_subtitle_codec_context) {
 119                 avcodec_close (_subtitle_codec_context);
 120         }
 121 }
 122
 123 void
 124 FFmpegDecoder::flush ()
 125 {
 126         /* Get any remaining frames */
 127
 128         _packet.data = 0;
 129         _packet.size = 0;
 130
 131         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 132
 133         if (_decode_video) {
 134                 while (decode_video_packet ()) {}
 135         }
 136
 137         if (_ffmpeg_content->audio_stream() && _decode_audio) {
 138                 decode_audio_packet ();
 139                 AudioDecoder::flush ();
 140         }
 141 }
 142
 143 bool
 144 FFmpegDecoder::pass ()
 145 {
 146         int r = av_read_frame (_format_context, &_packet);
 147
 148         if (r < 0) {
 149                 if (r != AVERROR_EOF) {
 150                         /* Maybe we should fail here, but for now we'll just finish off instead */
 151                         char buf[256];
 152                         av_strerror (r, buf, sizeof(buf));
 153                         shared_ptr<const Film> film = _film.lock ();
 154                         assert (film);
 155                         film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 156                 }
 157
 158                 flush ();
 159                 return true;
 160         }
 161
 162         shared_ptr<const Film> film = _film.lock ();
 163         assert (film);
 164
 165         int const si = _packet.stream_index;
 166
 167         if (si == _video_stream && _decode_video) {
 168                 decode_video_packet ();
 169         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
 170                 decode_audio_packet ();
 171         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && film->with_subtitles ()) {
 172                 decode_subtitle_packet ();
 173         }
 174
 175         av_free_packet (&_packet);
 176         return false;
 177 }
 178
 179 /** @param data pointer to array of pointers to buffers.
 180  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 181  */
 182 shared_ptr<AudioBuffers>
 183 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 184 {
 185         assert (_ffmpeg_content->audio_channels());
 186         assert (bytes_per_audio_sample());
 187
 188         /* Deinterleave and convert to float */
 189
 190         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 191
 192         int const total_samples = size / bytes_per_audio_sample();
 193         int const frames = total_samples / _ffmpeg_content->audio_channels();
 194         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 195
 196         switch (audio_sample_format()) {
 197         case AV_SAMPLE_FMT_S16:
 198         {
 199                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 200                 int sample = 0;
 201                 int channel = 0;
 202                 for (int i = 0; i < total_samples; ++i) {
 203                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 204
 205                         ++channel;
 206                         if (channel == _ffmpeg_content->audio_channels()) {
 207                                 channel = 0;
 208                                 ++sample;
 209                         }
 210                 }
 211         }
 212         break;
 213
 214         case AV_SAMPLE_FMT_S16P:
 215         {
 216                 int16_t** p = reinterpret_cast<int16_t **> (data);
 217                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 218                         for (int j = 0; j < frames; ++j) {
 219                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 220                         }
 221                 }
 222         }
 223         break;
 224
 225         case AV_SAMPLE_FMT_S32:
 226         {
 227                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 228                 int sample = 0;
 229                 int channel = 0;
 230                 for (int i = 0; i < total_samples; ++i) {
 231                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 232
 233                         ++channel;
 234                         if (channel == _ffmpeg_content->audio_channels()) {
 235                                 channel = 0;
 236                                 ++sample;
 237                         }
 238                 }
 239         }
 240         break;
 241
 242         case AV_SAMPLE_FMT_FLT:
 243         {
 244                 float* p = reinterpret_cast<float*> (data[0]);
 245                 int sample = 0;
 246                 int channel = 0;
 247                 for (int i = 0; i < total_samples; ++i) {
 248                         audio->data(channel)[sample] = *p++;
 249
 250                         ++channel;
 251                         if (channel == _ffmpeg_content->audio_channels()) {
 252                                 channel = 0;
 253                                 ++sample;
 254                         }
 255                 }
 256         }
 257         break;
 258
 259         case AV_SAMPLE_FMT_FLTP:
 260         {
 261                 float** p = reinterpret_cast<float**> (data);
 262                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 263                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 264                 }
 265         }
 266         break;
 267
 268         default:
 269                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 270         }
 271
 272         return audio;
 273 }
 274
 275 AVSampleFormat
 276 FFmpegDecoder::audio_sample_format () const
 277 {
 278         if (!_ffmpeg_content->audio_stream()) {
 279                 return (AVSampleFormat) 0;
 280         }
 281
 282         return audio_codec_context()->sample_fmt;
 283 }
 284
 285 int
 286 FFmpegDecoder::bytes_per_audio_sample () const
 287 {
 288         return av_get_bytes_per_sample (audio_sample_format ());
 289 }
 290
 291 int
 292 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 293 {
 294         int frames_read = 0;
 295         optional<ContentTime> last_video;
 296         optional<ContentTime> last_audio;
 297
 298         while (!finished (last_video, last_audio, frames_read)) {
 299                 int r = av_read_frame (_format_context, &_packet);
 300                 if (r < 0) {
 301                         /* We should flush our decoders here, possibly yielding a few more frames,
 302                            but the consequence of having to do that is too hideous to contemplate.
 303                            Instead we give up and say that you can't seek too close to the end
 304                            of a file.
 305                         */
 306                         return frames_read;
 307                 }
 308
 309                 ++frames_read;
 310
 311                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 312
 313                 if (_packet.stream_index == _video_stream) {
 314
 315                         avcodec_get_frame_defaults (_frame);
 316
 317                         int finished = 0;
 318                         r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
 319                         if (r >= 0 && finished) {
 320                                 last_video = rint (
 321                                         (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
 322                                         );
 323                         }
 324
 325                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 326                         AVPacket copy_packet = _packet;
 327                         while (copy_packet.size > 0) {
 328
 329                                 int finished;
 330                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
 331                                 if (r >= 0 && finished) {
 332                                         last_audio = rint (
 333                                                 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
 334                                                 );
 335                                 }
 336
 337                                 copy_packet.data += r;
 338                                 copy_packet.size -= r;
 339                         }
 340                 }
 341
 342                 av_free_packet (&_packet);
 343         }
 344
 345         return frames_read;
 346 }
 347
 348 bool
 349 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 350 {
 351         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 352 }
 353
 354 bool
 355 FFmpegDecoder::seek_final_finished (int n, int done) const
 356 {
 357         return n == done;
 358 }
 359
 360 void
 361 FFmpegDecoder::seek_and_flush (ContentTime t)
 362 {
 363         int64_t s = ((double (t) / TIME_HZ) - _pts_offset) /
 364                 av_q2d (_format_context->streams[_video_stream]->time_base);
 365
 366         if (_ffmpeg_content->audio_stream ()) {
 367                 s = min (
 368                         s, int64_t (
 369                                 ((double (t) / TIME_HZ) - _pts_offset) /
 370                                 av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base)
 371                                 )
 372                         );
 373         }
 374
 375         cout << "S&F " << t << "\n";
 376
 377         /* Ridiculous empirical hack */
 378         s--;
 379         if (s < 0) {
 380                 s = 0;
 381         }
 382
 383         av_seek_frame (_format_context, _video_stream, s, 0);
 384
 385         avcodec_flush_buffers (video_codec_context());
 386         if (audio_codec_context ()) {
 387                 avcodec_flush_buffers (audio_codec_context ());
 388         }
 389         if (_subtitle_codec_context) {
 390                 avcodec_flush_buffers (_subtitle_codec_context);
 391         }
 392 }
 393
 394 void
 395 FFmpegDecoder::seek (ContentTime time, bool accurate)
 396 {
 397         Decoder::seek (time, accurate);
 398         if (_decode_audio) {
 399                 AudioDecoder::seek (time, accurate);
 400         }
 401
 402         /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
 403            a number plucked from the air) earlier than we want to end up.  The loop below
 404            will hopefully then step through to where we want to be.
 405         */
 406
 407         ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
 408         ContentTime initial_seek = time - pre_roll;
 409         if (initial_seek < 0) {
 410                 initial_seek = 0;
 411         }
 412
 413         /* Initial seek time in the video stream's timebase */
 414
 415         seek_and_flush (initial_seek);
 416
 417         if (!accurate) {
 418                 /* That'll do */
 419                 return;
 420         }
 421
 422         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 423
 424         seek_and_flush (initial_seek);
 425         if (N > 0) {
 426                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 427         }
 428 }
 429
 430 void
 431 FFmpegDecoder::decode_audio_packet ()
 432 {
 433         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 434            several times.
 435         */
 436
 437         AVPacket copy_packet = _packet;
 438
 439         while (copy_packet.size > 0) {
 440
 441                 int frame_finished;
 442                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 443
 444                 if (decode_result < 0) {
 445                         shared_ptr<const Film> film = _film.lock ();
 446                         assert (film);
 447                         film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 448                         return;
 449                 }
 450
 451                 if (frame_finished) {
 452                         ContentTime const ct = (
 453                                 av_frame_get_best_effort_timestamp (_frame) *
 454                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base)
 455                                 + _pts_offset
 456                                 ) * TIME_HZ;
 457
 458                         int const data_size = av_samples_get_buffer_size (
 459                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 460                                 );
 461
 462                         audio (deinterleave_audio (_frame->data, data_size), ct);
 463                 }
 464
 465                 copy_packet.data += decode_result;
 466                 copy_packet.size -= decode_result;
 467         }
 468 }
 469
 470 bool
 471 FFmpegDecoder::decode_video_packet ()
 472 {
 473         int frame_finished;
 474         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 475                 return false;
 476         }
 477
 478         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 479
 480         shared_ptr<FilterGraph> graph;
 481
 482         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 483         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 484                 ++i;
 485         }
 486
 487         if (i == _filter_graphs.end ()) {
 488                 shared_ptr<const Film> film = _film.lock ();
 489                 assert (film);
 490
 491                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 492                 _filter_graphs.push_back (graph);
 493
 494                 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 495         } else {
 496                 graph = *i;
 497         }
 498
 499         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 500
 501         string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
 502
 503         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 504
 505                 shared_ptr<Image> image = i->first;
 506                 if (!post_process.empty ()) {
 507                         image = image->post_process (post_process, true);
 508                 }
 509
 510                 if (i->second != AV_NOPTS_VALUE) {
 511                         double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset;
 512                         VideoFrame const f = rint (pts * _ffmpeg_content->video_frame_rate ());
 513                         video (image, false, f);
 514                 } else {
 515                         shared_ptr<const Film> film = _film.lock ();
 516                         assert (film);
 517                         film->log()->log ("Dropping frame without PTS");
 518                 }
 519         }
 520
 521         return true;
 522 }
 523
 524
 525 void
 526 FFmpegDecoder::setup_subtitle ()
 527 {
 528         boost::mutex::scoped_lock lm (_mutex);
 529
 530         if (!_ffmpeg_content->subtitle_stream()) {
 531                 return;
 532         }
 533
 534         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 535         if (_subtitle_codec_context == 0) {
 536                 throw DecodeError (N_("could not find subtitle stream"));
 537         }
 538
 539         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 540
 541         if (_subtitle_codec == 0) {
 542                 throw DecodeError (N_("could not find subtitle decoder"));
 543         }
 544
 545         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 546                 throw DecodeError (N_("could not open subtitle decoder"));
 547         }
 548 }
 549
 550 void
 551 FFmpegDecoder::decode_subtitle_packet ()
 552 {
 553         int got_subtitle;
 554         AVSubtitle sub;
 555         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 556                 return;
 557         }
 558
 559         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 560            indicate that the previous subtitle should stop.
 561         */
 562         if (sub.num_rects <= 0) {
 563                 image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
 564                 return;
 565         } else if (sub.num_rects > 1) {
 566                 throw DecodeError (_("multi-part subtitles not yet supported"));
 567         }
 568
 569         /* Subtitle PTS in seconds (within the source, not taking into account any of the
 570            source that we may have chopped off for the DCP)
 571         */
 572         double const packet_time = (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
 573
 574         /* hence start time for this sub */
 575         ContentTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
 576         ContentTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
 577
 578         AVSubtitleRect const * rect = sub.rects[0];
 579
 580         if (rect->type != SUBTITLE_BITMAP) {
 581                 throw DecodeError (_("non-bitmap subtitles not yet supported"));
 582         }
 583
 584         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 585            G, third B, fourth A.
 586         */
 587         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 588
 589         /* Start of the first line in the subtitle */
 590         uint8_t* sub_p = rect->pict.data[0];
 591         /* sub_p looks up into a BGRA palette which is here
 592            (i.e. first byte B, second G, third R, fourth A)
 593         */
 594         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 595         /* Start of the output data */
 596         uint32_t* out_p = (uint32_t *) image->data()[0];
 597
 598         for (int y = 0; y < rect->h; ++y) {
 599                 uint8_t* sub_line_p = sub_p;
 600                 uint32_t* out_line_p = out_p;
 601                 for (int x = 0; x < rect->w; ++x) {
 602                         uint32_t const p = palette[*sub_line_p++];
 603                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 604                 }
 605                 sub_p += rect->pict.linesize[0];
 606                 out_p += image->stride()[0] / sizeof (uint32_t);
 607         }
 608
 609         dcp::Size const vs = _ffmpeg_content->video_size ();
 610
 611         image_subtitle (
 612                 image,
 613                 dcpomatic::Rect<double> (
 614                         static_cast<double> (rect->x) / vs.width,
 615                         static_cast<double> (rect->y) / vs.height,
 616                         static_cast<double> (rect->w) / vs.width,
 617                         static_cast<double> (rect->h) / vs.height
 618                         ),
 619                 from,
 620                 to
 621                 );
 622
 623
 624         avsubtitle_free (&sub);
 625 }