src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 #include <sndfile.h>
  32 extern "C" {
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 }
  36 #include "film.h"
  37 #include "filter.h"
  38 #include "exceptions.h"
  39 #include "image.h"
  40 #include "util.h"
  41 #include "log.h"
  42 #include "ffmpeg_decoder.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46
  47 #include "i18n.h"
  48
  49 using std::cout;
  50 using std::string;
  51 using std::vector;
  52 using std::stringstream;
  53 using std::list;
  54 using std::min;
  55 using std::pair;
  56 using boost::shared_ptr;
  57 using boost::optional;
  58 using boost::dynamic_pointer_cast;
  59 using dcp::Size;
  60
  61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
  62         : Decoder (f)
  63         , VideoDecoder (f, c)
  64         , AudioDecoder (f, c)
  65         , SubtitleDecoder (f)
  66         , FFmpeg (c)
  67         , _subtitle_codec_context (0)
  68         , _subtitle_codec (0)
  69         , _decode_video (video)
  70         , _decode_audio (audio)
  71         , _pts_offset (0)
  72 {
  73         setup_subtitle ();
  74
  75         /* Audio and video frame PTS values may not start with 0.  We want
  76            to fiddle them so that:
  77
  78            1.  One of them starts at time 0.
  79            2.  The first video PTS value ends up on a frame boundary.
  80
  81            Then we remove big initial gaps in PTS and we allow our
  82            insertion of black frames to work.
  83
  84            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  85         */
  86
  87         bool const have_video = video && c->first_video();
  88         bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
  89
  90         /* First, make one of them start at 0 */
  91
  92         if (have_audio && have_video) {
  93                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  94         } else if (have_video) {
  95                 _pts_offset = - c->first_video().get();
  96         } else if (have_audio) {
  97                 _pts_offset = - c->audio_stream()->first_audio.get();
  98         }
  99
 100         /* Now adjust both so that the video pts starts on a frame */
 101         if (have_video && have_audio) {
 102                 ContentTime first_video = c->first_video().get() + _pts_offset;
 103                 ContentTime const old_first_video = first_video;
 104                 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
 105         }
 106 }
 107
 108 FFmpegDecoder::~FFmpegDecoder ()
 109 {
 110         boost::mutex::scoped_lock lm (_mutex);
 111
 112         if (_subtitle_codec_context) {
 113                 avcodec_close (_subtitle_codec_context);
 114         }
 115 }
 116
 117 void
 118 FFmpegDecoder::flush ()
 119 {
 120         /* Get any remaining frames */
 121
 122         _packet.data = 0;
 123         _packet.size = 0;
 124
 125         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 126
 127         if (_decode_video) {
 128                 while (decode_video_packet ()) {}
 129         }
 130
 131         if (_ffmpeg_content->audio_stream() && _decode_audio) {
 132                 decode_audio_packet ();
 133                 AudioDecoder::flush ();
 134         }
 135 }
 136
 137 bool
 138 FFmpegDecoder::pass ()
 139 {
 140         int r = av_read_frame (_format_context, &_packet);
 141
 142         if (r < 0) {
 143                 if (r != AVERROR_EOF) {
 144                         /* Maybe we should fail here, but for now we'll just finish off instead */
 145                         char buf[256];
 146                         av_strerror (r, buf, sizeof(buf));
 147                         shared_ptr<const Film> film = _film.lock ();
 148                         assert (film);
 149                         film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 150                 }
 151
 152                 flush ();
 153                 return true;
 154         }
 155
 156         shared_ptr<const Film> film = _film.lock ();
 157         assert (film);
 158
 159         int const si = _packet.stream_index;
 160
 161         if (si == _video_stream && _decode_video) {
 162                 decode_video_packet ();
 163         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
 164                 decode_audio_packet ();
 165         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && film->with_subtitles ()) {
 166                 decode_subtitle_packet ();
 167         }
 168
 169         av_free_packet (&_packet);
 170         return false;
 171 }
 172
 173 /** @param data pointer to array of pointers to buffers.
 174  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 175  */
 176 shared_ptr<AudioBuffers>
 177 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 178 {
 179         assert (_ffmpeg_content->audio_channels());
 180         assert (bytes_per_audio_sample());
 181
 182         /* Deinterleave and convert to float */
 183
 184         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 185
 186         int const total_samples = size / bytes_per_audio_sample();
 187         int const frames = total_samples / _ffmpeg_content->audio_channels();
 188         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 189
 190         switch (audio_sample_format()) {
 191         case AV_SAMPLE_FMT_S16:
 192         {
 193                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 194                 int sample = 0;
 195                 int channel = 0;
 196                 for (int i = 0; i < total_samples; ++i) {
 197                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 198
 199                         ++channel;
 200                         if (channel == _ffmpeg_content->audio_channels()) {
 201                                 channel = 0;
 202                                 ++sample;
 203                         }
 204                 }
 205         }
 206         break;
 207
 208         case AV_SAMPLE_FMT_S16P:
 209         {
 210                 int16_t** p = reinterpret_cast<int16_t **> (data);
 211                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 212                         for (int j = 0; j < frames; ++j) {
 213                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 214                         }
 215                 }
 216         }
 217         break;
 218
 219         case AV_SAMPLE_FMT_S32:
 220         {
 221                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 222                 int sample = 0;
 223                 int channel = 0;
 224                 for (int i = 0; i < total_samples; ++i) {
 225                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 226
 227                         ++channel;
 228                         if (channel == _ffmpeg_content->audio_channels()) {
 229                                 channel = 0;
 230                                 ++sample;
 231                         }
 232                 }
 233         }
 234         break;
 235
 236         case AV_SAMPLE_FMT_FLT:
 237         {
 238                 float* p = reinterpret_cast<float*> (data[0]);
 239                 int sample = 0;
 240                 int channel = 0;
 241                 for (int i = 0; i < total_samples; ++i) {
 242                         audio->data(channel)[sample] = *p++;
 243
 244                         ++channel;
 245                         if (channel == _ffmpeg_content->audio_channels()) {
 246                                 channel = 0;
 247                                 ++sample;
 248                         }
 249                 }
 250         }
 251         break;
 252
 253         case AV_SAMPLE_FMT_FLTP:
 254         {
 255                 float** p = reinterpret_cast<float**> (data);
 256                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 257                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 258                 }
 259         }
 260         break;
 261
 262         default:
 263                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 264         }
 265
 266         return audio;
 267 }
 268
 269 AVSampleFormat
 270 FFmpegDecoder::audio_sample_format () const
 271 {
 272         if (!_ffmpeg_content->audio_stream()) {
 273                 return (AVSampleFormat) 0;
 274         }
 275
 276         return audio_codec_context()->sample_fmt;
 277 }
 278
 279 int
 280 FFmpegDecoder::bytes_per_audio_sample () const
 281 {
 282         return av_get_bytes_per_sample (audio_sample_format ());
 283 }
 284
 285 int
 286 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 287 {
 288         int frames_read = 0;
 289         optional<ContentTime> last_video;
 290         optional<ContentTime> last_audio;
 291
 292         while (!finished (last_video, last_audio, frames_read)) {
 293                 int r = av_read_frame (_format_context, &_packet);
 294                 if (r < 0) {
 295                         /* We should flush our decoders here, possibly yielding a few more frames,
 296                            but the consequence of having to do that is too hideous to contemplate.
 297                            Instead we give up and say that you can't seek too close to the end
 298                            of a file.
 299                         */
 300                         return frames_read;
 301                 }
 302
 303                 ++frames_read;
 304
 305                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 306
 307                 if (_packet.stream_index == _video_stream) {
 308
 309                         avcodec_get_frame_defaults (_frame);
 310
 311                         int finished = 0;
 312                         r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
 313                         if (r >= 0 && finished) {
 314                                 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 315                         }
 316
 317                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 318                         AVPacket copy_packet = _packet;
 319                         while (copy_packet.size > 0) {
 320
 321                                 int finished;
 322                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
 323                                 if (r >= 0 && finished) {
 324                                         last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 325                                 }
 326
 327                                 copy_packet.data += r;
 328                                 copy_packet.size -= r;
 329                         }
 330                 }
 331
 332                 av_free_packet (&_packet);
 333         }
 334
 335         return frames_read;
 336 }
 337
 338 bool
 339 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 340 {
 341         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 342 }
 343
 344 bool
 345 FFmpegDecoder::seek_final_finished (int n, int done) const
 346 {
 347         return n == done;
 348 }
 349
 350 void
 351 FFmpegDecoder::seek_and_flush (ContentTime t)
 352 {
 353         ContentTime const u = t - _pts_offset;
 354         int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
 355
 356         if (_ffmpeg_content->audio_stream ()) {
 357                 s = min (
 358                         s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
 359                         );
 360         }
 361
 362         /* Ridiculous empirical hack */
 363         s--;
 364         if (s < 0) {
 365                 s = 0;
 366         }
 367
 368         av_seek_frame (_format_context, _video_stream, s, 0);
 369
 370         avcodec_flush_buffers (video_codec_context());
 371         if (audio_codec_context ()) {
 372                 avcodec_flush_buffers (audio_codec_context ());
 373         }
 374         if (_subtitle_codec_context) {
 375                 avcodec_flush_buffers (_subtitle_codec_context);
 376         }
 377 }
 378
 379 void
 380 FFmpegDecoder::seek (ContentTime time, bool accurate)
 381 {
 382         Decoder::seek (time, accurate);
 383         if (_decode_audio) {
 384                 AudioDecoder::seek (time, accurate);
 385         }
 386
 387         /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
 388            a number plucked from the air) earlier than we want to end up.  The loop below
 389            will hopefully then step through to where we want to be.
 390         */
 391
 392         ContentTime pre_roll = accurate ? ContentTime::from_seconds (0.2) : ContentTime (0);
 393         ContentTime initial_seek = time - pre_roll;
 394         if (initial_seek < ContentTime (0)) {
 395                 initial_seek = ContentTime (0);
 396         }
 397
 398         /* Initial seek time in the video stream's timebase */
 399
 400         seek_and_flush (initial_seek);
 401
 402         if (!accurate) {
 403                 /* That'll do */
 404                 return;
 405         }
 406
 407         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 408
 409         seek_and_flush (initial_seek);
 410         if (N > 0) {
 411                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 412         }
 413 }
 414
 415 void
 416 FFmpegDecoder::decode_audio_packet ()
 417 {
 418         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 419            several times.
 420         */
 421
 422         AVPacket copy_packet = _packet;
 423
 424         while (copy_packet.size > 0) {
 425
 426                 int frame_finished;
 427                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 428
 429                 if (decode_result < 0) {
 430                         shared_ptr<const Film> film = _film.lock ();
 431                         assert (film);
 432                         film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 433                         return;
 434                 }
 435
 436                 if (frame_finished) {
 437                         ContentTime const ct = ContentTime::from_seconds (
 438                                 av_frame_get_best_effort_timestamp (_frame) *
 439                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
 440                                 + _pts_offset;
 441
 442                         int const data_size = av_samples_get_buffer_size (
 443                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 444                                 );
 445
 446                         audio (deinterleave_audio (_frame->data, data_size), ct);
 447                 }
 448
 449                 copy_packet.data += decode_result;
 450                 copy_packet.size -= decode_result;
 451         }
 452 }
 453
 454 bool
 455 FFmpegDecoder::decode_video_packet ()
 456 {
 457         int frame_finished;
 458         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 459                 return false;
 460         }
 461
 462         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 463
 464         shared_ptr<FilterGraph> graph;
 465
 466         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 467         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 468                 ++i;
 469         }
 470
 471         if (i == _filter_graphs.end ()) {
 472                 shared_ptr<const Film> film = _film.lock ();
 473                 assert (film);
 474
 475                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 476                 _filter_graphs.push_back (graph);
 477
 478                 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 479         } else {
 480                 graph = *i;
 481         }
 482
 483         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 484
 485         string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
 486
 487         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 488
 489                 shared_ptr<Image> image = i->first;
 490                 if (!post_process.empty ()) {
 491                         image = image->post_process (post_process, true);
 492                 }
 493
 494                 if (i->second != AV_NOPTS_VALUE) {
 495                         video (image, false, ContentTime::from_seconds (i->second * av_q2d (_format_context->streams[_video_stream]->time_base)) + _pts_offset);
 496                 } else {
 497                         shared_ptr<const Film> film = _film.lock ();
 498                         assert (film);
 499                         film->log()->log ("Dropping frame without PTS");
 500                 }
 501         }
 502
 503         return true;
 504 }
 505
 506
 507 void
 508 FFmpegDecoder::setup_subtitle ()
 509 {
 510         boost::mutex::scoped_lock lm (_mutex);
 511
 512         if (!_ffmpeg_content->subtitle_stream()) {
 513                 return;
 514         }
 515
 516         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 517         if (_subtitle_codec_context == 0) {
 518                 throw DecodeError (N_("could not find subtitle stream"));
 519         }
 520
 521         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 522
 523         if (_subtitle_codec == 0) {
 524                 throw DecodeError (N_("could not find subtitle decoder"));
 525         }
 526
 527         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 528                 throw DecodeError (N_("could not open subtitle decoder"));
 529         }
 530 }
 531
 532 void
 533 FFmpegDecoder::decode_subtitle_packet ()
 534 {
 535         int got_subtitle;
 536         AVSubtitle sub;
 537         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 538                 return;
 539         }
 540
 541         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 542            indicate that the previous subtitle should stop.
 543         */
 544         if (sub.num_rects <= 0) {
 545                 image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), ContentTime (), ContentTime ());
 546                 return;
 547         } else if (sub.num_rects > 1) {
 548                 throw DecodeError (_("multi-part subtitles not yet supported"));
 549         }
 550
 551         /* Subtitle PTS (within the source, not taking into account any of the
 552            source that we may have chopped off for the DCP)
 553         */
 554         ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
 555
 556         /* hence start time for this sub */
 557         ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
 558         ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
 559
 560         AVSubtitleRect const * rect = sub.rects[0];
 561
 562         if (rect->type != SUBTITLE_BITMAP) {
 563                 throw DecodeError (_("non-bitmap subtitles not yet supported"));
 564         }
 565
 566         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 567            G, third B, fourth A.
 568         */
 569         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 570
 571         /* Start of the first line in the subtitle */
 572         uint8_t* sub_p = rect->pict.data[0];
 573         /* sub_p looks up into a BGRA palette which is here
 574            (i.e. first byte B, second G, third R, fourth A)
 575         */
 576         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 577         /* Start of the output data */
 578         uint32_t* out_p = (uint32_t *) image->data()[0];
 579
 580         for (int y = 0; y < rect->h; ++y) {
 581                 uint8_t* sub_line_p = sub_p;
 582                 uint32_t* out_line_p = out_p;
 583                 for (int x = 0; x < rect->w; ++x) {
 584                         uint32_t const p = palette[*sub_line_p++];
 585                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 586                 }
 587                 sub_p += rect->pict.linesize[0];
 588                 out_p += image->stride()[0] / sizeof (uint32_t);
 589         }
 590
 591         dcp::Size const vs = _ffmpeg_content->video_size ();
 592
 593         image_subtitle (
 594                 image,
 595                 dcpomatic::Rect<double> (
 596                         static_cast<double> (rect->x) / vs.width,
 597                         static_cast<double> (rect->y) / vs.height,
 598                         static_cast<double> (rect->w) / vs.width,
 599                         static_cast<double> (rect->h) / vs.height
 600                         ),
 601                 from,
 602                 to
 603                 );
 604
 605
 606         avsubtitle_free (&sub);
 607 }