Cleanup: move video frame handling into process_video_frame().
[dcpomatic.git] / src / lib / ffmpeg_decoder.cc
index cbde534c30608e97cdd1c673754a95ae6c32bf8b..f64ccbd3b6305249c4522ee69d175dee6bb26849 100644 (file)
 
 */
 
+
 /** @file  src/ffmpeg_decoder.cc
  *  @brief A decoder using FFmpeg to decode content.
  */
 
-#include "filter.h"
-#include "exceptions.h"
-#include "image.h"
-#include "util.h"
-#include "log.h"
+
+#include "audio_buffers.h"
+#include "audio_content.h"
+#include "audio_decoder.h"
+#include "compose.hpp"
 #include "dcpomatic_log.h"
-#include "ffmpeg_decoder.h"
-#include "text_decoder.h"
+#include "exceptions.h"
 #include "ffmpeg_audio_stream.h"
-#include "ffmpeg_subtitle_stream.h"
-#include "video_filter_graph.h"
-#include "audio_buffers.h"
 #include "ffmpeg_content.h"
-#include "raw_image_proxy.h"
-#include "video_decoder.h"
+#include "ffmpeg_decoder.h"
+#include "ffmpeg_subtitle_stream.h"
 #include "film.h"
-#include "audio_decoder.h"
-#include "compose.hpp"
+#include "filter.h"
+#include "frame_interval_checker.h"
+#include "image.h"
+#include "log.h"
+#include "raw_image_proxy.h"
 #include "text_content.h"
-#include "audio_content.h"
+#include "text_decoder.h"
+#include "util.h"
+#include "video_decoder.h"
+#include "video_filter_graph.h"
 #include <dcp/subtitle_string.h>
 #include <sub/ssa_reader.h>
 #include <sub/subtitle.h>
@@ -50,101 +53,114 @@ extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 }
-#include <boost/foreach.hpp>
 #include <boost/algorithm/string.hpp>
-#include <vector>
 #include <iomanip>
 #include <iostream>
+#include <vector>
 #include <stdint.h>
 
 #include "i18n.h"
 
+
 using std::cout;
+using std::dynamic_pointer_cast;
+using std::make_shared;
+using std::min;
+using std::shared_ptr;
 using std::string;
 using std::vector;
-using std::list;
-using std::min;
-using std::pair;
-using std::max;
-using std::map;
-using boost::shared_ptr;
-using boost::is_any_of;
-using boost::split;
 using boost::optional;
-using boost::dynamic_pointer_cast;
 using dcp::Size;
 using namespace dcpomatic;
 
+
 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
        : FFmpeg (c)
        , Decoder (film)
-       , _have_current_subtitle (false)
 {
        if (c->video && c->video->use()) {
-               video.reset (new VideoDecoder (this, c));
+               video = make_shared<VideoDecoder>(this, c);
                _pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
                /* It doesn't matter what size or pixel format this is; it just needs to be black */
-               _black_image.reset (new Image (AV_PIX_FMT_RGB24, dcp::Size (128, 128), true));
+               _black_image = make_shared<Image>(AV_PIX_FMT_RGB24, dcp::Size (128, 128), Image::Alignment::PADDED);
                _black_image->make_black ();
        } else {
-               _pts_offset = ContentTime ();
+               _pts_offset = {};
        }
 
        if (c->audio) {
-               audio.reset (new AudioDecoder (this, c->audio, fast));
+               audio = make_shared<AudioDecoder>(this, c->audio, fast);
        }
 
        if (c->only_text()) {
                /* XXX: this time here should be the time of the first subtitle, not 0 */
-               text.push_back (shared_ptr<TextDecoder> (new TextDecoder (this, c->only_text(), ContentTime())));
+               text.push_back (make_shared<TextDecoder>(this, c->only_text(), ContentTime()));
        }
 
-       _next_time.resize (_format_context->nb_streams);
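+       /* One entry per audio stream: the time we expect the stream's next frame
+          to start, or empty if we don't know (e.g. before any data has arrived
+          or just after a seek).
+       */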
+       for (auto i: c->ffmpeg_audio_streams()) {
+               _next_time[i] = boost::optional<dcpomatic::ContentTime>();
+       }
 }
 
-void
+
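+/** Flush any remaining data out of our decoders.
+ *  @return false if we should be called again, because this call either
+ *  produced some output or may yet produce more; true when flushing is
+ *  complete and the streams have been padded to their full lengths.
+ */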
+bool
 FFmpegDecoder::flush ()
 {
-       /* Get any remaining frames */
-
-       _packet.data = 0;
-       _packet.size = 0;
+       /* Flush video and audio once */
 
-       /* XXX: should we reset _packet.data and size after each *_decode_* call? */
+       bool did_something = false;
+       if (video) {
+               if (decode_and_process_video_packet(nullptr)) {
+                       did_something = true;
+               }
+       }
 
-       while (video && decode_video_packet()) {}
+       for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
+               auto context = _codec_context[i->index(_format_context)];
+               int r = avcodec_send_packet (context, nullptr);
+               if (r < 0 && r != AVERROR_EOF) {
+                       /* EOF can happen if we've already sent a flush packet */
+                       throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
+               }
+               r = avcodec_receive_frame (context, audio_frame(i));
+               if (r >= 0) {
+                       process_audio_frame (i);
+                       did_something = true;
+               }
+       }
 
-       if (audio) {
-               decode_audio_packet ();
+       if (did_something) {
+               /* We want to be called again */
+               return false;
        }
 
        /* Make sure all streams are the same length and round up to the next video frame */
 
-       FrameRateChange const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
+       auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
        ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
        full_length = full_length.ceil (frc.source);
        if (video) {
                double const vfr = _ffmpeg_content->video_frame_rate().get();
-               Frame const f = full_length.frames_round (vfr);
-               Frame v = video->position(film()).frames_round(vfr) + 1;
+               auto const f = full_length.frames_round (vfr);
+               auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
                while (v < f) {
                        video->emit (film(), shared_ptr<const ImageProxy> (new RawImageProxy (_black_image)), v);
                        ++v;
                }
        }
 
-       BOOST_FOREACH (shared_ptr<FFmpegAudioStream> i, _ffmpeg_content->ffmpeg_audio_streams ()) {
-               ContentTime a = audio->stream_position(film(), i);
+       for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
+               auto a = audio->stream_position(film(), i);
                /* Unfortunately if a is 0 that really means that we don't know the stream position since
                   there has been no data on it since the last seek.  In this case we'll just do nothing
                   here.  I'm not sure if that's the right idea.
                */
                if (a > ContentTime()) {
                        while (a < full_length) {
-                               ContentTime to_do = min (full_length - a, ContentTime::from_seconds (0.1));
-                               shared_ptr<AudioBuffers> silence (new AudioBuffers (i->channels(), to_do.frames_ceil (i->frame_rate())));
+                               auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
+                               auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
                                silence->make_silent ();
-                               audio->emit (film(), i, silence, a);
+                               audio->emit (film(), i, silence, a, true);
                                a += to_do;
                        }
                }
@@ -153,18 +169,18 @@ FFmpegDecoder::flush ()
        if (audio) {
                audio->flush ();
        }
+
+       return true;
 }
 
+
 bool
 FFmpegDecoder::pass ()
 {
-#ifdef DCPOMATIC_VARIANT_SWAROOP
-       if (_ffmpeg_content->encrypted() && !_ffmpeg_content->kdm()) {
-               return true;
-       }
-#endif
+       auto packet = av_packet_alloc();
+       DCPOMATIC_ASSERT (packet);
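+       /* Whichever way we leave this method the packet must be freed */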
 
-       int r = av_read_frame (_format_context, &_packet);
+       int r = av_read_frame (_format_context, packet);
 
        /* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
           has pretty-much succeeded (and hence generated data which should be processed).
@@ -178,52 +194,46 @@ FFmpegDecoder::pass ()
                        LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
                }
 
-               flush ();
-               return true;
+               av_packet_free (&packet);
+               return flush ();
        }
 
-       int const si = _packet.stream_index;
-       shared_ptr<const FFmpegContent> fc = _ffmpeg_content;
+       int const si = packet->stream_index;
+       auto fc = _ffmpeg_content;
 
        if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
-               decode_video_packet ();
+               decode_and_process_video_packet (packet);
        } else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
-               decode_subtitle_packet ();
+               decode_and_process_subtitle_packet (packet);
        } else {
-               decode_audio_packet ();
+               decode_and_process_audio_packet (packet);
        }
 
-       av_packet_unref (&_packet);
+       av_packet_free (&packet);
        return false;
 }
 
+
 /** @param data pointer to array of pointers to buffers.
  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
  */
 shared_ptr<AudioBuffers>
-FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
+FFmpegDecoder::deinterleave_audio (AVFrame* frame)
 {
-       DCPOMATIC_ASSERT (bytes_per_audio_sample (stream));
+       auto format = static_cast<AVSampleFormat>(frame->format);
 
-       int const size = av_samples_get_buffer_size (
-               0, stream->stream(_format_context)->codec->channels, _frame->nb_samples, audio_sample_format (stream), 1
-               );
-
-       /* Deinterleave and convert to float */
+       /* XXX: can't we use swr_convert() to do the format conversion? */
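+       /* An untested sketch of that idea: libswresample could do the whole
+        * deinterleave-and-convert in one call, something like
+        *
+        *   SwrContext* swr = swr_alloc_set_opts (
+        *           nullptr,
+        *           frame->channel_layout, AV_SAMPLE_FMT_FLTP, frame->sample_rate,
+        *           frame->channel_layout, format, frame->sample_rate,
+        *           0, nullptr
+        *           );
+        *   swr_init (swr);
+        *   swr_convert (
+        *           swr, reinterpret_cast<uint8_t**>(audio->data()), frames,
+        *           const_cast<uint8_t const**>(frame->data), frames
+        *           );
+        *   swr_free (&swr);
+        *
+        * (in real code the context would be allocated once per stream and the
+        * return values checked).
+        */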
 
-       /* total_samples and frames will be rounded down here, so if there are stray samples at the end
-          of the block that do not form a complete sample or frame they will be dropped.
-       */
-       int const total_samples = size / bytes_per_audio_sample (stream);
-       int const channels = stream->channels();
-       int const frames = total_samples / channels;
-       shared_ptr<AudioBuffers> audio (new AudioBuffers (channels, frames));
-       float** data = audio->data();
+       int const channels = frame->channels;
+       int const frames = frame->nb_samples;
+       int const total_samples = frames * channels;
+       auto audio = make_shared<AudioBuffers>(channels, frames);
+       auto data = audio->data();
 
-       switch (audio_sample_format (stream)) {
+       switch (format) {
        case AV_SAMPLE_FMT_U8:
        {
-               uint8_t* p = reinterpret_cast<uint8_t *> (_frame->data[0]);
+               auto p = reinterpret_cast<uint8_t *> (frame->data[0]);
                int sample = 0;
                int channel = 0;
                for (int i = 0; i < total_samples; ++i) {
@@ -240,7 +250,7 @@ FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
 
        case AV_SAMPLE_FMT_S16:
        {
-               int16_t* p = reinterpret_cast<int16_t *> (_frame->data[0]);
+               auto p = reinterpret_cast<int16_t *> (frame->data[0]);
                int sample = 0;
                int channel = 0;
                for (int i = 0; i < total_samples; ++i) {
@@ -257,7 +267,7 @@ FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
 
        case AV_SAMPLE_FMT_S16P:
        {
-               int16_t** p = reinterpret_cast<int16_t **> (_frame->data);
+               auto p = reinterpret_cast<int16_t **> (frame->data);
                for (int i = 0; i < channels; ++i) {
                        for (int j = 0; j < frames; ++j) {
                                data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
@@ -268,7 +278,7 @@ FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
 
        case AV_SAMPLE_FMT_S32:
        {
-               int32_t* p = reinterpret_cast<int32_t *> (_frame->data[0]);
+               auto p = reinterpret_cast<int32_t *> (frame->data[0]);
                int sample = 0;
                int channel = 0;
                for (int i = 0; i < total_samples; ++i) {
@@ -285,7 +295,7 @@ FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
 
        case AV_SAMPLE_FMT_S32P:
        {
-               int32_t** p = reinterpret_cast<int32_t **> (_frame->data);
+               auto p = reinterpret_cast<int32_t **> (frame->data);
                for (int i = 0; i < channels; ++i) {
                        for (int j = 0; j < frames; ++j) {
                                data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
@@ -296,7 +306,7 @@ FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
 
        case AV_SAMPLE_FMT_FLT:
        {
-               float* p = reinterpret_cast<float*> (_frame->data[0]);
+               auto p = reinterpret_cast<float*> (frame->data[0]);
                int sample = 0;
                int channel = 0;
                for (int i = 0; i < total_samples; ++i) {
@@ -313,36 +323,40 @@ FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
 
        case AV_SAMPLE_FMT_FLTP:
        {
-               float** p = reinterpret_cast<float**> (_frame->data);
-               /* Sometimes there aren't as many channels in the _frame as in the stream */
-               for (int i = 0; i < _frame->channels; ++i) {
+               auto p = reinterpret_cast<float**> (frame->data);
+               DCPOMATIC_ASSERT (frame->channels <= channels);
+               /* Sometimes there aren't as many channels in the frame as in the stream */
+               for (int i = 0; i < frame->channels; ++i) {
                        memcpy (data[i], p[i], frames * sizeof(float));
                }
-               for (int i = _frame->channels; i < channels; ++i) {
+               for (int i = frame->channels; i < channels; ++i) {
                        audio->make_silent (i);
                }
        }
        break;
 
        default:
-               throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format (stream))));
+               throw DecodeError (String::compose(_("Unrecognised audio sample format (%1)"), static_cast<int>(format)));
        }
 
        return audio;
 }
 
+
 AVSampleFormat
 FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
 {
-       return stream->stream (_format_context)->codec->sample_fmt;
+       return static_cast<AVSampleFormat>(stream->stream(_format_context)->codecpar->format);
 }
 
+
 int
 FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
 {
        return av_get_bytes_per_sample (audio_sample_format (stream));
 }
 
+
 void
 FFmpegDecoder::seek (ContentTime time, bool accurate)
 {
@@ -352,7 +366,7 @@ FFmpegDecoder::seek (ContentTime time, bool accurate)
           we don't really know what the seek will give us.
        */
 
-       ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
+       auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
        time -= pre_roll;
 
        /* XXX: it seems debatable whether PTS should be used here...
@@ -364,7 +378,8 @@ FFmpegDecoder::seek (ContentTime time, bool accurate)
        if (_video_stream) {
                stream = _video_stream;
        } else {
-               shared_ptr<FFmpegAudioStream> s = dynamic_pointer_cast<FFmpegAudioStream> (_ffmpeg_content->audio->stream ());
+               DCPOMATIC_ASSERT (_ffmpeg_content->audio);
+               auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
                if (s) {
                        stream = s->index (_format_context);
                }
@@ -372,7 +387,7 @@ FFmpegDecoder::seek (ContentTime time, bool accurate)
 
        DCPOMATIC_ASSERT (stream);
 
-       ContentTime u = time - _pts_offset;
+       auto u = time - _pts_offset;
        if (u < ContentTime ()) {
                u = ContentTime ();
        }
@@ -395,8 +410,8 @@ FFmpegDecoder::seek (ContentTime time, bool accurate)
                avcodec_flush_buffers (video_codec_context());
        }
 
-       BOOST_FOREACH (shared_ptr<FFmpegAudioStream> i, ffmpeg_content()->ffmpeg_audio_streams()) {
-               avcodec_flush_buffers (i->stream(_format_context)->codec);
+       for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
+               avcodec_flush_buffers (_codec_context[i->index(_format_context)]);
        }
 
        if (subtitle_codec_context ()) {
@@ -404,180 +419,216 @@ FFmpegDecoder::seek (ContentTime time, bool accurate)
        }
 
        _have_current_subtitle = false;
-}
 
-void
-FFmpegDecoder::decode_audio_packet ()
-{
-       /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
-          several times.
-       */
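+       /* After a seek we don't know what the next audio frame times will be */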
+       for (auto& i: _next_time) {
+               i.second = boost::optional<dcpomatic::ContentTime>();
+       }
+}
 
-       AVPacket copy_packet = _packet;
-       int const stream_index = copy_packet.stream_index;
 
+shared_ptr<FFmpegAudioStream>
+FFmpegDecoder::audio_stream_from_index (int index) const
+{
        /* XXX: inefficient */
-       vector<shared_ptr<FFmpegAudioStream> > streams = ffmpeg_content()->ffmpeg_audio_streams ();
-       vector<shared_ptr<FFmpegAudioStream> >::const_iterator stream = streams.begin ();
-       while (stream != streams.end () && !(*stream)->uses_index (_format_context, stream_index)) {
+       auto streams = ffmpeg_content()->ffmpeg_audio_streams();
+       auto stream = streams.begin();
+       while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
                ++stream;
        }
 
        if (stream == streams.end ()) {
-               /* The packet's stream may not be an audio one; just ignore it in this method if so */
-               return;
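+               /* The given index is not an audio stream */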
+               return {};
        }
 
-       while (copy_packet.size > 0) {
-
-               int frame_finished;
-               int decode_result = avcodec_decode_audio4 ((*stream)->stream (_format_context)->codec, _frame, &frame_finished, &copy_packet);
-               if (decode_result < 0) {
-                       /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
-                          some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
-                          if it overreads the auxiliary data.  ffplay carries on if frame_finished is true,
-                          even in the face of such an error, so I think we should too.
+       return *stream;
+}
 
-                          Returning from the method here caused mantis #352.
-                       */
-                       LOG_WARNING ("avcodec_decode_audio4 failed (%1)", decode_result);
 
-                       /* Fudge decode_result so that we come out of the while loop when
-                          we've processed this data.
-                       */
-                       decode_result = copy_packet.size;
+void
+FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
+{
+       auto frame = audio_frame (stream);
+       auto data = deinterleave_audio (frame);
+
+       ContentTime ct;
+       if (frame->pts == AV_NOPTS_VALUE) {
+               /* In some streams not every frame comes through with a timestamp; for
+                  those with AV_NOPTS_VALUE we have to work out the timestamp ourselves.
+                  This is particularly noticeable with TrueHD streams (see #1111).
+               */
+               if (_next_time[stream]) {
+                       ct = *_next_time[stream];
                }
+       } else {
+               ct = ContentTime::from_seconds (
+                       frame->best_effort_timestamp *
+                       av_q2d (stream->stream(_format_context)->time_base))
+                       + _pts_offset;
+       }
 
-               if (frame_finished) {
-                       shared_ptr<AudioBuffers> data = deinterleave_audio (*stream);
-
-                       ContentTime ct;
-                       if (_frame->pts == AV_NOPTS_VALUE && _next_time[stream_index]) {
-                               /* In some streams we see not every frame coming through with a timestamp; for those
-                                  that have AV_NOPTS_VALUE we need to work out the timestamp ourselves.  This is
-                                  particularly noticeable with TrueHD streams (see #1111).
-                               */
-                               ct = *_next_time[stream_index];
-                       } else {
-                               ct = ContentTime::from_seconds (
-                                       av_frame_get_best_effort_timestamp (_frame) *
-                                       av_q2d ((*stream)->stream (_format_context)->time_base))
-                                       + _pts_offset;
-                       }
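+       /* Note when we expect the next frame to start, for use if it has no PTS */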
+       _next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());
 
-                       _next_time[stream_index] = ct + ContentTime::from_frames(data->frames(), (*stream)->frame_rate());
+       if (ct < ContentTime()) {
+               /* Discard audio data that comes before time 0 */
+               auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
+               data->move (data->frames() - remove, remove, 0);
+               data->set_frames (data->frames() - remove);
+               ct += ContentTime::from_frames (remove, stream->frame_rate());
+       }
 
-                       if (ct < ContentTime ()) {
-                               /* Discard audio data that comes before time 0 */
-                               Frame const remove = min (int64_t (data->frames()), (-ct).frames_ceil(double((*stream)->frame_rate ())));
-                               data->move (data->frames() - remove, remove, 0);
-                               data->set_frames (data->frames() - remove);
-                               ct += ContentTime::from_frames (remove, (*stream)->frame_rate ());
-                       }
+       if (ct < ContentTime()) {
+               LOG_WARNING (
+                       "Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
+                       to_string(ct),
+                       data->frames(),
+                       stream->id(),
+                       frame->best_effort_timestamp,
+                       av_q2d(stream->stream(_format_context)->time_base),
+                       to_string(_pts_offset)
+                       );
+       }
 
-                       if (ct < ContentTime()) {
-                               LOG_WARNING (
-                                       "Crazy timestamp %1 for %2 samples in stream %3 packet pts %4 (ts=%5 tb=%6, off=%7)",
-                                       to_string(ct),
-                                       data->frames(),
-                                       copy_packet.stream_index,
-                                       copy_packet.pts,
-                                       av_frame_get_best_effort_timestamp(_frame),
-                                       av_q2d((*stream)->stream(_format_context)->time_base),
-                                       to_string(_pts_offset)
-                                       );
-                       }
+       /* Give this data provided there is some, and its time is sane */
+       /* Emit this data, provided there is some and its time is sane */
+               audio->emit (film(), stream, data, ct);
+       }
+}
 
-                       /* Give this data provided there is some, and its time is sane */
-                       if (ct >= ContentTime() && data->frames() > 0) {
-                               audio->emit (film(), *stream, data, ct);
-                       }
+
+void
+FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
+{
+       auto stream = audio_stream_from_index (packet->stream_index);
+       if (!stream) {
+               return;
+       }
+
+       auto context = _codec_context[stream->index(_format_context)];
+       auto frame = audio_frame (stream);
+
+       int r = avcodec_send_packet (context, packet);
+       if (r < 0) {
+               LOG_WARNING("avcodec_send_packet returned %1 for an audio packet", r);
+       }
+       while (r >= 0) {
+               r = avcodec_receive_frame (context, frame);
+               if (r == AVERROR(EAGAIN)) {
+                       /* More input is required */
+                       return;
                }
 
-               copy_packet.data += decode_result;
-               copy_packet.size -= decode_result;
+               /* We choose to be relaxed here about other errors; it seems that there may be valid
+                * data to decode even if an error occurred.  #352 may be related (though this was
+                * when we were using an old version of the FFmpeg API).
+                */
+               process_audio_frame (stream);
        }
 }
 
+
 bool
-FFmpegDecoder::decode_video_packet ()
+FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
 {
        DCPOMATIC_ASSERT (_video_stream);
 
-       int frame_finished;
-       if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
+       auto context = video_codec_context();
+
+       int r = avcodec_send_packet (context, packet);
+       if (r < 0) {
+               LOG_WARNING("avcodec_send_packet returned %1 for a video packet", r);
+       }
+
+       r = avcodec_receive_frame (context, _video_frame);
+       if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
+               /* More input is required, no more frames are coming, or we are flushing and there was
+                * some error which we just want to ignore.
+                */
                return false;
+       } else if (r < 0) {
+               throw DecodeError (N_("avcodec_receive_frame"), N_("FFmpeg::decode_and_process_video_packet"), r);
        }
 
+       process_video_frame ();
+
+       return true;
+}
+
+
+void
+FFmpegDecoder::process_video_frame ()
+{
+       /* We assume we'll only get one frame here, which I think is safe */
+
        boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 
        shared_ptr<VideoFilterGraph> graph;
 
-       list<shared_ptr<VideoFilterGraph> >::iterator i = _filter_graphs.begin();
-       while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
+       auto i = _filter_graphs.begin();
+       while (i != _filter_graphs.end() && !(*i)->can_process(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format)) {
                ++i;
        }
 
        if (i == _filter_graphs.end ()) {
                dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
-               graph.reset (new VideoFilterGraph (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format, vfr));
+               graph = make_shared<VideoFilterGraph>(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format, vfr);
                graph->setup (_ffmpeg_content->filters ());
                _filter_graphs.push_back (graph);
-               LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
+               LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _video_frame->width, _video_frame->height, _video_frame->format);
        } else {
                graph = *i;
        }
 
-       list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
+       auto images = graph->process (_video_frame);
 
-       for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
+       for (auto const& i: images) {
 
-               shared_ptr<Image> image = i->first;
+               auto image = i.first;
 
-               if (i->second != AV_NOPTS_VALUE) {
-                       double const pts = i->second * av_q2d (_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds ();
+               if (i.second != AV_NOPTS_VALUE) {
+                       double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();
 
                        video->emit (
                                film(),
-                               shared_ptr<ImageProxy> (new RawImageProxy (image)),
+                               make_shared<RawImageProxy>(image),
                                llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
                                );
                } else {
                        LOG_WARNING_NC ("Dropping frame without PTS");
                }
        }
-
-       return true;
 }
 
+
 void
-FFmpegDecoder::decode_subtitle_packet ()
+FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
 {
        int got_subtitle;
        AVSubtitle sub;
-       if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
+       if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
                return;
        }
 
+       auto sub_period = subtitle_period (packet, ffmpeg_content()->subtitle_stream()->stream(_format_context), sub);
+
        /* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
        if (_have_current_subtitle) {
                if (_current_subtitle_to) {
-                       only_text()->emit_stop (min(*_current_subtitle_to, subtitle_period(sub).from + _pts_offset));
+                       only_text()->emit_stop (min(*_current_subtitle_to, sub_period.from + _pts_offset));
                } else {
-                       only_text()->emit_stop (subtitle_period(sub).from + _pts_offset);
+                       only_text()->emit_stop (sub_period.from + _pts_offset);
                }
                _have_current_subtitle = false;
        }
 
        if (sub.num_rects <= 0) {
                /* Nothing new in this subtitle */
+               avsubtitle_free (&sub);
                return;
        }
 
        /* Subtitle PTS (within the source, not taking into account any of the
           source that we may have chopped off for the DCP).
        */
-       FFmpegSubtitlePeriod sub_period = subtitle_period (sub);
        ContentTime from;
        from = sub_period.from + _pts_offset;
        if (sub_period.to) {
@@ -588,19 +639,19 @@ FFmpegDecoder::decode_subtitle_packet ()
        }
 
        for (unsigned int i = 0; i < sub.num_rects; ++i) {
-               AVSubtitleRect const * rect = sub.rects[i];
+               auto const rect = sub.rects[i];
 
                switch (rect->type) {
                case SUBTITLE_NONE:
                        break;
                case SUBTITLE_BITMAP:
-                       decode_bitmap_subtitle (rect, from);
+                       process_bitmap_subtitle (rect, from);
                        break;
                case SUBTITLE_TEXT:
                        cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
                        break;
                case SUBTITLE_ASS:
-                       decode_ass_subtitle (rect->ass, from);
+                       process_ass_subtitle (rect->ass, from);
                        break;
                }
        }
@@ -612,37 +663,38 @@ FFmpegDecoder::decode_subtitle_packet ()
        avsubtitle_free (&sub);
 }
 
+
 void
-FFmpegDecoder::decode_bitmap_subtitle (AVSubtitleRect const * rect, ContentTime from)
+FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect, ContentTime from)
 {
        /* Note BGRA is expressed little-endian, so the first byte in the word is B, second
           G, third R, fourth A.
        */
-       shared_ptr<Image> image (new Image (AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), true));
+       auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), Image::Alignment::PADDED);
 
 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
        /* Start of the first line in the subtitle */
-       uint8_t* sub_p = rect->pict.data[0];
-       /* sub_p looks up into a BGRA palette which is here
+       auto sub_p = rect->pict.data[0];
+       /* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
           (i.e. first byte B, second G, third R, fourth A)
        */
-       uint32_t const * palette = (uint32_t *) rect->pict.data[1];
+       auto const* palette = rect->pict.data[1];
 #else
        /* Start of the first line in the subtitle */
-       uint8_t* sub_p = rect->data[0];
-       /* sub_p looks up into a BGRA palette which is here
-          (i.e. first byte B, second G, third R, fourth A)
+       auto sub_p = rect->data[0];
+       /* sub_p looks up into a BGRA palette which is at rect->data[1].
+          (first byte B, second G, third R, fourth A)
        */
-       uint32_t const * palette = (uint32_t *) rect->data[1];
+       auto const* palette = rect->data[1];
 #endif
        /* And the stream has a map of those palette colours to colours
           chosen by the user; create a `mapped' palette from those settings.
        */
-       map<RGBA, RGBA> colour_map = ffmpeg_content()->subtitle_stream()->colours ();
+       auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
        vector<RGBA> mapped_palette (rect->nb_colors);
        for (int i = 0; i < rect->nb_colors; ++i) {
-               RGBA c ((palette[i] & 0xff0000) >> 16, (palette[i] & 0xff00) >> 8, palette[i] & 0xff, (palette[i] & 0xff000000) >> 24);
-               map<RGBA, RGBA>::const_iterator j = colour_map.find (c);
+               RGBA c (palette[2], palette[1], palette[0], palette[3]);
+               auto j = colour_map.find (c);
                if (j != colour_map.end ()) {
                        mapped_palette[i] = j->second;
                } else {
@@ -652,25 +704,28 @@ FFmpegDecoder::decode_bitmap_subtitle (AVSubtitleRect const * rect, ContentTime
                        */
                        mapped_palette[i] = c;
                }
+               palette += 4;
        }
 
        /* Start of the output data */
-       uint32_t* out_p = (uint32_t *) image->data()[0];
+       auto out_p = image->data()[0];
 
        for (int y = 0; y < rect->h; ++y) {
-               uint8_t* sub_line_p = sub_p;
-               uint32_t* out_line_p = out_p;
+               auto sub_line_p = sub_p;
+               auto out_line_p = out_p;
                for (int x = 0; x < rect->w; ++x) {
-                       RGBA const p = mapped_palette[*sub_line_p++];
-                       /* XXX: this seems to be wrong to me (isn't the output image BGRA?) but it looks right on screen */
-                       *out_line_p++ = (p.a << 24) | (p.b << 16) | (p.g << 8) | p.r;
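+                       /* Write each BGRA pixel out byte-by-byte, so that the layout
+                          in memory does not depend on endianness.
+                       */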
+                       auto const p = mapped_palette[*sub_line_p++];
+                       *out_line_p++ = p.b;
+                       *out_line_p++ = p.g;
+                       *out_line_p++ = p.r;
+                       *out_line_p++ = p.a;
                }
 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
                sub_p += rect->pict.linesize[0];
 #else
                sub_p += rect->linesize[0];
 #endif
-               out_p += image->stride()[0] / sizeof (uint32_t);
+               out_p += image->stride()[0];
        }
 
        int target_width = subtitle_codec_context()->width;
@@ -687,17 +742,18 @@ FFmpegDecoder::decode_bitmap_subtitle (AVSubtitleRect const * rect, ContentTime
        DCPOMATIC_ASSERT (target_width);
        DCPOMATIC_ASSERT (target_height);
        dcpomatic::Rect<double> const scaled_rect (
-               static_cast<double> (rect->x) / target_width,
-               static_cast<double> (rect->y) / target_height,
-               static_cast<double> (rect->w) / target_width,
-               static_cast<double> (rect->h) / target_height
+               static_cast<double>(rect->x) / target_width,
+               static_cast<double>(rect->y) / target_height,
+               static_cast<double>(rect->w) / target_width,
+               static_cast<double>(rect->h) / target_height
                );
 
        only_text()->emit_bitmap_start (from, image, scaled_rect);
 }
 
+
 void
-FFmpegDecoder::decode_ass_subtitle (string ass, ContentTime from)
+FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
 {
        /* We have no styles and no Format: line, so I'm assuming that FFmpeg
           produces a single format of Dialogue: lines...
@@ -718,14 +774,14 @@ FFmpegDecoder::decode_ass_subtitle (string ass, ContentTime from)
        }
 
        sub::RawSubtitle base;
-       list<sub::RawSubtitle> raw = sub::SSAReader::parse_line (
+       auto raw = sub::SSAReader::parse_line (
                base,
                text,
                _ffmpeg_content->video->size().width,
                _ffmpeg_content->video->size().height
                );
 
-       BOOST_FOREACH (sub::Subtitle const & i, sub::collect<list<sub::Subtitle> > (raw)) {
+       for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
                only_text()->emit_plain_start (from, i);
        }
 }