Use SRC_LINEAR for speed when analysing audio (#685).
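The `fast' flag added below is what eventually selects the cheaper resampler;
the converter choice itself is made in AudioDecoder, outside this file.  A
minimal sketch of that choice, assuming libsamplerate; the `open_resampler'
helper is illustrative rather than taken from this commit:

    #include <samplerate.h>
    #include <stdexcept>

    SRC_STATE* open_resampler (int channels, bool fast)
    {
            int error = 0;
            /* SRC_LINEAR is far cheaper than the SINC converters, at some cost
               in quality; acceptable when we are only analysing audio levels. */
            SRC_STATE* s = src_new (fast ? SRC_LINEAR : SRC_SINC_BEST_QUALITY, channels, &error);
            if (!s) {
                    throw std::runtime_error (src_strerror (error));
            }
            return s;
    }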
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index dd47d306a6e54c0611e0abde6b6ab802cab4e77b..ae44ff5220d04a2730087b89ea4b7f49a24e1be3 100644
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
+    Copyright (C) 2012-2015 Carl Hetherington <cth@carlh.net>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
  *  @brief A decoder using FFmpeg to decode content.
  */
 
-#include <stdexcept>
-#include <vector>
-#include <sstream>
-#include <iomanip>
-#include <iostream>
-#include <stdint.h>
-#include <sndfile.h>
-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavformat/avformat.h>
-}
 #include "filter.h"
 #include "exceptions.h"
 #include "image.h"
 #include "filter.h"
 #include "exceptions.h"
 #include "image.h"
@@ -45,30 +34,36 @@ extern "C" {
 #include "ffmpeg_content.h"
 #include "raw_image_proxy.h"
 #include "film.h"
 #include "ffmpeg_content.h"
 #include "raw_image_proxy.h"
 #include "film.h"
-#include "timer.h"
+#include "compose.hpp"
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+}
+#include <boost/foreach.hpp>
+#include <vector>
+#include <iomanip>
+#include <iostream>
+#include <stdint.h>
 
 #include "i18n.h"
 
 #define LOG_GENERAL(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_GENERAL);
 #define LOG_ERROR(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_ERROR);
-#define LOG_WARNING(...) _video_content->film()->log()->log (__VA_ARGS__, Log::TYPE_WARNING);
+#define LOG_WARNING_NC(...) _video_content->film()->log()->log (__VA_ARGS__, Log::TYPE_WARNING);
+#define LOG_WARNING(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_WARNING);
 
 using std::cout;
-using std::string;
 using std::vector;
-using std::stringstream;
 using std::list;
 using std::min;
 using std::pair;
-using std::make_pair;
+using std::max;
 using boost::shared_ptr;
-using boost::optional;
-using boost::dynamic_pointer_cast;
 using dcp::Size;
 
-FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
+FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log, bool fast)
        : VideoDecoder (c)
-       , AudioDecoder (c)
+       , AudioDecoder (c, fast)
        , SubtitleDecoder (c)
        , FFmpeg (c)
        , _log (log)
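The two warning macros added above differ only in composition: LOG_WARNING runs
its arguments through String::compose (so %1-style placeholders are
substituted), while the _NC ("no compose") variant logs a ready-made string.
Both uses appear later in this diff:

    LOG_WARNING ("avcodec_decode_audio4 failed (%1)", decode_result);
    LOG_WARNING_NC ("Dropping frame without PTS");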
@@ -82,24 +77,37 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log>
           Then we remove big initial gaps in PTS and we allow our
           insertion of black frames to work.
 
-          We will do pts_to_use = pts_from_ffmpeg + pts_offset;
+          We will do:
+            audio_pts_to_use = audio_pts_from_ffmpeg + pts_offset;
+            video_pts_to_use = video_pts_from_ffmpeg + pts_offset;
        */
 
-       bool const have_video = c->first_video();
-       bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
-
        /* First, make one of them start at 0 */
 
-       if (have_audio && have_video) {
-               _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
-       } else if (have_video) {
-               _pts_offset = - c->first_video().get();
-       } else if (have_audio) {
-               _pts_offset = - c->audio_stream()->first_audio.get();
+       vector<shared_ptr<FFmpegAudioStream> > streams = c->ffmpeg_audio_streams ();
+
+       _pts_offset = ContentTime::min ();
+
+       if (c->first_video ()) {
+               _pts_offset = - c->first_video().get ();
+       }
+
+       BOOST_FOREACH (shared_ptr<FFmpegAudioStream> i, streams) {
+               if (i->first_audio) {
+                       _pts_offset = max (_pts_offset, - i->first_audio.get ());
+               }
        }
 
-       /* Now adjust both so that the video pts starts on a frame */
-       if (have_video && have_audio) {
+       /* If _pts_offset is positive we would be pushing things from a -ve PTS to be played.
+          I don't think we ever want to do that, as it seems things at -ve PTS are not meant
+          to be seen (use for alignment bars etc.); see mantis #418.
+       */
+       if (_pts_offset > ContentTime ()) {
+               _pts_offset = ContentTime ();
+       }
+
+       /* Now adjust so that the video pts starts on a frame */
+       if (c->first_video ()) {
                ContentTime first_video = c->first_video().get() + _pts_offset;
                ContentTime const old_first_video = first_video;
                _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
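A worked example of the offset logic above, with assumed values: first video
PTS 0.6s and two audio streams first heard at 0.9s and 1.2s give

    _pts_offset = max (-0.6, -0.9, -1.2) = -0.6s

so video now starts at 0s and the audio streams at 0.3s and 0.6s, after which
the video start is rounded up to the next frame boundary.  Had every first PTS
been negative the computed offset would come out positive, and the clamp above
resets it to zero (the mantis #418 case).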
@@ -110,18 +118,16 @@ void
 FFmpegDecoder::flush ()
 {
        /* Get any remaining frames */
-       
+
        _packet.data = 0;
        _packet.size = 0;
-       
+
        /* XXX: should we reset _packet.data and size after each *_decode_* call? */
-       
+
        while (decode_video_packet ()) {}
-       
-       if (_ffmpeg_content->audio_stream()) {
-               decode_audio_packet ();
-               AudioDecoder::flush ();
-       }
+
+       decode_audio_packet ();
+       AudioDecoder::flush ();
 }
 
 bool
@@ -129,7 +135,11 @@ FFmpegDecoder::pass ()
 {
        int r = av_read_frame (_format_context, &_packet);
 
-       if (r < 0) {
+       /* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
+          has pretty-much succeeded (and hence generated data which should be processed).
+          Hence it makes sense to continue here in that case.
+       */
+       if (r < 0 && r != AVERROR_INVALIDDATA) {
                if (r != AVERROR_EOF) {
                        /* Maybe we should fail here, but for now we'll just finish off instead */
                        char buf[256];
@@ -142,13 +152,14 @@ FFmpegDecoder::pass ()
        }
 
        int const si = _packet.stream_index;
+       shared_ptr<const FFmpegContent> fc = _ffmpeg_content;
 
-       if (si == _video_stream) {
+       if (si == _video_stream && !_ignore_video) {
                decode_video_packet ();
-       } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
-               decode_audio_packet ();
-       } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
+       } else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index (_format_context, si)) {
                decode_subtitle_packet ();
+       } else {
+               decode_audio_packet ();
        }
 
        av_free_packet (&_packet);
@@ -159,20 +170,20 @@ FFmpegDecoder::pass ()
  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
  */
 shared_ptr<AudioBuffers>
-FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
+FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream, uint8_t** data, int size)
 {
-       assert (_ffmpeg_content->audio_channels());
-       assert (bytes_per_audio_sample());
+       DCPOMATIC_ASSERT (bytes_per_audio_sample (stream));
 
        /* Deinterleave and convert to float */
 
-       assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
-
-       int const total_samples = size / bytes_per_audio_sample();
-       int const frames = total_samples / _ffmpeg_content->audio_channels();
-       shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
+       /* total_samples and frames will be rounded down here, so if there are stray samples at the end
+          of the block that do not form a complete sample or frame they will be dropped.
+       */
+       int const total_samples = size / bytes_per_audio_sample (stream);
+       int const frames = total_samples / stream->channels();
+       shared_ptr<AudioBuffers> audio (new AudioBuffers (stream->channels(), frames));
 
-       switch (audio_sample_format()) {
+       switch (audio_sample_format (stream)) {
        case AV_SAMPLE_FMT_U8:
        {
                uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
@@ -182,14 +193,14 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
                        audio->data(channel)[sample] = float(*p++) / (1 << 23);
 
                        ++channel;
-                       if (channel == _ffmpeg_content->audio_channels()) {
+                       if (channel == stream->channels()) {
                                channel = 0;
                                ++sample;
                        }
                }
        }
        break;
-       
+
        case AV_SAMPLE_FMT_S16:
        {
                int16_t* p = reinterpret_cast<int16_t *> (data[0]);
@@ -199,7 +210,7 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
                        audio->data(channel)[sample] = float(*p++) / (1 << 15);
 
                        ++channel;
-                       if (channel == _ffmpeg_content->audio_channels()) {
+                       if (channel == stream->channels()) {
                                channel = 0;
                                ++sample;
                        }
@@ -210,14 +221,14 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
        case AV_SAMPLE_FMT_S16P:
        {
                int16_t** p = reinterpret_cast<int16_t **> (data);
-               for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
+               for (int i = 0; i < stream->channels(); ++i) {
                        for (int j = 0; j < frames; ++j) {
                                audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
                        }
                }
        }
        break;
-       
+
        case AV_SAMPLE_FMT_S32:
        {
                int32_t* p = reinterpret_cast<int32_t *> (data[0]);
@@ -227,7 +238,7 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
                        audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 
                        ++channel;
-                       if (channel == _ffmpeg_content->audio_channels()) {
+                       if (channel == stream->channels()) {
                                channel = 0;
                                ++sample;
                        }
@@ -244,44 +255,40 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
                        audio->data(channel)[sample] = *p++;
 
                        ++channel;
-                       if (channel == _ffmpeg_content->audio_channels()) {
+                       if (channel == stream->channels()) {
                                channel = 0;
                                ++sample;
                        }
                }
        }
        break;
-               
+
        case AV_SAMPLE_FMT_FLTP:
        {
                float** p = reinterpret_cast<float**> (data);
-               for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
+               for (int i = 0; i < stream->channels(); ++i) {
                        memcpy (audio->data(i), p[i], frames * sizeof(float));
                }
        }
        break;
 
        default:
-               throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
+               throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format (stream))));
        }
 
        return audio;
 }
 
 AVSampleFormat
-FFmpegDecoder::audio_sample_format () const
+FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
 {
-       if (!_ffmpeg_content->audio_stream()) {
-               return (AVSampleFormat) 0;
-       }
-       
-       return audio_codec_context()->sample_fmt;
+       return stream->stream (_format_context)->codec->sample_fmt;
 }
 
 int
-FFmpegDecoder::bytes_per_audio_sample () const
+FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
 {
-       return av_get_bytes_per_sample (audio_sample_format ());
+       return av_get_bytes_per_sample (audio_sample_format (stream));
 }
 
 void
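The interleaved cases in deinterleave_audio above all share one shape; a
stand-alone sketch of it for S16, with illustrative names rather than code
from this commit:

    #include <stdint.h>

    static void
    deinterleave_s16 (int16_t const * in, float** out, int frames, int channels)
    {
            int sample = 0;
            int channel = 0;
            for (int i = 0; i < frames * channels; ++i) {
                    /* Scale the signed 16-bit sample into [-1, 1) */
                    out[channel][sample] = float (*in++) / (1 << 15);
                    ++channel;
                    if (channel == channels) {
                            channel = 0;
                            ++sample;
                    }
            }
    }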
@@ -289,38 +296,29 @@ FFmpegDecoder::seek (ContentTime time, bool accurate)
 {
        VideoDecoder::seek (time, accurate);
        AudioDecoder::seek (time, accurate);
-       
+       SubtitleDecoder::seek (time, accurate);
+
        /* If we are doing an `accurate' seek, we need to use pre-roll, as
           we don't really know what the seek will give us.
        */
 
        ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
        time -= pre_roll;
-       if (time < ContentTime (0)) {
-               time = ContentTime (0);
-       }
 
-       ContentTime const u = time - _pts_offset;
-       int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
+       /* XXX: it seems debatable whether PTS should be used here...
+          http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
+       */
 
-       if (_ffmpeg_content->audio_stream ()) {
-               s = min (
-                       s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
-                       );
+       ContentTime u = time - _pts_offset;
+       if (u < ContentTime ()) {
+               u = ContentTime ();
        }
+       av_seek_frame (_format_context, _video_stream, u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base), AVSEEK_FLAG_BACKWARD);
 
-       /* Ridiculous empirical hack */
-       s--;
-       if (s < 0) {
-               s = 0;
-       }
+       avcodec_flush_buffers (video_codec_context());
 
-       av_seek_frame (_format_context, _video_stream, s, 0);
+       /* XXX: should be flushing audio buffers? */
 
-       avcodec_flush_buffers (video_codec_context());
-       if (audio_codec_context ()) {
-               avcodec_flush_buffers (audio_codec_context ());
-       }
        if (subtitle_codec_context ()) {
                avcodec_flush_buffers (subtitle_codec_context ());
        }
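The unit conversion behind the av_seek_frame call above, in isolation: a time
in seconds becomes a timestamp in the target stream's time_base units.  A
sketch, with the helper name being illustrative:

    extern "C" {
    #include <libavutil/rational.h>
    }
    #include <math.h>
    #include <stdint.h>

    static int64_t
    to_stream_units (double seconds, AVRational tb)
    {
            /* av_q2d gives the time_base as seconds-per-unit */
            return llrint (seconds / av_q2d (tb));
    }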
@@ -332,32 +330,54 @@ FFmpegDecoder::decode_audio_packet ()
        /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
           several times.
        */
-       
+
        AVPacket copy_packet = _packet;
-       
+
+       /* XXX: inefficient */
+       vector<shared_ptr<FFmpegAudioStream> > streams = ffmpeg_content()->ffmpeg_audio_streams ();
+       vector<shared_ptr<FFmpegAudioStream> >::const_iterator stream = streams.begin ();
+       while (stream != streams.end () && !(*stream)->uses_index (_format_context, copy_packet.stream_index)) {
+               ++stream;
+       }
+
+       if (stream == streams.end ()) {
+               /* The packet's stream may not be an audio one; just ignore it in this method if so */
+               return;
+       }
+
        while (copy_packet.size > 0) {
 
                int frame_finished;
-               int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
-
+               int decode_result = avcodec_decode_audio4 ((*stream)->stream (_format_context)->codec, _frame, &frame_finished, &copy_packet);
                if (decode_result < 0) {
-                       LOG_ERROR ("avcodec_decode_audio4 failed (%1)", decode_result);
-                       return;
+                       /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
+                          some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
+                          if it overreads the auxiliary data.  ffplay carries on if frame_finished is true,
+                          even in the face of such an error, so I think we should too.
+
+                          Returning from the method here caused mantis #352.
+                       */
+                       LOG_WARNING ("avcodec_decode_audio4 failed (%1)", decode_result);
+
+                       /* Fudge decode_result so that we come out of the while loop when
+                          we've processed this data.
+                       */
+                       decode_result = copy_packet.size;
                }
 
                if (frame_finished) {
                        ContentTime const ct = ContentTime::from_seconds (
                                av_frame_get_best_effort_timestamp (_frame) *
-                               av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
+                               av_q2d ((*stream)->stream (_format_context)->time_base))
                                + _pts_offset;
-                       
+
                        int const data_size = av_samples_get_buffer_size (
-                               0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
+                               0, (*stream)->stream(_format_context)->codec->channels, _frame->nb_samples, audio_sample_format (*stream), 1
                                );
 
-                       audio (deinterleave_audio (_frame->data, data_size), ct);
+                       audio (*stream, deinterleave_audio (*stream, _frame->data, data_size), ct);
                }
-                       
+
                copy_packet.data += decode_result;
                copy_packet.size -= decode_result;
        }
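The carry-on-after-error pattern used above, reduced to its shape: a failed
call that still finished a frame is treated as having consumed the rest of
the packet, so the loop always terminates.  Sketch only; the function and the
`handle' callback are illustrative:

    extern "C" {
    #include <libavcodec/avcodec.h>
    }

    static void
    decode_all (AVCodecContext* ctx, AVFrame* frame, AVPacket pkt, void (*handle) (AVFrame *))
    {
            while (pkt.size > 0) {
                    int finished = 0;
                    int used = avcodec_decode_audio4 (ctx, frame, &finished, &pkt);
                    if (used < 0) {
                            /* Treat the rest of the packet as consumed so that we
                               still make progress (the mantis #352 fix above) */
                            used = pkt.size;
                    }
                    if (finished) {
                            handle (frame);
                    }
                    pkt.data += used;
                    pkt.size -= used;
            }
    }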
@@ -374,7 +394,7 @@ FFmpegDecoder::decode_video_packet ()
        boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 
        shared_ptr<FilterGraph> graph;
-       
+
        list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
        while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
                ++i;
@@ -393,21 +413,21 @@ FFmpegDecoder::decode_video_packet ()
        for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 
                shared_ptr<Image> image = i->first;
-               
+
                if (i->second != AV_NOPTS_VALUE) {
                        double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
                        video (
-                               shared_ptr<ImageProxy> (new RawImageProxy (image, _video_content->film()->log())),
-                               rint (pts * _ffmpeg_content->video_frame_rate ())
+                               shared_ptr<ImageProxy> (new RawImageProxy (image)),
+                               llrint (pts * _ffmpeg_content->video_frame_rate ())
                                );
                } else {
-                       LOG_WARNING ("Dropping frame without PTS");
+                       LOG_WARNING_NC ("Dropping frame without PTS");
                }
        }
 
        return true;
 }
-       
+
 void
 FFmpegDecoder::decode_subtitle_packet ()
 {
@@ -417,29 +437,63 @@ FFmpegDecoder::decode_subtitle_packet ()
                return;
        }
 
-       /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
-          indicate that the previous subtitle should stop.
-       */
        if (sub.num_rects <= 0) {
-               image_subtitle (ContentTimePeriod (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
+               /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
+                  indicate that the previous subtitle should stop.  We can ignore it here.
+               */
                return;
        } else if (sub.num_rects > 1) {
                throw DecodeError (_("multi-part subtitles not yet supported"));
        }
-               
+
        /* Subtitle PTS (within the source, not taking into account any of the
-          source that we may have chopped off for the DCP)
+          source that we may have chopped off for the DCP).
        */
-       ContentTimePeriod period = subtitle_period (sub) + _pts_offset;
+       FFmpegSubtitlePeriod sub_period = subtitle_period (sub);
+       ContentTimePeriod period;
+       period.from = sub_period.from + _pts_offset;
+       if (sub_period.to) {
+               /* We already know the subtitle period `to' time */
+               period.to = sub_period.to.get() + _pts_offset;
+       } else {
+               /* We have to look up the `to' time in the stream's records */
+               period.to = ffmpeg_content()->subtitle_stream()->find_subtitle_to (sub_period.from);
+       }
 
        AVSubtitleRect const * rect = sub.rects[0];
 
-       if (rect->type != SUBTITLE_BITMAP) {
-               /* XXX */
-               // throw DecodeError (_("non-bitmap subtitles not yet supported"));
-               return;
+       switch (rect->type) {
+       case SUBTITLE_NONE:
+               break;
+       case SUBTITLE_BITMAP:
+               decode_bitmap_subtitle (rect, period);
+               break;
+       case SUBTITLE_TEXT:
+               cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
+               break;
+       case SUBTITLE_ASS:
+               cout << "XXX: SUBTITLE_ASS " << rect->ass << "\n";
+               break;
        }
 
+       avsubtitle_free (&sub);
+}
+
+list<ContentTimePeriod>
+FFmpegDecoder::image_subtitles_during (ContentTimePeriod p, bool starting) const
+{
+       return _ffmpeg_content->subtitles_during (p, starting);
+}
+
+list<ContentTimePeriod>
+FFmpegDecoder::text_subtitles_during (ContentTimePeriod, bool) const
+{
+       return list<ContentTimePeriod> ();
+}
+
+void
+FFmpegDecoder::decode_bitmap_subtitle (AVSubtitleRect const * rect, ContentTimePeriod period)
+{
        /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
           G, third B, fourth A.
        */
@@ -466,23 +520,12 @@ FFmpegDecoder::decode_subtitle_packet ()
        }
 
        dcp::Size const vs = _ffmpeg_content->video_size ();
-
-       image_subtitle (
-               period,
-               image,
-               dcpomatic::Rect<double> (
-                       static_cast<double> (rect->x) / vs.width,
-                       static_cast<double> (rect->y) / vs.height,
-                       static_cast<double> (rect->w) / vs.width,
-                       static_cast<double> (rect->h) / vs.height
-                       )
+       dcpomatic::Rect<double> const scaled_rect (
+               static_cast<double> (rect->x) / vs.width,
+               static_cast<double> (rect->y) / vs.height,
+               static_cast<double> (rect->w) / vs.width,
+               static_cast<double> (rect->h) / vs.height
                );
-       
-       avsubtitle_free (&sub);
-}
 
-list<ContentTimePeriod>
-FFmpegDecoder::subtitles_during (ContentTimePeriod p, bool starting) const
-{
-       return _ffmpeg_content->subtitles_during (p, starting);
+       image_subtitle (period, image, scaled_rect);
 }
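A sketch of the palette lookup decode_bitmap_subtitle performs before the
scaling above: each source byte indexes the rect's little-endian RGBA palette.
Names are illustrative; in the FFmpeg API of this era the bitmap and its
palette live in rect->pict.data[0] and rect->pict.data[1]:

    extern "C" {
    #include <libavcodec/avcodec.h>
    }
    #include <stdint.h>

    static void
    palette_to_rgba (AVSubtitleRect const * rect, uint32_t* out)
    {
            uint32_t const * palette = reinterpret_cast<uint32_t const *> (rect->pict.data[1]);
            uint8_t const * in = rect->pict.data[0];
            for (int y = 0; y < rect->h; ++y) {
                    for (int x = 0; x < rect->w; ++x) {
                            out[y * rect->w + x] = palette[in[y * rect->pict.linesize[0] + x]];
                    }
            }
    }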