Don't seek before 0 in FFmpegDecoder; fixes errors on seek close to 0.
[dcpomatic.git] / src / lib / ffmpeg_decoder.cc
index ec4e33a6ec56599c3fa5bcd4836e597e5c47bee7..4ca06329b116ae9561ddb3c61fa11a17416af9c2 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
-    Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
+    Copyright (C) 2012-2015 Carl Hetherington <cth@carlh.net>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
  *  @brief A decoder using FFmpeg to decode content.
  */
 
  *  @brief A decoder using FFmpeg to decode content.
  */
 
-#include <stdexcept>
-#include <vector>
-#include <iomanip>
-#include <iostream>
-#include <stdint.h>
-#include <sndfile.h>
-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavformat/avformat.h>
-}
 #include "filter.h"
 #include "exceptions.h"
 #include "image.h"
 #include "filter.h"
 #include "exceptions.h"
 #include "image.h"
@@ -45,6 +35,17 @@ extern "C" {
 #include "raw_image_proxy.h"
 #include "film.h"
 #include "timer.h"
 #include "raw_image_proxy.h"
 #include "film.h"
 #include "timer.h"
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+}
+#include <boost/foreach.hpp>
+#include <stdexcept>
+#include <vector>
+#include <iomanip>
+#include <iostream>
+#include <stdint.h>
+#include <sndfile.h>
 
 #include "i18n.h"
 
 
 #include "i18n.h"
 
@@ -60,6 +61,7 @@ using std::list;
 using std::min;
 using std::pair;
 using std::make_pair;
 using std::min;
 using std::pair;
 using std::make_pair;
+using std::max;
 using boost::shared_ptr;
 using boost::optional;
 using boost::dynamic_pointer_cast;
 using boost::shared_ptr;
 using boost::optional;
 using boost::dynamic_pointer_cast;
@@ -81,20 +83,25 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log>
           Then we remove big initial gaps in PTS and we allow our
           insertion of black frames to work.
 
           Then we remove big initial gaps in PTS and we allow our
           insertion of black frames to work.
 
-          We will do pts_to_use = pts_from_ffmpeg + pts_offset;
+          We will do:
+            audio_pts_to_use = audio_pts_from_ffmpeg + pts_offset;
+            video_pts_to_use = video_pts_from_ffmpeg + pts_offset;
        */
 
        */
 
-       bool const have_video = c->first_video();
-       bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
-
        /* First, make one of them start at 0 */
 
        /* First, make one of them start at 0 */
 
-       if (have_audio && have_video) {
-               _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
-       } else if (have_video) {
-               _pts_offset = - c->first_video().get();
-       } else if (have_audio) {
-               _pts_offset = - c->audio_stream()->first_audio.get();
+       vector<shared_ptr<FFmpegAudioStream> > streams = c->ffmpeg_audio_streams ();
+
+       _pts_offset = ContentTime::min ();
+
+       if (c->first_video ()) {
+               _pts_offset = - c->first_video().get ();
+       }
+
+       BOOST_FOREACH (shared_ptr<FFmpegAudioStream> i, streams) {
+               if (i->first_audio) {
+                       _pts_offset = max (_pts_offset, - i->first_audio.get ());
+               }
        }
 
        /* If _pts_offset is positive we would be pushing things from a -ve PTS to be played.
        }
 
        /* If _pts_offset is positive we would be pushing things from a -ve PTS to be played.
@@ -105,8 +112,8 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log>
                _pts_offset = ContentTime ();
        }
 
                _pts_offset = ContentTime ();
        }
 
-       /* Now adjust both so that the video pts starts on a frame */
-       if (have_video && have_audio) {
+       /* Now adjust so that the video pts starts on a frame */
+       if (c->first_video ()) {
                ContentTime first_video = c->first_video().get() + _pts_offset;
                ContentTime const old_first_video = first_video;
                _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
                ContentTime first_video = c->first_video().get() + _pts_offset;
                ContentTime const old_first_video = first_video;
                _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
@@ -125,10 +132,8 @@ FFmpegDecoder::flush ()
        
        while (decode_video_packet ()) {}
        
        
        while (decode_video_packet ()) {}
        
-       if (_ffmpeg_content->audio_stream()) {
-               decode_audio_packet ();
-               AudioDecoder::flush ();
-       }
+       decode_audio_packet ();
+       AudioDecoder::flush ();
 }
 
 bool
 }
 
 bool
@@ -153,13 +158,14 @@ FFmpegDecoder::pass ()
        }
 
        int const si = _packet.stream_index;
        }
 
        int const si = _packet.stream_index;
+       shared_ptr<const FFmpegContent> fc = _ffmpeg_content;
 
 
-       if (si == _video_stream) {
+       if (si == _video_stream && !_ignore_video) {
                decode_video_packet ();
                decode_video_packet ();
-       } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
-               decode_audio_packet ();
-       } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
+       } else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index (_format_context, si)) {
                decode_subtitle_packet ();
                decode_subtitle_packet ();
+       } else {
+               decode_audio_packet ();
        }
 
        av_free_packet (&_packet);
        }
 
        av_free_packet (&_packet);
@@ -170,21 +176,20 @@ FFmpegDecoder::pass ()
  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
  */
 shared_ptr<AudioBuffers>
  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
  */
 shared_ptr<AudioBuffers>
-FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
+FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream, uint8_t** data, int size)
 {
 {
-       DCPOMATIC_ASSERT (_ffmpeg_content->audio_channels());
-       DCPOMATIC_ASSERT (bytes_per_audio_sample());
+       DCPOMATIC_ASSERT (bytes_per_audio_sample (stream));
 
        /* Deinterleave and convert to float */
 
        /* total_samples and frames will be rounded down here, so if there are stray samples at the end
           of the block that do not form a complete sample or frame they will be dropped.
        */
 
        /* Deinterleave and convert to float */
 
        /* total_samples and frames will be rounded down here, so if there are stray samples at the end
           of the block that do not form a complete sample or frame they will be dropped.
        */
-       int const total_samples = size / bytes_per_audio_sample();
-       int const frames = total_samples / _ffmpeg_content->audio_channels();
-       shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
+       int const total_samples = size / bytes_per_audio_sample (stream);
+       int const frames = total_samples / stream->channels();
+       shared_ptr<AudioBuffers> audio (new AudioBuffers (stream->channels(), frames));
 
 
-       switch (audio_sample_format()) {
+       switch (audio_sample_format (stream)) {
        case AV_SAMPLE_FMT_U8:
        {
                uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
        case AV_SAMPLE_FMT_U8:
        {
                uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
@@ -194,7 +199,7 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
                        audio->data(channel)[sample] = float(*p++) / (1 << 23);
 
                        ++channel;
                        audio->data(channel)[sample] = float(*p++) / (1 << 23);
 
                        ++channel;
-                       if (channel == _ffmpeg_content->audio_channels()) {
+                       if (channel == stream->channels()) {
                                channel = 0;
                                ++sample;
                        }
                                channel = 0;
                                ++sample;
                        }
@@ -211,7 +216,7 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
                        audio->data(channel)[sample] = float(*p++) / (1 << 15);
 
                        ++channel;
                        audio->data(channel)[sample] = float(*p++) / (1 << 15);
 
                        ++channel;
-                       if (channel == _ffmpeg_content->audio_channels()) {
+                       if (channel == stream->channels()) {
                                channel = 0;
                                ++sample;
                        }
                                channel = 0;
                                ++sample;
                        }
@@ -222,7 +227,7 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
        case AV_SAMPLE_FMT_S16P:
        {
                int16_t** p = reinterpret_cast<int16_t **> (data);
        case AV_SAMPLE_FMT_S16P:
        {
                int16_t** p = reinterpret_cast<int16_t **> (data);
-               for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
+               for (int i = 0; i < stream->channels(); ++i) {
                        for (int j = 0; j < frames; ++j) {
                                audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
                        }
                        for (int j = 0; j < frames; ++j) {
                                audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
                        }
@@ -239,7 +244,7 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
                        audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 
                        ++channel;
                        audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 
                        ++channel;
-                       if (channel == _ffmpeg_content->audio_channels()) {
+                       if (channel == stream->channels()) {
                                channel = 0;
                                ++sample;
                        }
                                channel = 0;
                                ++sample;
                        }
@@ -256,7 +261,7 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
                        audio->data(channel)[sample] = *p++;
 
                        ++channel;
                        audio->data(channel)[sample] = *p++;
 
                        ++channel;
-                       if (channel == _ffmpeg_content->audio_channels()) {
+                       if (channel == stream->channels()) {
                                channel = 0;
                                ++sample;
                        }
                                channel = 0;
                                ++sample;
                        }
@@ -267,33 +272,29 @@ FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
        case AV_SAMPLE_FMT_FLTP:
        {
                float** p = reinterpret_cast<float**> (data);
        case AV_SAMPLE_FMT_FLTP:
        {
                float** p = reinterpret_cast<float**> (data);
-               for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
+               for (int i = 0; i < stream->channels(); ++i) {
                        memcpy (audio->data(i), p[i], frames * sizeof(float));
                }
        }
        break;
 
        default:
                        memcpy (audio->data(i), p[i], frames * sizeof(float));
                }
        }
        break;
 
        default:
-               throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
+               throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format (stream))));
        }
 
        return audio;
 }
 
 AVSampleFormat
        }
 
        return audio;
 }
 
 AVSampleFormat
-FFmpegDecoder::audio_sample_format () const
+FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
 {
 {
-       if (!_ffmpeg_content->audio_stream()) {
-               return (AVSampleFormat) 0;
-       }
-       
-       return audio_codec_context()->sample_fmt;
+       return stream->stream (_format_context)->codec->sample_fmt;
 }
 
 int
 }
 
 int
-FFmpegDecoder::bytes_per_audio_sample () const
+FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
 {
 {
-       return av_get_bytes_per_sample (audio_sample_format ());
+       return av_get_bytes_per_sample (audio_sample_format (stream));
 }
 
 void
 }
 
 void
@@ -301,7 +302,8 @@ FFmpegDecoder::seek (ContentTime time, bool accurate)
 {
        VideoDecoder::seek (time, accurate);
        AudioDecoder::seek (time, accurate);
 {
        VideoDecoder::seek (time, accurate);
        AudioDecoder::seek (time, accurate);
-       
+       SubtitleDecoder::seek (time, accurate);
+
        /* If we are doing an `accurate' seek, we need to use pre-roll, as
           we don't really know what the seek will give us.
        */
        /* If we are doing an `accurate' seek, we need to use pre-roll, as
           we don't really know what the seek will give us.
        */
@@ -313,21 +315,16 @@ FFmpegDecoder::seek (ContentTime time, bool accurate)
           http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
        */
        
           http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
        */
        
-       ContentTime const u = time - _pts_offset;
-       int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
-
-       if (_ffmpeg_content->audio_stream ()) {
-               s = min (
-                       s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
-                       );
+       ContentTime u = time - _pts_offset;
+       if (u < ContentTime ()) {
+               u = ContentTime ();
        }
        }
-
-       av_seek_frame (_format_context, _video_stream, s, 0);
+       av_seek_frame (_format_context, _video_stream, u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base), AVSEEK_FLAG_BACKWARD);
 
        avcodec_flush_buffers (video_codec_context());
 
        avcodec_flush_buffers (video_codec_context());
-       if (audio_codec_context ()) {
-               avcodec_flush_buffers (audio_codec_context ());
-       }
+
+       /* XXX: should be flushing audio buffers? */
+       
        if (subtitle_codec_context ()) {
                avcodec_flush_buffers (subtitle_codec_context ());
        }
        if (subtitle_codec_context ()) {
                avcodec_flush_buffers (subtitle_codec_context ());
        }
@@ -341,11 +338,23 @@ FFmpegDecoder::decode_audio_packet ()
        */
        
        AVPacket copy_packet = _packet;
        */
        
        AVPacket copy_packet = _packet;
+
+       /* XXX: inefficient */
+       vector<shared_ptr<FFmpegAudioStream> > streams = ffmpeg_content()->ffmpeg_audio_streams ();
+       vector<shared_ptr<FFmpegAudioStream> >::const_iterator stream = streams.begin ();
+       while (stream != streams.end () && !(*stream)->uses_index (_format_context, copy_packet.stream_index)) {
+               ++stream;
+       }
+
+       if (stream == streams.end ()) {
+               /* The packet's stream may not be an audio one; just ignore it in this method if so */
+               return;
+       }
        
        while (copy_packet.size > 0) {
 
                int frame_finished;
        
        while (copy_packet.size > 0) {
 
                int frame_finished;
-               int decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
+               int decode_result = avcodec_decode_audio4 ((*stream)->stream (_format_context)->codec, _frame, &frame_finished, &copy_packet);
                if (decode_result < 0) {
                        /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
                           some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
                if (decode_result < 0) {
                        /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
                           some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
@@ -365,14 +374,14 @@ FFmpegDecoder::decode_audio_packet ()
                if (frame_finished) {
                        ContentTime const ct = ContentTime::from_seconds (
                                av_frame_get_best_effort_timestamp (_frame) *
                if (frame_finished) {
                        ContentTime const ct = ContentTime::from_seconds (
                                av_frame_get_best_effort_timestamp (_frame) *
-                               av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
+                               av_q2d ((*stream)->stream (_format_context)->time_base))
                                + _pts_offset;
                        
                        int const data_size = av_samples_get_buffer_size (
                                + _pts_offset;
                        
                        int const data_size = av_samples_get_buffer_size (
-                               0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
+                               0, (*stream)->stream(_format_context)->codec->channels, _frame->nb_samples, audio_sample_format (*stream), 1
                                );
 
                                );
 
-                       audio (deinterleave_audio (_frame->data, data_size), ct);
+                       audio (*stream, deinterleave_audio (*stream, _frame->data, data_size), ct);
                }
                        
                copy_packet.data += decode_result;
                }
                        
                copy_packet.data += decode_result;
@@ -433,35 +442,69 @@ FFmpegDecoder::decode_subtitle_packet ()
        if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
                return;
        }
        if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
                return;
        }
-
-       /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
-          indicate that the previous subtitle should stop.
-       */
+       
        if (sub.num_rects <= 0) {
        if (sub.num_rects <= 0) {
-               image_subtitle (ContentTimePeriod (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
+               /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
+                  indicate that the previous subtitle should stop.  We can ignore it here.
+               */
                return;
        } else if (sub.num_rects > 1) {
                throw DecodeError (_("multi-part subtitles not yet supported"));
        }
                return;
        } else if (sub.num_rects > 1) {
                throw DecodeError (_("multi-part subtitles not yet supported"));
        }
-               
+
        /* Subtitle PTS (within the source, not taking into account any of the
        /* Subtitle PTS (within the source, not taking into account any of the
-          source that we may have chopped off for the DCP)
+          source that we may have chopped off for the DCP).
        */
        */
-       ContentTimePeriod period = subtitle_period (sub) + _pts_offset;
-
+       FFmpegSubtitlePeriod sub_period = subtitle_period (sub);
+       ContentTimePeriod period;
+       period.from = sub_period.from + _pts_offset;
+       if (sub_period.to) {
+               /* We already know the subtitle period `to' time */
+               period.to = sub_period.to.get() + _pts_offset;
+       } else {
+               /* We have to look up the `to' time in the stream's records */
+               period.to = ffmpeg_content()->subtitle_stream()->find_subtitle_to (sub_period.from);
+       }
+       
        AVSubtitleRect const * rect = sub.rects[0];
 
        AVSubtitleRect const * rect = sub.rects[0];
 
-       if (rect->type != SUBTITLE_BITMAP) {
-               /* XXX */
-               // throw DecodeError (_("non-bitmap subtitles not yet supported"));
-               return;
+       switch (rect->type) {
+       case SUBTITLE_NONE:
+               break;
+       case SUBTITLE_BITMAP:
+               decode_bitmap_subtitle (rect, period);
+               break;
+       case SUBTITLE_TEXT:
+               cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
+               break;
+       case SUBTITLE_ASS:
+               cout << "XXX: SUBTITLE_ASS " << rect->ass << "\n";
+               break;
        }
        }
+       
+       avsubtitle_free (&sub);
+}
+
+list<ContentTimePeriod>
+FFmpegDecoder::image_subtitles_during (ContentTimePeriod p, bool starting) const
+{
+       return _ffmpeg_content->subtitles_during (p, starting);
+}
+
+list<ContentTimePeriod>
+FFmpegDecoder::text_subtitles_during (ContentTimePeriod, bool) const
+{
+       return list<ContentTimePeriod> ();
+}
 
 
+void
+FFmpegDecoder::decode_bitmap_subtitle (AVSubtitleRect const * rect, ContentTimePeriod period)
+{
        /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
           G, third B, fourth A.
        */
        shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
        /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
           G, third B, fourth A.
        */
        shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
-
+       
        /* Start of the first line in the subtitle */
        uint8_t* sub_p = rect->pict.data[0];
        /* sub_p looks up into a BGRA palette which is here
        /* Start of the first line in the subtitle */
        uint8_t* sub_p = rect->pict.data[0];
        /* sub_p looks up into a BGRA palette which is here
@@ -470,7 +513,7 @@ FFmpegDecoder::decode_subtitle_packet ()
        uint32_t const * palette = (uint32_t *) rect->pict.data[1];
        /* Start of the output data */
        uint32_t* out_p = (uint32_t *) image->data()[0];
        uint32_t const * palette = (uint32_t *) rect->pict.data[1];
        /* Start of the output data */
        uint32_t* out_p = (uint32_t *) image->data()[0];
-
+       
        for (int y = 0; y < rect->h; ++y) {
                uint8_t* sub_line_p = sub_p;
                uint32_t* out_line_p = out_p;
        for (int y = 0; y < rect->h; ++y) {
                uint8_t* sub_line_p = sub_p;
                uint32_t* out_line_p = out_p;
@@ -481,25 +524,15 @@ FFmpegDecoder::decode_subtitle_packet ()
                sub_p += rect->pict.linesize[0];
                out_p += image->stride()[0] / sizeof (uint32_t);
        }
                sub_p += rect->pict.linesize[0];
                out_p += image->stride()[0] / sizeof (uint32_t);
        }
-
+       
        dcp::Size const vs = _ffmpeg_content->video_size ();
        dcp::Size const vs = _ffmpeg_content->video_size ();
-
-       image_subtitle (
-               period,
-               image,
-               dcpomatic::Rect<double> (
-                       static_cast<double> (rect->x) / vs.width,
-                       static_cast<double> (rect->y) / vs.height,
-                       static_cast<double> (rect->w) / vs.width,
-                       static_cast<double> (rect->h) / vs.height
-                       )
+       dcpomatic::Rect<double> const scaled_rect (
+               static_cast<double> (rect->x) / vs.width,
+               static_cast<double> (rect->y) / vs.height,
+               static_cast<double> (rect->w) / vs.width,
+               static_cast<double> (rect->h) / vs.height
                );
        
                );
        
-       avsubtitle_free (&sub);
+       image_subtitle (period, image, scaled_rect);
 }
 
 }
 
-list<ContentTimePeriod>
-FFmpegDecoder::subtitles_during (ContentTimePeriod p, bool starting) const
-{
-       return _ffmpeg_content->subtitles_during (p, starting);
-}