Merge master.
[dcpomatic.git] / src / lib / ffmpeg_decoder.cc
index 1920f9275c66ba92d3ae85c9ddf81fbe2591253c..c9efd80fc47ba87b052362fd34b7940b570ff127 100644 (file)
@@ -33,7 +33,6 @@ extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 }
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 }
-#include "film.h"
 #include "filter.h"
 #include "exceptions.h"
 #include "image.h"
 #include "filter.h"
 #include "exceptions.h"
 #include "image.h"
@@ -56,20 +55,15 @@ using std::pair;
 using boost::shared_ptr;
 using boost::optional;
 using boost::dynamic_pointer_cast;
 using boost::shared_ptr;
 using boost::optional;
 using boost::dynamic_pointer_cast;
-using libdcp::Size;
+using dcp::Size;
 
 
-FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
-       : Decoder (f)
-       , VideoDecoder (f, c)
-       , AudioDecoder (f, c)
-       , SubtitleDecoder (f)
+FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
+       : VideoDecoder (c)
+       , AudioDecoder (c)
        , FFmpeg (c)
        , FFmpeg (c)
+       , _log (log)
        , _subtitle_codec_context (0)
        , _subtitle_codec (0)
        , _subtitle_codec_context (0)
        , _subtitle_codec (0)
-       , _decode_video (video)
-       , _decode_audio (audio)
-       , _pts_offset (0)
-       , _just_sought (false)
 {
        setup_subtitle ();
 
 {
        setup_subtitle ();
 
@@ -82,13 +76,11 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegC
           Then we remove big initial gaps in PTS and we allow our
           insertion of black frames to work.
 
           Then we remove big initial gaps in PTS and we allow our
           insertion of black frames to work.
 
-          We will do:
-            audio_pts_to_use = audio_pts_from_ffmpeg + pts_offset;
-            video_pts_to_use = video_pts_from_ffmpeg + pts_offset;
+          We will do pts_to_use = pts_from_ffmpeg + pts_offset;
        */
 
        */
 
-       bool const have_video = video && c->first_video();
-       bool const have_audio = audio && c->audio_stream() && c->audio_stream()->first_audio;
+       bool const have_video = c->first_video();
+       bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
 
        /* First, make one of them start at 0 */
 
 
        /* First, make one of them start at 0 */
 
@@ -102,15 +94,9 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegC
 
        /* Now adjust both so that the video pts starts on a frame */
        if (have_video && have_audio) {
 
        /* Now adjust both so that the video pts starts on a frame */
        if (have_video && have_audio) {
-               double first_video = c->first_video().get() + _pts_offset;
-               double const old_first_video = first_video;
-               
-               /* Round the first video up to a frame boundary */
-               if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
-                       first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
-               }
-
-               _pts_offset += first_video - old_first_video;
+               ContentTime first_video = c->first_video().get() + _pts_offset;
+               ContentTime const old_first_video = first_video;
+               _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
        }
 }
 
        }
 }
 
@@ -133,20 +119,15 @@ FFmpegDecoder::flush ()
        
        /* XXX: should we reset _packet.data and size after each *_decode_* call? */
        
        
        /* XXX: should we reset _packet.data and size after each *_decode_* call? */
        
-       if (_decode_video) {
-               while (decode_video_packet ()) {}
-       }
+       while (decode_video_packet ()) {}
        
        
-       if (_ffmpeg_content->audio_stream() && _decode_audio) {
+       if (_ffmpeg_content->audio_stream()) {
                decode_audio_packet ();
                decode_audio_packet ();
+               AudioDecoder::flush ();
        }
        }
-
-       /* Stop us being asked for any more data */
-       _video_position = _ffmpeg_content->video_length_after_3d_combine ();
-       _audio_position = _ffmpeg_content->audio_length ();
 }
 
 }
 
-void
+bool
 FFmpegDecoder::pass ()
 {
        int r = av_read_frame (_format_context, &_packet);
 FFmpegDecoder::pass ()
 {
        int r = av_read_frame (_format_context, &_packet);
@@ -156,29 +137,25 @@ FFmpegDecoder::pass ()
                        /* Maybe we should fail here, but for now we'll just finish off instead */
                        char buf[256];
                        av_strerror (r, buf, sizeof(buf));
                        /* Maybe we should fail here, but for now we'll just finish off instead */
                        char buf[256];
                        av_strerror (r, buf, sizeof(buf));
-                       shared_ptr<const Film> film = _film.lock ();
-                       assert (film);
-                       film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
+                       _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
                }
 
                flush ();
                }
 
                flush ();
-               return;
+               return true;
        }
 
        }
 
-       shared_ptr<const Film> film = _film.lock ();
-       assert (film);
-
        int const si = _packet.stream_index;
        
        int const si = _packet.stream_index;
        
-       if (si == _video_stream && _decode_video) {
+       if (si == _video_stream) {
                decode_video_packet ();
                decode_video_packet ();
-       } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
+       } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
                decode_audio_packet ();
                decode_audio_packet ();
-       } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && film->with_subtitles ()) {
+       } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
                decode_subtitle_packet ();
        }
 
        av_free_packet (&_packet);
                decode_subtitle_packet ();
        }
 
        av_free_packet (&_packet);
+       return false;
 }
 
 /** @param data pointer to array of pointers to buffers.
 }
 
 /** @param data pointer to array of pointers to buffers.
@@ -310,77 +287,131 @@ FFmpegDecoder::bytes_per_audio_sample () const
        return av_get_bytes_per_sample (audio_sample_format ());
 }
 
        return av_get_bytes_per_sample (audio_sample_format ());
 }
 
-void
-FFmpegDecoder::seek (VideoContent::Frame frame, bool accurate)
+int
+FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 {
 {
-       double const time_base = av_q2d (_format_context->streams[_video_stream]->time_base);
+       int frames_read = 0;
+       optional<ContentTime> last_video;
+       optional<ContentTime> last_audio;
 
 
-       /* If we are doing an accurate seek, our initial shot will be 5 frames (5 being
-          a number plucked from the air) earlier than we want to end up.  The loop below
-          will hopefully then step through to where we want to be.
-       */
-       int initial = frame;
+       while (!finished (last_video, last_audio, frames_read)) {
+               int r = av_read_frame (_format_context, &_packet);
+               if (r < 0) {
+                       /* We should flush our decoders here, possibly yielding a few more frames,
+                          but the consequence of having to do that is too hideous to contemplate.
+                          Instead we give up and say that you can't seek too close to the end
+                          of a file.
+                       */
+                       return frames_read;
+               }
+
+               ++frames_read;
+
+               double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
+
+               if (_packet.stream_index == _video_stream) {
+
+                       avcodec_get_frame_defaults (_frame);
+                       
+                       int finished = 0;
+                       r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
+                       if (r >= 0 && finished) {
+                               last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
+                       }
+
+               } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
+                       AVPacket copy_packet = _packet;
+                       while (copy_packet.size > 0) {
 
 
-       if (accurate) {
-               initial -= 5;
+                               int finished;
+                               r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
+                               if (r >= 0 && finished) {
+                                       last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
+                               }
+                                       
+                               copy_packet.data += r;
+                               copy_packet.size -= r;
+                       }
+               }
+               
+               av_free_packet (&_packet);
        }
 
        }
 
-       if (initial < 0) {
-               initial = 0;
+       return frames_read;
+}
+
+bool
+FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
+{
+       return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
+}
+
+bool
+FFmpegDecoder::seek_final_finished (int n, int done) const
+{
+       return n == done;
+}
+
+void
+FFmpegDecoder::seek_and_flush (ContentTime t)
+{
+       ContentTime const u = t - _pts_offset;
+       int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
+
+       if (_ffmpeg_content->audio_stream ()) {
+               s = min (
+                       s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
+                       );
        }
 
        }
 
-       /* Initial seek time in the stream's timebase */
-       int64_t const initial_vt = ((initial / _ffmpeg_content->video_frame_rate()) - _pts_offset) / time_base;
+       /* Ridiculous empirical hack */
+       s--;
+       if (s < 0) {
+               s = 0;
+       }
 
 
-       av_seek_frame (_format_context, _video_stream, initial_vt, AVSEEK_FLAG_BACKWARD);
+       av_seek_frame (_format_context, _video_stream, s, 0);
 
        avcodec_flush_buffers (video_codec_context());
 
        avcodec_flush_buffers (video_codec_context());
+       if (audio_codec_context ()) {
+               avcodec_flush_buffers (audio_codec_context ());
+       }
        if (_subtitle_codec_context) {
                avcodec_flush_buffers (_subtitle_codec_context);
        }
        if (_subtitle_codec_context) {
                avcodec_flush_buffers (_subtitle_codec_context);
        }
+}
 
 
-       /* This !accurate is piling hack upon hack; setting _just_sought to true
-          even with accurate == true defeats our attempt to align the start
-          of the video and audio.  Here we disable that defeat when accurate == true
-          i.e. when we are making a DCP rather than just previewing one.
-          Ewww.  This should be gone in 2.0.
+void
+FFmpegDecoder::seek (ContentTime time, bool accurate)
+{
+       VideoDecoder::seek (time, accurate);
+       AudioDecoder::seek (time, accurate);
+       
+       /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
+          a number plucked from the air) earlier than we want to end up.  The loop below
+          will hopefully then step through to where we want to be.
        */
        */
-       if (!accurate) {
-               _just_sought = true;
+
+       ContentTime pre_roll = accurate ? ContentTime::from_seconds (0.2) : ContentTime (0);
+       ContentTime initial_seek = time - pre_roll;
+       if (initial_seek < ContentTime (0)) {
+               initial_seek = ContentTime (0);
        }
        }
-       
-       _video_position = frame;
-       
-       if (frame == 0 || !accurate) {
-               /* We're already there, or we're as close as we need to be */
+
+       /* Initial seek time in the video stream's timebase */
+
+       seek_and_flush (initial_seek);
+
+       if (!accurate) {
+               /* That'll do */
                return;
        }
 
                return;
        }
 
-       while (1) {
-               int r = av_read_frame (_format_context, &_packet);
-               if (r < 0) {
-                       return;
-               }
-
-               if (_packet.stream_index != _video_stream) {
-                       av_free_packet (&_packet);
-                       continue;
-               }
-               
-               int finished = 0;
-               r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
-               if (r >= 0 && finished) {
-                       _video_position = rint (
-                               (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * _ffmpeg_content->video_frame_rate()
-                               );
+       int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 
 
-                       if (_video_position >= (frame - 1)) {
-                               av_free_packet (&_packet);
-                               break;
-                       }
-               }
-               
-               av_free_packet (&_packet);
+       seek_and_flush (initial_seek);
+       if (N > 0) {
+               minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
        }
 }
 
        }
 }
 
@@ -397,39 +428,23 @@ FFmpegDecoder::decode_audio_packet ()
 
                int frame_finished;
                int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 
                int frame_finished;
                int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
+
                if (decode_result < 0) {
                if (decode_result < 0) {
-                       shared_ptr<const Film> film = _film.lock ();
-                       assert (film);
-                       film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
+                       _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
                        return;
                }
 
                if (frame_finished) {
                        return;
                }
 
                if (frame_finished) {
-                       
-                       if (_audio_position == 0) {
-                               /* Where we are in the source, in seconds */
-                               double const pts = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
-                                       * av_frame_get_best_effort_timestamp(_frame) + _pts_offset;
-
-                               if (pts > 0) {
-                                       /* Emit some silence */
-                                       shared_ptr<AudioBuffers> silence (
-                                               new AudioBuffers (
-                                                       _ffmpeg_content->audio_channels(),
-                                                       pts * _ffmpeg_content->content_audio_frame_rate()
-                                                       )
-                                               );
-                                       
-                                       silence->make_silent ();
-                                       audio (silence, _audio_position);
-                               }
-                       }
+                       ContentTime const ct = ContentTime::from_seconds (
+                               av_frame_get_best_effort_timestamp (_frame) *
+                               av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
+                               + _pts_offset;
                        
                        int const data_size = av_samples_get_buffer_size (
                                0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
                                );
                        
                        int const data_size = av_samples_get_buffer_size (
                                0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
                                );
-                       
-                       audio (deinterleave_audio (_frame->data, data_size), _audio_position);
+
+                       audio (deinterleave_audio (_frame->data, data_size), ct);
                }
                        
                copy_packet.data += decode_result;
                }
                        
                copy_packet.data += decode_result;
@@ -450,18 +465,14 @@ FFmpegDecoder::decode_video_packet ()
        shared_ptr<FilterGraph> graph;
        
        list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
        shared_ptr<FilterGraph> graph;
        
        list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
-       while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
+       while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
                ++i;
        }
 
        if (i == _filter_graphs.end ()) {
                ++i;
        }
 
        if (i == _filter_graphs.end ()) {
-               shared_ptr<const Film> film = _film.lock ();
-               assert (film);
-
-               graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
+               graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
                _filter_graphs.push_back (graph);
                _filter_graphs.push_back (graph);
-
-               film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
+               _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
        } else {
                graph = *i;
        }
        } else {
                graph = *i;
        }
@@ -473,49 +484,10 @@ FFmpegDecoder::decode_video_packet ()
                shared_ptr<Image> image = i->first;
                
                if (i->second != AV_NOPTS_VALUE) {
                shared_ptr<Image> image = i->first;
                
                if (i->second != AV_NOPTS_VALUE) {
-
-                       double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset;
-
-                       if (_just_sought) {
-                               /* We just did a seek, so disable any attempts to correct for where we
-                                  are / should be.
-                               */
-                               _video_position = rint (pts * _ffmpeg_content->video_frame_rate ());
-                               _just_sought = false;
-                       }
-
-                       double const next = _video_position / _ffmpeg_content->video_frame_rate();
-                       double const one_frame = 1 / _ffmpeg_content->video_frame_rate ();
-                       double delta = pts - next;
-
-                       while (delta > one_frame) {
-                               /* This PTS is more than one frame forward in time of where we think we should be; emit
-                                  a black frame.
-                               */
-
-                               /* XXX: I think this should be a copy of the last frame... */
-                               boost::shared_ptr<Image> black (
-                                       new Image (
-                                               static_cast<AVPixelFormat> (_frame->format),
-                                               libdcp::Size (video_codec_context()->width, video_codec_context()->height),
-                                               true
-                                               )
-                                       );
-                               
-                               black->make_black ();
-                               video (image, false, _video_position);
-                               delta -= one_frame;
-                       }
-
-                       if (delta > -one_frame) {
-                               /* This PTS is within a frame of being right; emit this (otherwise it will be dropped) */
-                               video (image, false, _video_position);
-                       }
-                               
+                       double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
+                       video (image, rint (pts * _ffmpeg_content->video_frame_rate ()));
                } else {
                } else {
-                       shared_ptr<const Film> film = _film.lock ();
-                       assert (film);
-                       film->log()->log ("Dropping frame without PTS");
+                       _log->log ("Dropping frame without PTS");
                }
        }
 
                }
        }
 
@@ -548,14 +520,6 @@ FFmpegDecoder::setup_subtitle ()
        }
 }
 
        }
 }
 
-bool
-FFmpegDecoder::done () const
-{
-       bool const vd = !_decode_video || (_video_position >= _ffmpeg_content->video_length());
-       bool const ad = !_decode_audio || !_ffmpeg_content->audio_stream() || (_audio_position >= _ffmpeg_content->audio_length());
-       return vd && ad;
-}
-       
 void
 FFmpegDecoder::decode_subtitle_packet ()
 {
 void
 FFmpegDecoder::decode_subtitle_packet ()
 {
@@ -569,20 +533,20 @@ FFmpegDecoder::decode_subtitle_packet ()
           indicate that the previous subtitle should stop.
        */
        if (sub.num_rects <= 0) {
           indicate that the previous subtitle should stop.
        */
        if (sub.num_rects <= 0) {
-               subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
+               image_subtitle (ContentTime (), ContentTime (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
                return;
        } else if (sub.num_rects > 1) {
                throw DecodeError (_("multi-part subtitles not yet supported"));
        }
                
                return;
        } else if (sub.num_rects > 1) {
                throw DecodeError (_("multi-part subtitles not yet supported"));
        }
                
-       /* Subtitle PTS in seconds (within the source, not taking into account any of the
+       /* Subtitle PTS (within the source, not taking into account any of the
           source that we may have chopped off for the DCP)
        */
           source that we may have chopped off for the DCP)
        */
-       double const packet_time = (static_cast<double> (sub.pts ) / AV_TIME_BASE) + _pts_offset;
-
+       ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
+       
        /* hence start time for this sub */
        /* hence start time for this sub */
-       Time const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
-       Time const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
+       ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
+       ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
 
        AVSubtitleRect const * rect = sub.rects[0];
 
 
        AVSubtitleRect const * rect = sub.rects[0];
 
@@ -593,7 +557,7 @@ FFmpegDecoder::decode_subtitle_packet ()
        /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
           G, third B, fourth A.
        */
        /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
           G, third B, fourth A.
        */
-       shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
+       shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 
        /* Start of the first line in the subtitle */
        uint8_t* sub_p = rect->pict.data[0];
 
        /* Start of the first line in the subtitle */
        uint8_t* sub_p = rect->pict.data[0];
@@ -615,20 +579,19 @@ FFmpegDecoder::decode_subtitle_packet ()
                out_p += image->stride()[0] / sizeof (uint32_t);
        }
 
                out_p += image->stride()[0] / sizeof (uint32_t);
        }
 
-       libdcp::Size const vs = _ffmpeg_content->video_size ();
+       dcp::Size const vs = _ffmpeg_content->video_size ();
 
 
-       subtitle (
+       image_subtitle (
+               from,
+               to,
                image,
                dcpomatic::Rect<double> (
                        static_cast<double> (rect->x) / vs.width,
                        static_cast<double> (rect->y) / vs.height,
                        static_cast<double> (rect->w) / vs.width,
                        static_cast<double> (rect->h) / vs.height
                image,
                dcpomatic::Rect<double> (
                        static_cast<double> (rect->x) / vs.width,
                        static_cast<double> (rect->y) / vs.height,
                        static_cast<double> (rect->w) / vs.width,
                        static_cast<double> (rect->h) / vs.height
-                       ),
-               from,
-               to
+                       )
                );
                );
-                         
        
        avsubtitle_free (&sub);
 }
        
        avsubtitle_free (&sub);
 }