Improve FFmpeg sync, in theory.
author Carl Hetherington <cth@carlh.net>
Wed, 26 Jun 2013 15:35:43 +0000 (16:35 +0100)
committer Carl Hetherington <cth@carlh.net>
Wed, 26 Jun 2013 15:35:43 +0000 (16:35 +0100)
15 files changed:
src/lib/audio_decoder.cc
src/lib/audio_decoder.h
src/lib/ffmpeg_content.cc
src/lib/ffmpeg_content.h
src/lib/ffmpeg_decoder.cc
src/lib/ffmpeg_decoder.h
src/lib/ffmpeg_examiner.cc
src/lib/ffmpeg_examiner.h
src/lib/imagemagick_decoder.cc
src/lib/sndfile_decoder.cc
src/lib/video_decoder.cc
src/lib/video_decoder.h
test/ffmpeg_examiner_test.cc
test/ffmpeg_pts_offset.cc [new file with mode: 0644]
test/test.cc

index 3964719104956a46e16f734fd8acaadcaa7b9df0..dc49a1846e74ada429e20603d213666d858ecd52 100644 (file)
@@ -33,7 +33,7 @@ using boost::shared_ptr;
 
 AudioDecoder::AudioDecoder (shared_ptr<const Film> f)
        : Decoder (f)
-       , _next_audio_frame (0)
+       , _audio_position (0)
 {
 }
 
@@ -71,5 +71,5 @@ void
 AudioDecoder::audio (shared_ptr<const AudioBuffers> data, AudioContent::Frame frame)
 {
        Audio (data, frame);
-       _next_audio_frame = frame + data->frames ();
+       _audio_position = frame + data->frames ();
 }
index 168348c2e3fdf52e0bb7c5a617dbd07718ce5e71..ddfb296c9a9b4fffbf8e2b94928594907566fb68 100644 (file)
@@ -43,7 +43,7 @@ public:
 protected:
 
        void audio (boost::shared_ptr<const AudioBuffers>, AudioContent::Frame);
-       AudioContent::Frame _next_audio_frame;
+       AudioContent::Frame _audio_position;
 };
 
 #endif
index 378bd98cbb80519e608dcdd16acc20879b71f531..1135cc9a3add1f07e2416bba28a279748120de8c 100644 (file)
@@ -296,7 +296,7 @@ FFmpegAudioStream::FFmpegAudioStream (shared_ptr<const cxml::Node> node)
        frame_rate = node->number_child<int> ("FrameRate");
        channels = node->number_child<int64_t> ("Channels");
        mapping = AudioMapping (node->node_child ("Mapping"));
-       start = node->optional_number_child<Time> ("Start");
+       first_audio = node->optional_number_child<Time> ("FirstAudio");
 }
 
 void
@@ -306,8 +306,8 @@ FFmpegAudioStream::as_xml (xmlpp::Node* root) const
        root->add_child("Id")->add_child_text (lexical_cast<string> (id));
        root->add_child("FrameRate")->add_child_text (lexical_cast<string> (frame_rate));
        root->add_child("Channels")->add_child_text (lexical_cast<string> (channels));
-       if (start) {
-               root->add_child("Start")->add_child_text (lexical_cast<string> (start));
+       if (first_audio) {
+               root->add_child("FirstAudio")->add_child_text (lexical_cast<string> (first_audio));
        }
        mapping.as_xml (root->add_child("Mapping"));
 }
index fc45267ee553a26d0e3141bdc2abad02e7388c7d..c5ccee77a4509c55b87f374147a5333bdcd0685b 100644 (file)
@@ -46,7 +46,7 @@ public:
         int frame_rate;
        int channels;
        AudioMapping mapping;
-       boost::optional<Time> start;
+       boost::optional<double> first_audio;
 };
 
 extern bool operator== (FFmpegAudioStream const & a, FFmpegAudioStream const & b);
@@ -134,6 +134,11 @@ public:
 
         void set_subtitle_stream (boost::shared_ptr<FFmpegSubtitleStream>);
         void set_audio_stream (boost::shared_ptr<FFmpegAudioStream>);
+
+       boost::optional<Time> first_video () const {
+               boost::mutex::scoped_lock lm (_mutex);
+               return _first_video;
+       }
        
 private:
        std::vector<boost::shared_ptr<FFmpegSubtitleStream> > _subtitle_streams;
index 1d8a00866e0627fa82dc583cd664e02573335b83..2d1792390dbdbb2e743124d5b6d0c8d87c832afd 100644 (file)
@@ -66,8 +66,30 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegC
        , _subtitle_codec (0)
        , _decode_video (video)
        , _decode_audio (audio)
+       , _pts_offset (0)
 {
        setup_subtitle ();
+
+       if (video && audio && c->audio_stream() && c->first_video() && c->audio_stream()->first_audio) {
+               _pts_offset = compute_pts_offset (c->first_video().get(), c->audio_stream()->first_audio.get(), c->video_frame_rate());
+       }
+}
+
+double
+FFmpegDecoder::compute_pts_offset (double first_video, double first_audio, float video_frame_rate)
+{
+       assert (first_video >= 0);
+       assert (first_audio >= 0);
+       
+       double const old_first_video = first_video;
+       
+       /* Round the first video to a frame boundary */
+       if (fabs (rint (first_video * video_frame_rate) - first_video * video_frame_rate) > 1e-6) {
+               first_video = ceil (first_video * video_frame_rate) / video_frame_rate;
+       }
+
+       /* Compute the required offset (also removing any common start delay) */
+       return first_video - old_first_video - min (first_video, first_audio);
 }
 
 FFmpegDecoder::~FFmpegDecoder ()
@@ -108,8 +130,8 @@ FFmpegDecoder::pass ()
                }
 
                /* Stop us being asked for any more data */
-               _next_video_frame = _ffmpeg_content->video_length ();
-               _next_audio_frame = _ffmpeg_content->audio_length ();
+               _video_position = _ffmpeg_content->video_length ();
+               _audio_position = _ffmpeg_content->audio_length ();
                return;
        }
 
@@ -267,11 +289,11 @@ FFmpegDecoder::seek (VideoContent::Frame frame)
 void
 FFmpegDecoder::seek_back ()
 {
-       if (_next_video_frame == 0) {
+       if (_video_position == 0) {
                return;
        }
        
-       do_seek (_next_video_frame - 1, true, true);
+       do_seek (_video_position - 1, true, true);
 }
 
 void
@@ -327,17 +349,33 @@ FFmpegDecoder::decode_audio_packet ()
                int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
                if (decode_result >= 0) {
                        if (frame_finished) {
-                       
-                               /* Where we are in the source, in seconds */
-                               double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
-                                       * av_frame_get_best_effort_timestamp(_frame);
+
+                               if (_audio_position == 0) {
+                                       /* Where we are in the source, in seconds */
+                                       double const pts = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
+                                               * av_frame_get_best_effort_timestamp(_frame) - _pts_offset;
+
+                                       if (pts > 0) {
+                                               /* Emit some silence */
+                                               shared_ptr<AudioBuffers> silence (
+                                                       new AudioBuffers (
+                                                               _ffmpeg_content->audio_channels(),
+                                                               pts * _ffmpeg_content->content_audio_frame_rate()
+                                                               )
+                                                       );
+                                               
+                                               silence->make_silent ();
+                                               audio (silence, _audio_position);
+                                       }
+                               }
+                                       
                                
                                int const data_size = av_samples_get_buffer_size (
                                        0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
                                        );
                                
                                assert (audio_codec_context()->channels == _ffmpeg_content->audio_channels());
-                               Audio (deinterleave_audio (_frame->data, data_size), source_pts_seconds * _ffmpeg_content->content_audio_frame_rate());
+                               audio (deinterleave_audio (_frame->data, data_size), _audio_position);
                        }
                        
                        copy_packet.data += decode_result;
@@ -389,8 +427,8 @@ FFmpegDecoder::decode_video_packet ()
                int64_t const bet = av_frame_get_best_effort_timestamp (_frame);
                if (bet != AV_NOPTS_VALUE) {
 
-                       double const pts = bet * av_q2d (_format_context->streams[_video_stream]->time_base);
-                       double const next = _next_video_frame / _ffmpeg_content->video_frame_rate();
+                       double const pts = bet * av_q2d (_format_context->streams[_video_stream]->time_base) - _pts_offset;
+                       double const next = _video_position / _ffmpeg_content->video_frame_rate();
                        double const one_frame = 1 / _ffmpeg_content->video_frame_rate ();
                        double delta = pts - next;
 
@@ -407,13 +445,13 @@ FFmpegDecoder::decode_video_packet ()
                                        );
                                
                                black->make_black ();
-                               video (image, false, _next_video_frame);
+                               video (image, false, _video_position);
                                delta -= one_frame;
                        }
 
                        if (delta > -one_frame) {
                                /* This PTS is within a frame of being right; emit this (otherwise it will be dropped) */
-                               video (image, false, _next_video_frame);
+                               video (image, false, _video_position);
                        }
                } else {
                        shared_ptr<const Film> film = _film.lock ();
@@ -450,8 +488,8 @@ FFmpegDecoder::setup_subtitle ()
 bool
 FFmpegDecoder::done () const
 {
-       bool const vd = !_decode_video || (_next_video_frame >= _ffmpeg_content->video_length());
-       bool const ad = !_decode_audio || !_ffmpeg_content->audio_stream() || (_next_audio_frame >= _ffmpeg_content->audio_length());
+       bool const vd = !_decode_video || (_video_position >= _ffmpeg_content->video_length());
+       bool const ad = !_decode_audio || !_ffmpeg_content->audio_stream() || (_audio_position >= _ffmpeg_content->audio_length());
        return vd && ad;
 }
        
index a8eabb9725c533870d38887436f68ae793232716..8f0482aad8331dcd12685e1d4724e08469b20eb8 100644 (file)
@@ -38,6 +38,7 @@ extern "C" {
 #include "ffmpeg.h"
 
 class Film;
+class ffmpeg_pts_offset_test;
 
 /** @class FFmpegDecoder
  *  @brief A decoder using FFmpeg to decode content.
@@ -54,11 +55,14 @@ public:
        bool done () const;
 
 private:
+       friend class ::ffmpeg_pts_offset_test;
 
        /* No copy construction */
        FFmpegDecoder (FFmpegDecoder const &);
        FFmpegDecoder& operator= (FFmpegDecoder const &);
 
+       static double compute_pts_offset (double, double, float);
+
        void setup_subtitle ();
 
        AVSampleFormat audio_sample_format () const;
@@ -79,4 +83,6 @@ private:
 
        bool _decode_video;
        bool _decode_audio;
+
+       double _pts_offset;
 };
index 6f1524f50e2ccf95be3c33c1ef89c9157158fe55..f45b0fe52e6dad125de259ca773d2f2c45eb8689 100644 (file)
@@ -79,9 +79,9 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
                        }
                } else {
                        for (size_t i = 0; i < _audio_streams.size(); ++i) {
-                               if (_packet.stream_index == _audio_streams[i]->id && !_audio_streams[i]->start) {
+                               if (_packet.stream_index == _audio_streams[i]->id && !_audio_streams[i]->first_audio) {
                                        if (avcodec_decode_audio4 (context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
-                                               _audio_streams[i]->start = frame_time (_audio_streams[i]->id);
+                                               _audio_streams[i]->first_audio = frame_time (_audio_streams[i]->id);
                                        }
                                }
                        }
@@ -90,7 +90,7 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
                bool have_all_audio = true;
                size_t i = 0;
                while (i < _audio_streams.size() && have_all_audio) {
-                       have_all_audio = _audio_streams[i]->start;
+                       have_all_audio = _audio_streams[i]->first_audio;
                        ++i;
                }
 
@@ -102,14 +102,14 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
        }
 }
 
-optional<Time>
+optional<double>
 FFmpegExaminer::frame_time (int stream) const
 {
-       optional<Time> t;
+       optional<double> t;
        
        int64_t const bet = av_frame_get_best_effort_timestamp (_frame);
        if (bet != AV_NOPTS_VALUE) {
-               t = bet * av_q2d (_format_context->streams[stream]->time_base) * TIME_HZ;
+               t = bet * av_q2d (_format_context->streams[stream]->time_base);
        }
 
        return t;
index 5cf9c2d0a5b8454c2918b196d8227cdcbc071619..ec84865ed7cce45fdcc40999e5bb4f197fb55ce2 100644 (file)
@@ -41,15 +41,15 @@ public:
                return _audio_streams;
        }
 
-       boost::optional<Time> first_video () const {
+       boost::optional<double> first_video () const {
                return _first_video;
        }
        
 private:
        std::string stream_name (AVStream* s) const;
-       boost::optional<Time> frame_time (int) const;
+       boost::optional<double> frame_time (int) const;
        
         std::vector<boost::shared_ptr<FFmpegSubtitleStream> > _subtitle_streams;
         std::vector<boost::shared_ptr<FFmpegAudioStream> > _audio_streams;
-       boost::optional<Time> _first_video;
+       boost::optional<double> _first_video;
 };
index 49b9d4911c2ff5671048860c1f9fdd1bdf5591cf..04d3d9df7c47d166c52e7692b3977d550993b547 100644 (file)
@@ -43,12 +43,12 @@ ImageMagickDecoder::ImageMagickDecoder (shared_ptr<const Film> f, shared_ptr<con
 void
 ImageMagickDecoder::pass ()
 {
-       if (_next_video_frame >= _imagemagick_content->video_length ()) {
+       if (_video_position >= _imagemagick_content->video_length ()) {
                return;
        }
 
        if (_image) {
-               video (_image, true, _next_video_frame);
+               video (_image, true, _video_position);
                return;
        }
 
@@ -71,25 +71,25 @@ ImageMagickDecoder::pass ()
 
        delete magick_image;
 
-       video (_image, false, _next_video_frame);
+       video (_image, false, _video_position);
 }
 
 void
 ImageMagickDecoder::seek (VideoContent::Frame frame)
 {
-       _next_video_frame = frame;
+       _video_position = frame;
 }
 
 void
 ImageMagickDecoder::seek_back ()
 {
-       if (_next_video_frame > 0) {
-               _next_video_frame--;
+       if (_video_position > 0) {
+               _video_position--;
        }
 }
 
 bool
 ImageMagickDecoder::done () const
 {
-       return _next_video_frame >= _imagemagick_content->video_length ();
+       return _video_position >= _imagemagick_content->video_length ();
 }
index 9030021e7c672d551eb309c788c4b4d9f7a7299a..80a6afd2baf79eae384164a310119a127e21c5bf 100644 (file)
@@ -115,5 +115,5 @@ SndfileDecoder::audio_frame_rate () const
 bool
 SndfileDecoder::done () const
 {
-       return _next_audio_frame > _sndfile_content->audio_length ();
+       return _audio_position >= _sndfile_content->audio_length ();
 }
index b5cc7d15887717e9a506141d74ab0066f265c3be..f61e63d4d1a4c312c99054c436bfee462f2afdab 100644 (file)
@@ -30,7 +30,7 @@ using boost::shared_ptr;
 
 VideoDecoder::VideoDecoder (shared_ptr<const Film> f)
        : Decoder (f)
-       , _next_video_frame (0)
+       , _video_position (0)
 {
 
 }
@@ -39,7 +39,7 @@ void
 VideoDecoder::video (shared_ptr<const Image> image, bool same, VideoContent::Frame frame)
 {
         Video (image, same, frame);
-       _next_video_frame = frame + 1;
+       _video_position = frame + 1;
 }
 
 #if 0
index c86248417bdd8b4f8147fd149fb6fbe3279e2775..d24219d956cba2df1b92b8a58e2d3f5bc8d24d78 100644 (file)
@@ -43,7 +43,7 @@ public:
 protected:
 
        void video (boost::shared_ptr<const Image>, bool, VideoContent::Frame);
-       VideoContent::Frame _next_video_frame;
+       VideoContent::Frame _video_position;
 };
 
 #endif
index 6e73038f10c6e125640317f9b73132d594982593..296444ba76bcda955a23da23211603b831ed465e 100644 (file)
@@ -25,7 +25,7 @@ BOOST_AUTO_TEST_CASE (ffmpeg_examiner_test)
        shared_ptr<FFmpegContent> content (new FFmpegContent (film, "test/data/count300bd24.m2ts"));
        shared_ptr<FFmpegExaminer> examiner (new FFmpegExaminer (content));
 
-       BOOST_CHECK_EQUAL (examiner->first_video().get(), 57604000);
+       BOOST_CHECK_EQUAL (examiner->first_video().get(), 600.04166666666674);
        BOOST_CHECK_EQUAL (examiner->audio_streams().size(), 1);
-       BOOST_CHECK_EQUAL (examiner->audio_streams()[0]->start.get(), 57600000);
+       BOOST_CHECK_EQUAL (examiner->audio_streams()[0]->first_audio.get(), 600);
 }
diff --git a/test/ffmpeg_pts_offset.cc b/test/ffmpeg_pts_offset.cc
new file mode 100644 (file)
index 0000000..15c2b38
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+    Copyright (C) 2013 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+BOOST_AUTO_TEST_CASE (ffmpeg_pts_offset_test)
+{
+       /* Sound == video so no offset required */
+       BOOST_CHECK_EQUAL (FFmpegDecoder::compute_pts_offset (0, 0, 24), 0);
+
+       /* Common offset should be removed */
+       BOOST_CHECK_CLOSE (FFmpegDecoder::compute_pts_offset (42, 42, 24), -42, 1e-9);
+
+       /* Video is on a frame boundary */
+       BOOST_CHECK_EQUAL (FFmpegDecoder::compute_pts_offset (1.0 / 24.0, 0, 24), 0);
+
+       /* Again, video is on a frame boundary */
+       BOOST_CHECK_EQUAL (FFmpegDecoder::compute_pts_offset (1.0 / 23.97, 0, 23.97), 0);
+
+       /* And again, video is on a frame boundary */
+       BOOST_CHECK_EQUAL (FFmpegDecoder::compute_pts_offset (3.0 / 23.97, 0, 23.97), 0);
+
+       /* Off a frame boundary */
+       BOOST_CHECK_CLOSE (FFmpegDecoder::compute_pts_offset (1.0 / 24.0 - 0.0215, 0, 24), 0.0215, 1e-9);
+}
index 3434a3f09af9c4a79384643f50dc6b06e43dddec..4494540a2ac31e5069f1a6c3f192b4c8be837b14 100644 (file)
@@ -154,6 +154,7 @@ check_dcp (string ref, string check)
        BOOST_CHECK (ref_dcp.equals (check_dcp, options, boost::bind (note, _1, _2)));
 }
 
+#include "ffmpeg_pts_offset.cc"
 #include "ffmpeg_examiner_test.cc"
 #include "black_fill_test.cc"
 #include "scaling_test.cc"