Improve FFmpeg sync, in theory.

author Carl Hetherington <cth@carlh.net>

Wed, 26 Jun 2013 15:35:43 +0000 (16:35 +0100)

committer Carl Hetherington <cth@carlh.net>

Wed, 26 Jun 2013 15:35:43 +0000 (16:35 +0100)
author Carl Hetherington <cth@carlh.net>
Wed, 26 Jun 2013 15:35:43 +0000 (16:35 +0100)
committer Carl Hetherington <cth@carlh.net>
Wed, 26 Jun 2013 15:35:43 +0000 (16:35 +0100)
diff --git a/src/lib/audio_decoder.cc b/src/lib/audio_decoder.cc

index 3964719104956a46e16f734fd8acaadcaa7b9df0..dc49a1846e74ada429e20603d213666d858ecd52 100644 (file)
--- a/src/lib/audio_decoder.cc
+++ b/src/lib/audio_decoder.cc
@@ -33,7 +33,7 @@ using boost::shared_ptr;
  
  AudioDecoder::AudioDecoder (shared_ptr<const Film> f)
         : Decoder (f)
-       , _next_audio_frame (0)
+       , _audio_position (0)
  {
  }
  
@@ -71,5 +71,5 @@ void
  AudioDecoder::audio (shared_ptr<const AudioBuffers> data, AudioContent::Frame frame)
  {
         Audio (data, frame);
-       _next_audio_frame = frame + data->frames ();
+       _audio_position = frame + data->frames ();
  }
diff --git a/src/lib/audio_decoder.h b/src/lib/audio_decoder.h

index 168348c2e3fdf52e0bb7c5a617dbd07718ce5e71..ddfb296c9a9b4fffbf8e2b94928594907566fb68 100644 (file)
--- a/src/lib/audio_decoder.h
+++ b/src/lib/audio_decoder.h
@@ -43,7 +43,7 @@ public:
  protected:
  
         void audio (boost::shared_ptr<const AudioBuffers>, AudioContent::Frame);
-       AudioContent::Frame _next_audio_frame;
+       AudioContent::Frame _audio_position;
  };
  
  #endif
diff --git a/src/lib/ffmpeg_content.cc b/src/lib/ffmpeg_content.cc

index 378bd98cbb80519e608dcdd16acc20879b71f531..1135cc9a3add1f07e2416bba28a279748120de8c 100644 (file)
--- a/src/lib/ffmpeg_content.cc
+++ b/src/lib/ffmpeg_content.cc
@@ -296,7 +296,7 @@ FFmpegAudioStream::FFmpegAudioStream (shared_ptr<const cxml::Node> node)
         frame_rate = node->number_child<int> ("FrameRate");
         channels = node->number_child<int64_t> ("Channels");
         mapping = AudioMapping (node->node_child ("Mapping"));
-       start = node->optional_number_child<Time> ("Start");
+       first_audio = node->optional_number_child<Time> ("FirstAudio");
  }
  
  void
@@ -306,8 +306,8 @@ FFmpegAudioStream::as_xml (xmlpp::Node* root) const
         root->add_child("Id")->add_child_text (lexical_cast<string> (id));
         root->add_child("FrameRate")->add_child_text (lexical_cast<string> (frame_rate));
         root->add_child("Channels")->add_child_text (lexical_cast<string> (channels));
-       if (start) {
-               root->add_child("Start")->add_child_text (lexical_cast<string> (start));
+       if (first_audio) {
+               root->add_child("FirstAudio")->add_child_text (lexical_cast<string> (first_audio));
         }
         mapping.as_xml (root->add_child("Mapping"));
  }
diff --git a/src/lib/ffmpeg_content.h b/src/lib/ffmpeg_content.h

index fc45267ee553a26d0e3141bdc2abad02e7388c7d..c5ccee77a4509c55b87f374147a5333bdcd0685b 100644 (file)
--- a/src/lib/ffmpeg_content.h
+++ b/src/lib/ffmpeg_content.h
@@ -46,7 +46,7 @@ public:
          int frame_rate;
         int channels;
         AudioMapping mapping;
-       boost::optional<Time> start;
+       boost::optional<double> first_audio;
  };
  
  extern bool operator== (FFmpegAudioStream const & a, FFmpegAudioStream const & b);
@@ -134,6 +134,11 @@ public:
  
          void set_subtitle_stream (boost::shared_ptr<FFmpegSubtitleStream>);
          void set_audio_stream (boost::shared_ptr<FFmpegAudioStream>);
+
+       boost::optional<Time> first_video () const {
+               boost::mutex::scoped_lock lm (_mutex);
+               return _first_video;
+       }
         
  private:
         std::vector<boost::shared_ptr<FFmpegSubtitleStream> > _subtitle_streams;
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc

index 1d8a00866e0627fa82dc583cd664e02573335b83..2d1792390dbdbb2e743124d5b6d0c8d87c832afd 100644 (file)
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -66,8 +66,30 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegC
         , _subtitle_codec (0)
         , _decode_video (video)
         , _decode_audio (audio)
+       , _pts_offset (0)
  {
         setup_subtitle ();
+
+       if (video && audio && c->audio_stream() && c->first_video() && c->audio_stream()->first_audio) {
+               _pts_offset = compute_pts_offset (c->first_video().get(), c->audio_stream()->first_audio.get(), c->video_frame_rate());
+       }
+}
+
+double
+FFmpegDecoder::compute_pts_offset (double first_video, double first_audio, float video_frame_rate)
+{
+       assert (first_video >= 0);
+       assert (first_audio >= 0);
+       
+       double const old_first_video = first_video;
+       
+       /* Round the first video to a frame boundary */
+       if (fabs (rint (first_video * video_frame_rate) - first_video * video_frame_rate) > 1e-6) {
+               first_video = ceil (first_video * video_frame_rate) / video_frame_rate;
+       }
+
+       /* Compute the required offset (also removing any common start delay) */
+       return first_video - old_first_video - min (first_video, first_audio);
  }
  
  FFmpegDecoder::~FFmpegDecoder ()
@@ -108,8 +130,8 @@ FFmpegDecoder::pass ()
                 }
  
                 /* Stop us being asked for any more data */
-               _next_video_frame = _ffmpeg_content->video_length ();
-               _next_audio_frame = _ffmpeg_content->audio_length ();
+               _video_position = _ffmpeg_content->video_length ();
+               _audio_position = _ffmpeg_content->audio_length ();
                 return;
         }
  
@@ -267,11 +289,11 @@ FFmpegDecoder::seek (VideoContent::Frame frame)
  void
  FFmpegDecoder::seek_back ()
  {
-       if (_next_video_frame == 0) {
+       if (_video_position == 0) {
                 return;
         }
         
-       do_seek (_next_video_frame - 1, true, true);
+       do_seek (_video_position - 1, true, true);
  }
  
  void
@@ -327,17 +349,33 @@ FFmpegDecoder::decode_audio_packet ()
                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
                 if (decode_result >= 0) {
                         if (frame_finished) {
-                       
-                               /* Where we are in the source, in seconds */
-                               double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
-                                       * av_frame_get_best_effort_timestamp(_frame);
+
+                               if (_audio_position == 0) {
+                                       /* Where we are in the source, in seconds */
+                                       double const pts = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
+                                               * av_frame_get_best_effort_timestamp(_frame) - _pts_offset;
+
+                                       if (pts > 0) {
+                                               /* Emit some silence */
+                                               shared_ptr<AudioBuffers> silence (
+                                                       new AudioBuffers (
+                                                               _ffmpeg_content->audio_channels(),
+                                                               pts * _ffmpeg_content->content_audio_frame_rate()
+                                                               )
+                                                       );
+                                               
+                                               silence->make_silent ();
+                                               audio (silence, _audio_position);
+                                       }
+                               }
+                                       
                                 
                                 int const data_size = av_samples_get_buffer_size (
                                         0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
                                         );
                                 
                                 assert (audio_codec_context()->channels == _ffmpeg_content->audio_channels());
-                               Audio (deinterleave_audio (_frame->data, data_size), source_pts_seconds * _ffmpeg_content->content_audio_frame_rate());
+                               audio (deinterleave_audio (_frame->data, data_size), _audio_position);
                         }
                         
                         copy_packet.data += decode_result;
@@ -389,8 +427,8 @@ FFmpegDecoder::decode_video_packet ()
                 int64_t const bet = av_frame_get_best_effort_timestamp (_frame);
                 if (bet != AV_NOPTS_VALUE) {
  
-                       double const pts = bet * av_q2d (_format_context->streams[_video_stream]->time_base);
-                       double const next = _next_video_frame / _ffmpeg_content->video_frame_rate();
+                       double const pts = bet * av_q2d (_format_context->streams[_video_stream]->time_base) - _pts_offset;
+                       double const next = _video_position / _ffmpeg_content->video_frame_rate();
                         double const one_frame = 1 / _ffmpeg_content->video_frame_rate ();
                         double delta = pts - next;
  
@@ -407,13 +445,13 @@ FFmpegDecoder::decode_video_packet ()
                                         );
                                 
                                 black->make_black ();
-                               video (image, false, _next_video_frame);
+                               video (image, false, _video_position);
                                 delta -= one_frame;
                         }
  
                         if (delta > -one_frame) {
                                 /* This PTS is within a frame of being right; emit this (otherwise it will be dropped) */
-                               video (image, false, _next_video_frame);
+                               video (image, false, _video_position);
                         }
                 } else {
                         shared_ptr<const Film> film = _film.lock ();
@@ -450,8 +488,8 @@ FFmpegDecoder::setup_subtitle ()
  bool
  FFmpegDecoder::done () const
  {
-       bool const vd = !_decode_video || (_next_video_frame >= _ffmpeg_content->video_length());
-       bool const ad = !_decode_audio || !_ffmpeg_content->audio_stream() || (_next_audio_frame >= _ffmpeg_content->audio_length());
+       bool const vd = !_decode_video || (_video_position >= _ffmpeg_content->video_length());
+       bool const ad = !_decode_audio || !_ffmpeg_content->audio_stream() || (_audio_position >= _ffmpeg_content->audio_length());
         return vd && ad;
  }
         
diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h

index a8eabb9725c533870d38887436f68ae793232716..8f0482aad8331dcd12685e1d4724e08469b20eb8 100644 (file)
--- a/src/lib/ffmpeg_decoder.h
+++ b/src/lib/ffmpeg_decoder.h
@@ -38,6 +38,7 @@ extern "C" {
  #include "ffmpeg.h"
  
  class Film;
+class ffmpeg_pts_offset_test;
  
  /** @class FFmpegDecoder
   *  @brief A decoder using FFmpeg to decode content.
@@ -54,11 +55,14 @@ public:
         bool done () const;
  
  private:
+       friend class ::ffmpeg_pts_offset_test;
  
         /* No copy construction */
         FFmpegDecoder (FFmpegDecoder const &);
         FFmpegDecoder& operator= (FFmpegDecoder const &);
  
+       static double compute_pts_offset (double, double, float);
+
         void setup_subtitle ();
  
         AVSampleFormat audio_sample_format () const;
@@ -79,4 +83,6 @@ private:
  
         bool _decode_video;
         bool _decode_audio;
+
+       double _pts_offset;
  };
diff --git a/src/lib/ffmpeg_examiner.cc b/src/lib/ffmpeg_examiner.cc

index 6f1524f50e2ccf95be3c33c1ef89c9157158fe55..f45b0fe52e6dad125de259ca773d2f2c45eb8689 100644 (file)
--- a/src/lib/ffmpeg_examiner.cc
+++ b/src/lib/ffmpeg_examiner.cc
@@ -79,9 +79,9 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
                         }
                 } else {
                         for (size_t i = 0; i < _audio_streams.size(); ++i) {
-                               if (_packet.stream_index == _audio_streams[i]->id && !_audio_streams[i]->start) {
+                               if (_packet.stream_index == _audio_streams[i]->id && !_audio_streams[i]->first_audio) {
                                         if (avcodec_decode_audio4 (context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
-                                               _audio_streams[i]->start = frame_time (_audio_streams[i]->id);
+                                               _audio_streams[i]->first_audio = frame_time (_audio_streams[i]->id);
                                         }
                                 }
                         }
@@ -90,7 +90,7 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
                 bool have_all_audio = true;
                 size_t i = 0;
                 while (i < _audio_streams.size() && have_all_audio) {
-                       have_all_audio = _audio_streams[i]->start;
+                       have_all_audio = _audio_streams[i]->first_audio;
                         ++i;
                 }
  
@@ -102,14 +102,14 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
         }
  }
  
-optional<Time>
+optional<double>
  FFmpegExaminer::frame_time (int stream) const
  {
-       optional<Time> t;
+       optional<double> t;
         
         int64_t const bet = av_frame_get_best_effort_timestamp (_frame);
         if (bet != AV_NOPTS_VALUE) {
-               t = bet * av_q2d (_format_context->streams[stream]->time_base) * TIME_HZ;
+               t = bet * av_q2d (_format_context->streams[stream]->time_base);
         }
  
         return t;
diff --git a/src/lib/ffmpeg_examiner.h b/src/lib/ffmpeg_examiner.h

index 5cf9c2d0a5b8454c2918b196d8227cdcbc071619..ec84865ed7cce45fdcc40999e5bb4f197fb55ce2 100644 (file)
--- a/src/lib/ffmpeg_examiner.h
+++ b/src/lib/ffmpeg_examiner.h
@@ -41,15 +41,15 @@ public:
                 return _audio_streams;
         }
  
-       boost::optional<Time> first_video () const {
+       boost::optional<double> first_video () const {
                 return _first_video;
         }
         
  private:
         std::string stream_name (AVStream* s) const;
-       boost::optional<Time> frame_time (int) const;
+       boost::optional<double> frame_time (int) const;
         
          std::vector<boost::shared_ptr<FFmpegSubtitleStream> > _subtitle_streams;
          std::vector<boost::shared_ptr<FFmpegAudioStream> > _audio_streams;
-       boost::optional<Time> _first_video;
+       boost::optional<double> _first_video;
  };
diff --git a/src/lib/imagemagick_decoder.cc b/src/lib/imagemagick_decoder.cc

index 49b9d4911c2ff5671048860c1f9fdd1bdf5591cf..04d3d9df7c47d166c52e7692b3977d550993b547 100644 (file)
--- a/src/lib/imagemagick_decoder.cc
+++ b/src/lib/imagemagick_decoder.cc
@@ -43,12 +43,12 @@ ImageMagickDecoder::ImageMagickDecoder (shared_ptr<const Film> f, shared_ptr<con
  void
  ImageMagickDecoder::pass ()
  {
-       if (_next_video_frame >= _imagemagick_content->video_length ()) {
+       if (_video_position >= _imagemagick_content->video_length ()) {
                 return;
         }
  
         if (_image) {
-               video (_image, true, _next_video_frame);
+               video (_image, true, _video_position);
                 return;
         }
  
@@ -71,25 +71,25 @@ ImageMagickDecoder::pass ()
  
         delete magick_image;
  
-       video (_image, false, _next_video_frame);
+       video (_image, false, _video_position);
  }
  
  void
  ImageMagickDecoder::seek (VideoContent::Frame frame)
  {
-       _next_video_frame = frame;
+       _video_position = frame;
  }
  
  void
  ImageMagickDecoder::seek_back ()
  {
-       if (_next_video_frame > 0) {
-               _next_video_frame--;
+       if (_video_position > 0) {
+               _video_position--;
         }
  }
  
  bool
  ImageMagickDecoder::done () const
  {
-       return _next_video_frame >= _imagemagick_content->video_length ();
+       return _video_position >= _imagemagick_content->video_length ();
  }
diff --git a/src/lib/sndfile_decoder.cc b/src/lib/sndfile_decoder.cc

index 9030021e7c672d551eb309c788c4b4d9f7a7299a..80a6afd2baf79eae384164a310119a127e21c5bf 100644 (file)
--- a/src/lib/sndfile_decoder.cc
+++ b/src/lib/sndfile_decoder.cc
@@ -115,5 +115,5 @@ SndfileDecoder::audio_frame_rate () const
  bool
  SndfileDecoder::done () const
  {
-       return _next_audio_frame > _sndfile_content->audio_length ();
+       return _audio_position >= _sndfile_content->audio_length ();
  }
diff --git a/src/lib/video_decoder.cc b/src/lib/video_decoder.cc

index b5cc7d15887717e9a506141d74ab0066f265c3be..f61e63d4d1a4c312c99054c436bfee462f2afdab 100644 (file)
--- a/src/lib/video_decoder.cc
+++ b/src/lib/video_decoder.cc
@@ -30,7 +30,7 @@ using boost::shared_ptr;
  
  VideoDecoder::VideoDecoder (shared_ptr<const Film> f)
         : Decoder (f)
-       , _next_video_frame (0)
+       , _video_position (0)
  {
  
  }
@@ -39,7 +39,7 @@ void
  VideoDecoder::video (shared_ptr<const Image> image, bool same, VideoContent::Frame frame)
  {
          Video (image, same, frame);
-       _next_video_frame = frame + 1;
+       _video_position = frame + 1;
  }
  
  #if 0
diff --git a/src/lib/video_decoder.h b/src/lib/video_decoder.h

index c86248417bdd8b4f8147fd149fb6fbe3279e2775..d24219d956cba2df1b92b8a58e2d3f5bc8d24d78 100644 (file)
--- a/src/lib/video_decoder.h
+++ b/src/lib/video_decoder.h
@@ -43,7 +43,7 @@ public:
  protected:
  
         void video (boost::shared_ptr<const Image>, bool, VideoContent::Frame);
-       VideoContent::Frame _next_video_frame;
+       VideoContent::Frame _video_position;
  };
  
  #endif
diff --git a/test/ffmpeg_examiner_test.cc b/test/ffmpeg_examiner_test.cc

index 6e73038f10c6e125640317f9b73132d594982593..296444ba76bcda955a23da23211603b831ed465e 100644 (file)
--- a/test/ffmpeg_examiner_test.cc
+++ b/test/ffmpeg_examiner_test.cc
@@ -25,7 +25,7 @@ BOOST_AUTO_TEST_CASE (ffmpeg_examiner_test)
         shared_ptr<FFmpegContent> content (new FFmpegContent (film, "test/data/count300bd24.m2ts"));
         shared_ptr<FFmpegExaminer> examiner (new FFmpegExaminer (content));
  
-       BOOST_CHECK_EQUAL (examiner->first_video().get(), 57604000);
+       BOOST_CHECK_EQUAL (examiner->first_video().get(), 600.04166666666674);
         BOOST_CHECK_EQUAL (examiner->audio_streams().size(), 1);
-       BOOST_CHECK_EQUAL (examiner->audio_streams()[0]->start.get(), 57600000);
+       BOOST_CHECK_EQUAL (examiner->audio_streams()[0]->first_audio.get(), 600);
  }
diff --git a/test/ffmpeg_pts_offset.cc b/test/ffmpeg_pts_offset.cc

new file mode 100644 (file)

index 0000000..15c2b38
--- /dev/null
+++ b/test/ffmpeg_pts_offset.cc
@@ -0,0 +1,39 @@
+/*
+    Copyright (C) 2013 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+BOOST_AUTO_TEST_CASE (ffmpeg_pts_offset_test)
+{
+       /* Sound == video so no offset required */
+       BOOST_CHECK_EQUAL (FFmpegDecoder::compute_pts_offset (0, 0, 24), 0);
+
+       /* Common offset should be removed */
+       BOOST_CHECK_CLOSE (FFmpegDecoder::compute_pts_offset (42, 42, 24), -42, 1e-9);
+
+       /* Video is on a frame boundary */
+       BOOST_CHECK_EQUAL (FFmpegDecoder::compute_pts_offset (1.0 / 24.0, 0, 24), 0);
+
+       /* Again, video is on a frame boundary */
+       BOOST_CHECK_EQUAL (FFmpegDecoder::compute_pts_offset (1.0 / 23.97, 0, 23.97), 0);
+
+       /* And again, video is on a frame boundary */
+       BOOST_CHECK_EQUAL (FFmpegDecoder::compute_pts_offset (3.0 / 23.97, 0, 23.97), 0);
+
+       /* Off a frame boundary */
+       BOOST_CHECK_CLOSE (FFmpegDecoder::compute_pts_offset (1.0 / 24.0 - 0.0215, 0, 24), 0.0215, 1e-9);
+}
diff --git a/test/test.cc b/test/test.cc

index 3434a3f09af9c4a79384643f50dc6b06e43dddec..4494540a2ac31e5069f1a6c3f192b4c8be837b14 100644 (file)
--- a/test/test.cc
+++ b/test/test.cc
@@ -154,6 +154,7 @@ check_dcp (string ref, string check)
         BOOST_CHECK (ref_dcp.equals (check_dcp, options, boost::bind (note, _1, _2)));
  }
  
+#include "ffmpeg_pts_offset.cc"
  #include "ffmpeg_examiner_test.cc"
  #include "black_fill_test.cc"
  #include "scaling_test.cc"
author	Carl Hetherington <cth@carlh.net>
	Wed, 26 Jun 2013 15:35:43 +0000 (16:35 +0100)
committer	Carl Hetherington <cth@carlh.net>
	Wed, 26 Jun 2013 15:35:43 +0000 (16:35 +0100)
src/lib/audio_decoder.cc		patch | blob | history
src/lib/audio_decoder.h		patch | blob | history
src/lib/ffmpeg_content.cc		patch | blob | history
src/lib/ffmpeg_content.h		patch | blob | history
src/lib/ffmpeg_decoder.cc		patch | blob | history
src/lib/ffmpeg_decoder.h		patch | blob | history
src/lib/ffmpeg_examiner.cc		patch | blob | history
src/lib/ffmpeg_examiner.h		patch | blob | history
src/lib/imagemagick_decoder.cc		patch | blob | history
src/lib/sndfile_decoder.cc		patch | blob | history
src/lib/video_decoder.cc		patch | blob | history
src/lib/video_decoder.h		patch | blob | history
test/ffmpeg_examiner_test.cc		patch | blob | history
test/ffmpeg_pts_offset.cc	[new file with mode: 0644]	patch | blob
test/test.cc		patch | blob | history