Do audio/video pts sync in a hopefully much more sensible way.

author Carl Hetherington <cth@carlh.net>

Mon, 22 Oct 2012 13:35:00 +0000 (14:35 +0100)

committer Carl Hetherington <cth@carlh.net>

Mon, 22 Oct 2012 13:35:00 +0000 (14:35 +0100)
author Carl Hetherington <cth@carlh.net>
Mon, 22 Oct 2012 13:35:00 +0000 (14:35 +0100)
committer Carl Hetherington <cth@carlh.net>
Mon, 22 Oct 2012 13:35:00 +0000 (14:35 +0100)
diff --git a/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values

new file mode 100644 (file)

index 0000000..0fd81c0
--- /dev/null
+++ b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values
@@ -0,0 +1,17 @@
+creator: Carl Hetherington <cth@carlh.net>
+
+
+reporter: Carl Hetherington <cth@carlh.net>
+
+
+severity: minor
+
+
+status: open
+
+
+summary: Do the delay line in deinterleaved floats to simplify code.
+
+
+time: Mon, 22 Oct 2012 13:27:50 +0000
+
diff --git a/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values

new file mode 100644 (file)

index 0000000..7b9e044
--- /dev/null
+++ b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values
@@ -0,0 +1,17 @@
+creator: Carl Hetherington <cth@carlh.net>
+
+
+reporter: Carl Hetherington <cth@carlh.net>
+
+
+severity: minor
+
+
+status: open
+
+
+summary: Decoder should use its own methods more than FilmState
+
+
+time: Mon, 22 Oct 2012 13:28:57 +0000
+
diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc

index c9235f8e1d595ae0d60b537a88f696222c3b2980..65e5ff7225ccb1e913ed54c6d2366183121d600a 100644 (file)
--- a/src/lib/decoder.cc
+++ b/src/lib/decoder.cc
@@ -90,12 +90,10 @@ Decoder::~Decoder ()
  void
  Decoder::process_begin ()
  {
-       _delay_in_bytes = _fs->total_audio_delay() * _fs->audio_sample_rate() * _fs->audio_channels() * bytes_per_audio_sample() / 1000;
+       _delay_in_bytes = _fs->audio_delay() * _fs->audio_sample_rate() * _fs->audio_channels() * bytes_per_audio_sample() / 1000;
         delete _delay_line;
         _delay_line = new DelayLine (_delay_in_bytes);
  
-       _log->log (String::compose ("Decoding audio with total delay of %1", _fs->total_audio_delay()));
-
         _audio_frames_processed = 0;
  }
  
@@ -129,6 +127,10 @@ Decoder::process_end ()
                 _log->log (String::compose ("DCP length is %1; %2 frames of audio processed.", _fs->dcp_length(), _audio_frames_processed));
                 _log->log (String::compose ("Adding %1 frames of silence to the end.", audio_short_by_frames));
  
+               /* XXX: this is slightly questionable; does memset () give silence with all
+                  sample formats?
+               */
+
                 int64_t bytes = audio_short_by_frames * _fs->audio_channels() * bytes_per_audio_sample();
                 
                 int64_t const silence_size = 16 * 1024 * _fs->audio_channels() * bytes_per_audio_sample();
diff --git a/src/lib/decoder.h b/src/lib/decoder.h

index 7559217eb13f914a2acd4f68a5b1407c6893705e..e81fbb24ec5ea4d43544222b963054fecb144d13 100644 (file)
--- a/src/lib/decoder.h
+++ b/src/lib/decoder.h
@@ -71,11 +71,6 @@ public:
         virtual int64_t audio_channel_layout () const = 0;
         virtual bool has_subtitles () const = 0;
  
-       /** @return amount of extra unwanted audio at the start (or -ve for unwanted video) in milliseconds */
-       virtual int audio_to_discard () const {
-               return 0;
-       }
-
         void process_begin ();
         bool pass ();
         void process_end ();
@@ -117,6 +112,8 @@ protected:
         void process_audio (uint8_t *, int);
         void process_subtitle (boost::shared_ptr<TimedSubtitle>);
  
+       int bytes_per_audio_sample () const;
+       
         /** our FilmState */
         boost::shared_ptr<const FilmState> _fs;
         /** our options */
@@ -137,7 +134,6 @@ protected:
  private:
         void setup_video_filters ();
         void emit_audio (uint8_t* data, int size);
-       int bytes_per_audio_sample () const;
         
         /** last video frame to be processed */
         int _video_frame;
diff --git a/src/lib/examine_content_job.cc b/src/lib/examine_content_job.cc

index d12e060691e61a08d876e764aedc1907761d895a..f0fb29812ffc99bf012a116e6216fa97ec0febae 100644 (file)
--- a/src/lib/examine_content_job.cc
+++ b/src/lib/examine_content_job.cc
@@ -72,7 +72,6 @@ ExamineContentJob::run ()
         fs->set_length (_decoder->last_video_frame ());
  
         _log->log (String::compose ("Video length is %1 frames", _decoder->last_video_frame()));
-       _log->log (String::compose ("%1ms of audio to discard", _decoder->audio_to_discard()));
  
         ascend ();
  
@@ -111,10 +110,3 @@ ExamineContentJob::last_video_frame () const
  {
         return _decoder->last_video_frame ();
  }
-
-int
-ExamineContentJob::audio_to_discard () const
-{
-       return _decoder->audio_to_discard ();
-}
-
diff --git a/src/lib/examine_content_job.h b/src/lib/examine_content_job.h

index d8e94f1ec2f6cb5755336a4ffd682a0c866f476c..3bbd673a87e66aab5521b0ddb1666d5404da66ab 100644 (file)
--- a/src/lib/examine_content_job.h
+++ b/src/lib/examine_content_job.h
@@ -38,7 +38,6 @@ public:
         void run ();
  
         int last_video_frame () const;
-       int audio_to_discard () const;
  
  private:
         boost::shared_ptr<Decoder> _decoder;
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc

index d8a541be39b0539d81be155c1b82d33d877ff483..c74fee008458a53914daaf38ea4244f4166d5d31 100644 (file)
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -67,6 +67,7 @@ FFmpegDecoder::FFmpegDecoder (boost::shared_ptr<const FilmState> s, boost::share
         , _subtitle_codec_context (0)
         , _subtitle_codec (0)
         , _first_video_pts (-1)
+       , _first_audio_pts (-1)
  {
         setup_general ();
         setup_video ();
@@ -246,16 +247,56 @@ FFmpegDecoder::do_pass ()
  
         if (_packet.stream_index == _video_stream) {
  
+               if (_first_video_pts == -1) {
+                       _first_video_pts = _packet.pts;
+               }
+               
                 int frame_finished;
                 if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
-                       if (_first_video_pts == -1) {
-                               _first_video_pts = _packet.pts;
-                       }
                         process_video (_frame);
                 }
  
-       } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio) {
-               
+       } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && (_first_video_pts != -1 && _packet.pts > _first_video_pts)) {
+
+               /* Note: We only decode audio if we've had our first video packet through, and if this
+                  packet comes after it.  Until then it is thrown away.
+               */
+
+               if (_first_audio_pts == -1) {
+                       _first_audio_pts = _packet.pts;
+
+                       /* This is our first audio packet, and if we've arrived here we must have had our
+                          first video packet.  Push some silence to make up the gap between our first
+                          video packet and our first audio.
+                       */
+                       
+                       AVStream* v = _format_context->streams[_video_stream];
+                       AVStream* a = _format_context->streams[_audio_stream];
+                       
+                       assert (v->time_base.num == a->time_base.num);
+                       assert (v->time_base.den == a->time_base.den);
+
+                       /* samples of silence that we must push */
+                       int const s = rint (av_q2d (v->time_base) * (_first_audio_pts - _first_video_pts) * audio_sample_rate ());
+
+                       _log->log (
+                               String::compose (
+                                       "First video at %1, first audio at %2, pushing %3 samples of silence",
+                                       _first_video_pts, _first_audio_pts, s
+                                       )
+                               );
+
+                       /* hence bytes */
+                       int const b = s * audio_channels() * bytes_per_audio_sample();
+
+                       /* XXX: this assumes that it won't be too much, and there are shaky assumptions
+                          that all sound representations are silent with memset()ed zero data.
+                       */
+                       uint8_t silence[b];
+                       memset (silence, 0, b);
+                       process_audio (silence, b);
+               }
+
                 avcodec_get_frame_defaults (_frame);
                 
                 int frame_finished;
@@ -424,14 +465,3 @@ FFmpegDecoder::stream_name (AVStream* s) const
         return n.str ();
  }
  
-int
-FFmpegDecoder::audio_to_discard () const
-{
-       AVStream* v = _format_context->streams[_video_stream];
-       AVStream* a = _format_context->streams[_audio_stream];
-
-       assert (v->time_base.num == a->time_base.num);
-       assert (v->time_base.den == a->time_base.den);
-
-       return rint (av_q2d (v->time_base) * 1000 * (_first_video_pts - _first_audio_pts));
-}
diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h

index dc10635a5da7ad2f8d4fa3ebf47dcd14d9daab4d..ac4cd6fcfa1dae4ad18258bad2e349c5186abc91 100644 (file)
--- a/src/lib/ffmpeg_decoder.h
+++ b/src/lib/ffmpeg_decoder.h
@@ -65,8 +65,6 @@ public:
         AVSampleFormat audio_sample_format () const;
         int64_t audio_channel_layout () const;
         bool has_subtitles () const;
-       int bytes_per_audio_sample () const;
-       int audio_to_discard () const;
  
         std::vector<AudioStream> audio_streams () const;
         std::vector<SubtitleStream> subtitle_streams () const;
diff --git a/src/lib/film.cc b/src/lib/film.cc

index 3a8b29f8603a8ddf30938a0bc06f6f005b2c37cd..aa0bfa21192cfb6d1c87c2bf858198c1b283072c 100644 (file)
--- a/src/lib/film.cc
+++ b/src/lib/film.cc
@@ -256,7 +256,6 @@ void
  Film::examine_content_post_gui ()
  {
         set_length (_examine_content_job->last_video_frame ());
-       set_audio_to_discard (_examine_content_job->audio_to_discard ());
         _examine_content_job.reset ();
  
         string const tdir = dir ("thumbs");
diff --git a/src/lib/film_state.cc b/src/lib/film_state.cc

index d59d7ce7b3627e68774fe0a7c43464bfdd62cc8a..283830f59703b39b287feb843f1f4c12ad53040b 100644 (file)
--- a/src/lib/film_state.cc
+++ b/src/lib/film_state.cc
@@ -126,7 +126,6 @@ FilmState::write_metadata () const
         }
  
         f << "frames_per_second " << _frames_per_second << "\n";
-       f << "audio_to_discard " << _audio_to_discard << "\n";
         
         _dirty = false;
  }
@@ -231,8 +230,6 @@ FilmState::read_metadata ()
                         _subtitle_streams.push_back (SubtitleStream (v));
                 } else if (k == "frames_per_second") {
                         _frames_per_second = atof (v.c_str ());
-               } else if (k == "audio_to_discard") {
-                       _audio_to_discard = atoi (v.c_str ());
                 }
         }
                 
@@ -812,13 +809,6 @@ FilmState::set_frames_per_second (float f)
         _frames_per_second = f;
         signal_changed (FRAMES_PER_SECOND);
  }
-
-void
-FilmState::set_audio_to_discard (int a)
-{
-       _audio_to_discard = a;
-       signal_changed (AUDIO_TO_DISCARD);
-}
         
  void
  FilmState::signal_changed (Property p)
@@ -842,9 +832,3 @@ FilmState::audio_channels () const
  
         return _audio_streams[_audio_stream].channels ();
  }
-
-int
-FilmState::total_audio_delay () const
-{
-       return _audio_delay - _audio_to_discard;
-}
diff --git a/src/lib/film_state.h b/src/lib/film_state.h

index d1d7489f6ba0c5c73cc48f2d43f964fa9d672cc9..8940d0e5df19206b829ae1e15e476e9ce9fa4b9d 100644 (file)
--- a/src/lib/film_state.h
+++ b/src/lib/film_state.h
@@ -73,7 +73,6 @@ public:
                 , _audio_sample_rate (0)
                 , _has_subtitles (false)
                 , _frames_per_second (0)
-               , _audio_to_discard (0)
                 , _dirty (false)
         {}
  
@@ -114,7 +113,6 @@ public:
                 , _audio_streams     (o._audio_streams)
                 , _subtitle_streams  (o._subtitle_streams)
                 , _frames_per_second (o._frames_per_second)
-               , _audio_to_discard  (o._audio_to_discard)
                 , _dirty             (o._dirty)
         {}
  
@@ -150,7 +148,6 @@ public:
         }
  
         int audio_channels () const;
-       int total_audio_delay () const;
  
         enum Property {
                 NONE,
@@ -182,7 +179,6 @@ public:
                 AUDIO_STREAMS,
                 SUBTITLE_STREAMS,
                 FRAMES_PER_SECOND,
-               AUDIO_TO_DISCARD
         };
  
  
@@ -342,11 +338,7 @@ public:
                 return _frames_per_second;
         }
  
-       int audio_to_discard () const {
-               return _audio_to_discard;
-       }
  
-       
         /* SET */
  
         void set_directory (std::string);
@@ -390,7 +382,6 @@ public:
         void set_audio_streams (std::vector<AudioStream>);
         void set_subtitle_streams (std::vector<SubtitleStream>);
         void set_frames_per_second (float);
-       void set_audio_to_discard (int);
  
         /** Emitted when some property has changed */
         mutable sigc::signal1<void, Property> Changed;
@@ -480,10 +471,6 @@ private:
         std::vector<SubtitleStream> _subtitle_streams;
         /** Frames per second of the source */
         float _frames_per_second;
-       /** Number of milliseconds of audio to discard at the start of this film
-           in order to sync audio with video.  Can be negative.
-       */
-       int _audio_to_discard;
  
         mutable bool _dirty;
author	Carl Hetherington <cth@carlh.net>
	Mon, 22 Oct 2012 13:35:00 +0000 (14:35 +0100)
committer	Carl Hetherington <cth@carlh.net>
	Mon, 22 Oct 2012 13:35:00 +0000 (14:35 +0100)
.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values	[new file with mode: 0644]	patch | blob
.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values	[new file with mode: 0644]	patch | blob
src/lib/decoder.cc		patch | blob | history
src/lib/decoder.h		patch | blob | history
src/lib/examine_content_job.cc		patch | blob | history
src/lib/examine_content_job.h		patch | blob | history
src/lib/ffmpeg_decoder.cc		patch | blob | history
src/lib/ffmpeg_decoder.h		patch | blob | history
src/lib/film.cc		patch | blob | history
src/lib/film_state.cc		patch | blob | history
src/lib/film_state.h		patch | blob | history