Do audio/video pts sync in a hopefully much more sensible way.
author Carl Hetherington <cth@carlh.net>
Mon, 22 Oct 2012 13:35:00 +0000 (14:35 +0100)
committer Carl Hetherington <cth@carlh.net>
Mon, 22 Oct 2012 13:35:00 +0000 (14:35 +0100)
.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values [new file with mode: 0644]
.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values [new file with mode: 0644]
src/lib/decoder.cc
src/lib/decoder.h
src/lib/examine_content_job.cc
src/lib/examine_content_job.h
src/lib/ffmpeg_decoder.cc
src/lib/ffmpeg_decoder.h
src/lib/film.cc
src/lib/film_state.cc
src/lib/film_state.h

diff --git a/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values
new file mode 100644 (file)
index 0000000..0fd81c0
--- /dev/null
@@ -0,0 +1,17 @@
+creator: Carl Hetherington <cth@carlh.net>
+
+
+reporter: Carl Hetherington <cth@carlh.net>
+
+
+severity: minor
+
+
+status: open
+
+
+summary: Do the delay line in deinterleaved floats to simplify code.
+
+
+time: Mon, 22 Oct 2012 13:27:50 +0000
+
diff --git a/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values
new file mode 100644 (file)
index 0000000..7b9e044
--- /dev/null
@@ -0,0 +1,17 @@
+creator: Carl Hetherington <cth@carlh.net>
+
+
+reporter: Carl Hetherington <cth@carlh.net>
+
+
+severity: minor
+
+
+status: open
+
+
+summary: Decoder should use its own methods more than FilmState
+
+
+time: Mon, 22 Oct 2012 13:28:57 +0000
+
diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc
index c9235f8e1d595ae0d60b537a88f696222c3b2980..65e5ff7225ccb1e913ed54c6d2366183121d600a 100644 (file)
@@ -90,12 +90,10 @@ Decoder::~Decoder ()
 void
 Decoder::process_begin ()
 {
-       _delay_in_bytes = _fs->total_audio_delay() * _fs->audio_sample_rate() * _fs->audio_channels() * bytes_per_audio_sample() / 1000;
+       _delay_in_bytes = _fs->audio_delay() * _fs->audio_sample_rate() * _fs->audio_channels() * bytes_per_audio_sample() / 1000;
        delete _delay_line;
        _delay_line = new DelayLine (_delay_in_bytes);
 
-       _log->log (String::compose ("Decoding audio with total delay of %1", _fs->total_audio_delay()));
-
        _audio_frames_processed = 0;
 }
 
@@ -129,6 +127,10 @@ Decoder::process_end ()
                _log->log (String::compose ("DCP length is %1; %2 frames of audio processed.", _fs->dcp_length(), _audio_frames_processed));
                _log->log (String::compose ("Adding %1 frames of silence to the end.", audio_short_by_frames));
 
+               /* XXX: this is slightly questionable; does memset () give silence with all
+                  sample formats?
+               */
+
                int64_t bytes = audio_short_by_frames * _fs->audio_channels() * bytes_per_audio_sample();
                
                int64_t const silence_size = 16 * 1024 * _fs->audio_channels() * bytes_per_audio_sample();
diff --git a/src/lib/decoder.h b/src/lib/decoder.h
index 7559217eb13f914a2acd4f68a5b1407c6893705e..e81fbb24ec5ea4d43544222b963054fecb144d13 100644 (file)
@@ -71,11 +71,6 @@ public:
        virtual int64_t audio_channel_layout () const = 0;
        virtual bool has_subtitles () const = 0;
 
-       /** @return amount of extra unwanted audio at the start (or -ve for unwanted video) in milliseconds */
-       virtual int audio_to_discard () const {
-               return 0;
-       }
-
        void process_begin ();
        bool pass ();
        void process_end ();
@@ -117,6 +112,8 @@ protected:
        void process_audio (uint8_t *, int);
        void process_subtitle (boost::shared_ptr<TimedSubtitle>);
 
+       int bytes_per_audio_sample () const;
+       
        /** our FilmState */
        boost::shared_ptr<const FilmState> _fs;
        /** our options */
@@ -137,7 +134,6 @@ protected:
 private:
        void setup_video_filters ();
        void emit_audio (uint8_t* data, int size);
-       int bytes_per_audio_sample () const;
        
        /** last video frame to be processed */
        int _video_frame;
diff --git a/src/lib/examine_content_job.cc b/src/lib/examine_content_job.cc
index d12e060691e61a08d876e764aedc1907761d895a..f0fb29812ffc99bf012a116e6216fa97ec0febae 100644 (file)
@@ -72,7 +72,6 @@ ExamineContentJob::run ()
        fs->set_length (_decoder->last_video_frame ());
 
        _log->log (String::compose ("Video length is %1 frames", _decoder->last_video_frame()));
-       _log->log (String::compose ("%1ms of audio to discard", _decoder->audio_to_discard()));
 
        ascend ();
 
@@ -111,10 +110,3 @@ ExamineContentJob::last_video_frame () const
 {
        return _decoder->last_video_frame ();
 }
-
-int
-ExamineContentJob::audio_to_discard () const
-{
-       return _decoder->audio_to_discard ();
-}
-
diff --git a/src/lib/examine_content_job.h b/src/lib/examine_content_job.h
index d8e94f1ec2f6cb5755336a4ffd682a0c866f476c..3bbd673a87e66aab5521b0ddb1666d5404da66ab 100644 (file)
@@ -38,7 +38,6 @@ public:
        void run ();
 
        int last_video_frame () const;
-       int audio_to_discard () const;
 
 private:
        boost::shared_ptr<Decoder> _decoder;
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index d8a541be39b0539d81be155c1b82d33d877ff483..c74fee008458a53914daaf38ea4244f4166d5d31 100644 (file)
@@ -67,6 +67,7 @@ FFmpegDecoder::FFmpegDecoder (boost::shared_ptr<const FilmState> s, boost::share
        , _subtitle_codec_context (0)
        , _subtitle_codec (0)
        , _first_video_pts (-1)
+       , _first_audio_pts (-1)
 {
        setup_general ();
        setup_video ();
@@ -246,16 +247,56 @@ FFmpegDecoder::do_pass ()
 
        if (_packet.stream_index == _video_stream) {
 
+               if (_first_video_pts == -1) {
+                       _first_video_pts = _packet.pts;
+               }
+               
                int frame_finished;
                if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
-                       if (_first_video_pts == -1) {
-                               _first_video_pts = _packet.pts;
-                       }
                        process_video (_frame);
                }
 
-       } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio) {
-               
+       } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && (_first_video_pts != -1 && _packet.pts > _first_video_pts)) {
+
+               /* Note: We only decode audio if we've had our first video packet through, and if this
+                  packet comes after it.  Until then it is thrown away.
+               */
+
+               if (_first_audio_pts == -1) {
+                       _first_audio_pts = _packet.pts;
+
+                       /* This is our first audio packet, and if we've arrived here we must have had our
+                          first video packet.  Push some silence to make up the gap between our first
+                          video packet and our first audio.
+                       */
+                       
+                       AVStream* v = _format_context->streams[_video_stream];
+                       AVStream* a = _format_context->streams[_audio_stream];
+                       
+                       assert (v->time_base.num == a->time_base.num);
+                       assert (v->time_base.den == a->time_base.den);
+
+                       /* samples of silence that we must push */
+                       int const s = rint (av_q2d (v->time_base) * (_first_audio_pts - _first_video_pts) * audio_sample_rate ());
+
+                       _log->log (
+                               String::compose (
+                                       "First video at %1, first audio at %2, pushing %3 samples of silence",
+                                       _first_video_pts, _first_audio_pts, s
+                                       )
+                               );
+
+                       /* hence bytes */
+                       int const b = s * audio_channels() * bytes_per_audio_sample();
+
+                       /* XXX: this assumes that it won't be too much, and there are shaky assumptions
+                          that all sound representations are silent with memset()ed zero data.
+                       */
+                       uint8_t silence[b];
+                       memset (silence, 0, b);
+                       process_audio (silence, b);
+               }
+
                avcodec_get_frame_defaults (_frame);
                
                int frame_finished;
@@ -424,14 +465,3 @@ FFmpegDecoder::stream_name (AVStream* s) const
        return n.str ();
 }
 
-int
-FFmpegDecoder::audio_to_discard () const
-{
-       AVStream* v = _format_context->streams[_video_stream];
-       AVStream* a = _format_context->streams[_audio_stream];
-
-       assert (v->time_base.num == a->time_base.num);
-       assert (v->time_base.den == a->time_base.den);
-
-       return rint (av_q2d (v->time_base) * 1000 * (_first_video_pts - _first_audio_pts));
-}
diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h
index dc10635a5da7ad2f8d4fa3ebf47dcd14d9daab4d..ac4cd6fcfa1dae4ad18258bad2e349c5186abc91 100644 (file)
@@ -65,8 +65,6 @@ public:
        AVSampleFormat audio_sample_format () const;
        int64_t audio_channel_layout () const;
        bool has_subtitles () const;
-       int bytes_per_audio_sample () const;
-       int audio_to_discard () const;
 
        std::vector<AudioStream> audio_streams () const;
        std::vector<SubtitleStream> subtitle_streams () const;
diff --git a/src/lib/film.cc b/src/lib/film.cc
index 3a8b29f8603a8ddf30938a0bc06f6f005b2c37cd..aa0bfa21192cfb6d1c87c2bf858198c1b283072c 100644 (file)
@@ -256,7 +256,6 @@ void
 Film::examine_content_post_gui ()
 {
        set_length (_examine_content_job->last_video_frame ());
-       set_audio_to_discard (_examine_content_job->audio_to_discard ());
        _examine_content_job.reset ();
 
        string const tdir = dir ("thumbs");
diff --git a/src/lib/film_state.cc b/src/lib/film_state.cc
index d59d7ce7b3627e68774fe0a7c43464bfdd62cc8a..283830f59703b39b287feb843f1f4c12ad53040b 100644 (file)
@@ -126,7 +126,6 @@ FilmState::write_metadata () const
        }
 
        f << "frames_per_second " << _frames_per_second << "\n";
-       f << "audio_to_discard " << _audio_to_discard << "\n";
        
        _dirty = false;
 }
@@ -231,8 +230,6 @@ FilmState::read_metadata ()
                        _subtitle_streams.push_back (SubtitleStream (v));
                } else if (k == "frames_per_second") {
                        _frames_per_second = atof (v.c_str ());
-               } else if (k == "audio_to_discard") {
-                       _audio_to_discard = atoi (v.c_str ());
                }
        }
                
@@ -812,13 +809,6 @@ FilmState::set_frames_per_second (float f)
        _frames_per_second = f;
        signal_changed (FRAMES_PER_SECOND);
 }
-
-void
-FilmState::set_audio_to_discard (int a)
-{
-       _audio_to_discard = a;
-       signal_changed (AUDIO_TO_DISCARD);
-}
        
 void
 FilmState::signal_changed (Property p)
@@ -842,9 +832,3 @@ FilmState::audio_channels () const
 
        return _audio_streams[_audio_stream].channels ();
 }
-
-int
-FilmState::total_audio_delay () const
-{
-       return _audio_delay - _audio_to_discard;
-}
diff --git a/src/lib/film_state.h b/src/lib/film_state.h
index d1d7489f6ba0c5c73cc48f2d43f964fa9d672cc9..8940d0e5df19206b829ae1e15e476e9ce9fa4b9d 100644 (file)
@@ -73,7 +73,6 @@ public:
                , _audio_sample_rate (0)
                , _has_subtitles (false)
                , _frames_per_second (0)
-               , _audio_to_discard (0)
                , _dirty (false)
        {}
 
@@ -114,7 +113,6 @@ public:
                , _audio_streams     (o._audio_streams)
                , _subtitle_streams  (o._subtitle_streams)
                , _frames_per_second (o._frames_per_second)
-               , _audio_to_discard  (o._audio_to_discard)
                , _dirty             (o._dirty)
        {}
 
@@ -150,7 +148,6 @@ public:
        }
 
        int audio_channels () const;
-       int total_audio_delay () const;
 
        enum Property {
                NONE,
@@ -182,7 +179,6 @@ public:
                AUDIO_STREAMS,
                SUBTITLE_STREAMS,
                FRAMES_PER_SECOND,
-               AUDIO_TO_DISCARD
        };
 
 
@@ -342,11 +338,7 @@ public:
                return _frames_per_second;
        }
 
-       int audio_to_discard () const {
-               return _audio_to_discard;
-       }
 
-       
        /* SET */
 
        void set_directory (std::string);
@@ -390,7 +382,6 @@ public:
        void set_audio_streams (std::vector<AudioStream>);
        void set_subtitle_streams (std::vector<SubtitleStream>);
        void set_frames_per_second (float);
-       void set_audio_to_discard (int);
 
        /** Emitted when some property has changed */
        mutable sigc::signal1<void, Property> Changed;
@@ -480,10 +471,6 @@ private:
        std::vector<SubtitleStream> _subtitle_streams;
        /** Frames per second of the source */
        float _frames_per_second;
-       /** Number of milliseconds of audio to discard at the start of this film
-           in order to sync audio with video.  Can be negative.
-       */
-       int _audio_to_discard;
 
        mutable bool _dirty;