From 71b7808830e17423453502ed5112b70bf90fff1b Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Mon, 22 Oct 2012 14:35:00 +0100 Subject: [PATCH] Do audio/video pts sync in a hopefully much more sensible way. --- .../values | 17 +++++ .../values | 17 +++++ src/lib/decoder.cc | 8 ++- src/lib/decoder.h | 8 +-- src/lib/examine_content_job.cc | 8 --- src/lib/examine_content_job.h | 1 - src/lib/ffmpeg_decoder.cc | 62 ++++++++++++++----- src/lib/ffmpeg_decoder.h | 2 - src/lib/film.cc | 1 - src/lib/film_state.cc | 16 ----- src/lib/film_state.h | 13 ---- 11 files changed, 87 insertions(+), 66 deletions(-) create mode 100644 .be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values create mode 100644 .be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values diff --git a/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values new file mode 100644 index 000000000..0fd81c0cd --- /dev/null +++ b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/cc075bd0-4641-4b2c-83b1-4adb05433f71/values @@ -0,0 +1,17 @@ +creator: Carl Hetherington + + +reporter: Carl Hetherington + + +severity: minor + + +status: open + + +summary: Do the delay line in deinterleaved floats to simplify code. + + +time: Mon, 22 Oct 2012 13:27:50 +0000 + diff --git a/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values new file mode 100644 index 000000000..7b9e04496 --- /dev/null +++ b/.be/aff5ca2c-44ee-4ed6-800b-4abe9c3e794c/bugs/e6b5de1f-5dbe-42f2-b103-e8bb680c4140/values @@ -0,0 +1,17 @@ +creator: Carl Hetherington + + +reporter: Carl Hetherington + + +severity: minor + + +status: open + + +summary: Decoder should use its own methods more than FilmState + + +time: Mon, 22 Oct 2012 13:28:57 +0000 + diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc index c9235f8e1..65e5ff722 100644 --- a/src/lib/decoder.cc +++ b/src/lib/decoder.cc @@ -90,12 +90,10 @@ Decoder::~Decoder () void Decoder::process_begin () { - _delay_in_bytes = _fs->total_audio_delay() * _fs->audio_sample_rate() * _fs->audio_channels() * bytes_per_audio_sample() / 1000; + _delay_in_bytes = _fs->audio_delay() * _fs->audio_sample_rate() * _fs->audio_channels() * bytes_per_audio_sample() / 1000; delete _delay_line; _delay_line = new DelayLine (_delay_in_bytes); - _log->log (String::compose ("Decoding audio with total delay of %1", _fs->total_audio_delay())); - _audio_frames_processed = 0; } @@ -129,6 +127,10 @@ Decoder::process_end () _log->log (String::compose ("DCP length is %1; %2 frames of audio processed.", _fs->dcp_length(), _audio_frames_processed)); _log->log (String::compose ("Adding %1 frames of silence to the end.", audio_short_by_frames)); + /* XXX: this is slightly questionable; does memset () give silence with all + sample formats? + */ + int64_t bytes = audio_short_by_frames * _fs->audio_channels() * bytes_per_audio_sample(); int64_t const silence_size = 16 * 1024 * _fs->audio_channels() * bytes_per_audio_sample(); diff --git a/src/lib/decoder.h b/src/lib/decoder.h index 7559217eb..e81fbb24e 100644 --- a/src/lib/decoder.h +++ b/src/lib/decoder.h @@ -71,11 +71,6 @@ public: virtual int64_t audio_channel_layout () const = 0; virtual bool has_subtitles () const = 0; - /** @return amount of extra unwanted audio at the start (or -ve for unwanted video) in milliseconds */ - virtual int audio_to_discard () const { - return 0; - } - void process_begin (); bool pass (); void process_end (); @@ -117,6 +112,8 @@ protected: void process_audio (uint8_t *, int); void process_subtitle (boost::shared_ptr); + int bytes_per_audio_sample () const; + /** our FilmState */ boost::shared_ptr _fs; /** our options */ @@ -137,7 +134,6 @@ protected: private: void setup_video_filters (); void emit_audio (uint8_t* data, int size); - int bytes_per_audio_sample () const; /** last video frame to be processed */ int _video_frame; diff --git a/src/lib/examine_content_job.cc b/src/lib/examine_content_job.cc index d12e06069..f0fb29812 100644 --- a/src/lib/examine_content_job.cc +++ b/src/lib/examine_content_job.cc @@ -72,7 +72,6 @@ ExamineContentJob::run () fs->set_length (_decoder->last_video_frame ()); _log->log (String::compose ("Video length is %1 frames", _decoder->last_video_frame())); - _log->log (String::compose ("%1ms of audio to discard", _decoder->audio_to_discard())); ascend (); @@ -111,10 +110,3 @@ ExamineContentJob::last_video_frame () const { return _decoder->last_video_frame (); } - -int -ExamineContentJob::audio_to_discard () const -{ - return _decoder->audio_to_discard (); -} - diff --git a/src/lib/examine_content_job.h b/src/lib/examine_content_job.h index d8e94f1ec..3bbd673a8 100644 --- a/src/lib/examine_content_job.h +++ b/src/lib/examine_content_job.h @@ -38,7 +38,6 @@ public: void run (); int last_video_frame () const; - int audio_to_discard () const; private: boost::shared_ptr _decoder; diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc index d8a541be3..c74fee008 100644 --- a/src/lib/ffmpeg_decoder.cc +++ b/src/lib/ffmpeg_decoder.cc @@ -67,6 +67,7 @@ FFmpegDecoder::FFmpegDecoder (boost::shared_ptr s, boost::share , _subtitle_codec_context (0) , _subtitle_codec (0) , _first_video_pts (-1) + , _first_audio_pts (-1) { setup_general (); setup_video (); @@ -246,16 +247,56 @@ FFmpegDecoder::do_pass () if (_packet.stream_index == _video_stream) { + if (_first_video_pts == -1) { + _first_video_pts = _packet.pts; + } + int frame_finished; if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) { - if (_first_video_pts == -1) { - _first_video_pts = _packet.pts; - } process_video (_frame); } - } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio) { - + } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && (_first_video_pts != -1 && _packet.pts > _first_video_pts)) { + + /* Note: We only decode audio if we've had our first video packet through, and if this + packet comes after it. Until then it is thrown away. + */ + + if (_first_audio_pts == -1) { + _first_audio_pts = _packet.pts; + + /* This is our first audio packet, and if we've arrived here we must have had our + first video packet. Push some silence to make up the gap between our first + video packet and our first audio. + */ + + AVStream* v = _format_context->streams[_video_stream]; + AVStream* a = _format_context->streams[_audio_stream]; + + assert (v->time_base.num == a->time_base.num); + assert (v->time_base.den == a->time_base.den); + + /* samples of silence that we must push */ + int const s = rint (av_q2d (v->time_base) * (_first_audio_pts - _first_video_pts) * audio_sample_rate ()); + + _log->log ( + String::compose ( + "First video at %1, first audio at %2, pushing %3 samples of silence", + _first_video_pts, _first_audio_pts, s + ) + ); + + /* hence bytes */ + int const b = s * audio_channels() * bytes_per_audio_sample(); + + /* XXX: this assumes that it won't be too much, and there are shaky assumptions + that all sound representations are silent with memset()ed zero data. + */ + uint8_t silence[b]; + memset (silence, 0, b); + process_audio (silence, b); + } + avcodec_get_frame_defaults (_frame); int frame_finished; @@ -424,14 +465,3 @@ FFmpegDecoder::stream_name (AVStream* s) const return n.str (); } -int -FFmpegDecoder::audio_to_discard () const -{ - AVStream* v = _format_context->streams[_video_stream]; - AVStream* a = _format_context->streams[_audio_stream]; - - assert (v->time_base.num == a->time_base.num); - assert (v->time_base.den == a->time_base.den); - - return rint (av_q2d (v->time_base) * 1000 * (_first_video_pts - _first_audio_pts)); -} diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h index dc10635a5..ac4cd6fcf 100644 --- a/src/lib/ffmpeg_decoder.h +++ b/src/lib/ffmpeg_decoder.h @@ -65,8 +65,6 @@ public: AVSampleFormat audio_sample_format () const; int64_t audio_channel_layout () const; bool has_subtitles () const; - int bytes_per_audio_sample () const; - int audio_to_discard () const; std::vector audio_streams () const; std::vector subtitle_streams () const; diff --git a/src/lib/film.cc b/src/lib/film.cc index 3a8b29f86..aa0bfa211 100644 --- a/src/lib/film.cc +++ b/src/lib/film.cc @@ -256,7 +256,6 @@ void Film::examine_content_post_gui () { set_length (_examine_content_job->last_video_frame ()); - set_audio_to_discard (_examine_content_job->audio_to_discard ()); _examine_content_job.reset (); string const tdir = dir ("thumbs"); diff --git a/src/lib/film_state.cc b/src/lib/film_state.cc index d59d7ce7b..283830f59 100644 --- a/src/lib/film_state.cc +++ b/src/lib/film_state.cc @@ -126,7 +126,6 @@ FilmState::write_metadata () const } f << "frames_per_second " << _frames_per_second << "\n"; - f << "audio_to_discard " << _audio_to_discard << "\n"; _dirty = false; } @@ -231,8 +230,6 @@ FilmState::read_metadata () _subtitle_streams.push_back (SubtitleStream (v)); } else if (k == "frames_per_second") { _frames_per_second = atof (v.c_str ()); - } else if (k == "audio_to_discard") { - _audio_to_discard = atoi (v.c_str ()); } } @@ -812,13 +809,6 @@ FilmState::set_frames_per_second (float f) _frames_per_second = f; signal_changed (FRAMES_PER_SECOND); } - -void -FilmState::set_audio_to_discard (int a) -{ - _audio_to_discard = a; - signal_changed (AUDIO_TO_DISCARD); -} void FilmState::signal_changed (Property p) @@ -842,9 +832,3 @@ FilmState::audio_channels () const return _audio_streams[_audio_stream].channels (); } - -int -FilmState::total_audio_delay () const -{ - return _audio_delay - _audio_to_discard; -} diff --git a/src/lib/film_state.h b/src/lib/film_state.h index d1d7489f6..8940d0e5d 100644 --- a/src/lib/film_state.h +++ b/src/lib/film_state.h @@ -73,7 +73,6 @@ public: , _audio_sample_rate (0) , _has_subtitles (false) , _frames_per_second (0) - , _audio_to_discard (0) , _dirty (false) {} @@ -114,7 +113,6 @@ public: , _audio_streams (o._audio_streams) , _subtitle_streams (o._subtitle_streams) , _frames_per_second (o._frames_per_second) - , _audio_to_discard (o._audio_to_discard) , _dirty (o._dirty) {} @@ -150,7 +148,6 @@ public: } int audio_channels () const; - int total_audio_delay () const; enum Property { NONE, @@ -182,7 +179,6 @@ public: AUDIO_STREAMS, SUBTITLE_STREAMS, FRAMES_PER_SECOND, - AUDIO_TO_DISCARD }; @@ -342,11 +338,7 @@ public: return _frames_per_second; } - int audio_to_discard () const { - return _audio_to_discard; - } - /* SET */ void set_directory (std::string); @@ -390,7 +382,6 @@ public: void set_audio_streams (std::vector); void set_subtitle_streams (std::vector); void set_frames_per_second (float); - void set_audio_to_discard (int); /** Emitted when some property has changed */ mutable sigc::signal1 Changed; @@ -480,10 +471,6 @@ private: std::vector _subtitle_streams; /** Frames per second of the source */ float _frames_per_second; - /** Number of milliseconds of audio to discard at the start of this film - in order to sync audio with video. Can be negative. - */ - int _audio_to_discard; mutable bool _dirty; -- 2.30.2