From: Carl Hetherington
Date: Tue, 11 Jan 2022 15:34:26 +0000 (+0100)
Subject: Use a separate AVFrame for each stream when decoding.
X-Git-Tag: v2.15.183~2
X-Git-Url: https://main.carlh.net/gitweb/?p=dcpomatic.git;a=commitdiff_plain;h=b703142e8750c509174b4d964009aecf93f3d834

Use a separate AVFrame for each stream when decoding.

This seems to be what ffplay does, and it makes sense since frames may
be built up from multiple packets, AFAICS.
---

diff --git a/src/lib/ffmpeg.cc b/src/lib/ffmpeg.cc
index eed9ab94c..0f63ea172 100644
--- a/src/lib/ffmpeg.cc
+++ b/src/lib/ffmpeg.cc
@@ -72,7 +72,11 @@ FFmpeg::~FFmpeg ()
 		avcodec_free_context (&i);
 	}
 
-	av_frame_free (&_frame);
+	av_frame_free (&_video_frame);
+	for (auto& audio_frame: _audio_frame) {
+		av_frame_free (&audio_frame.second);
+	}
+
 	avformat_close_input (&_format_context);
 }
@@ -188,8 +192,8 @@ FFmpeg::setup_general ()
 		}
 	}
 
-	_frame = av_frame_alloc ();
-	if (_frame == 0) {
+	_video_frame = av_frame_alloc ();
+	if (_video_frame == nullptr) {
 		throw std::bad_alloc ();
 	}
 }
@@ -354,3 +358,23 @@ FFmpeg::pts_offset (vector> audio_streams, optiona
 	return po;
 }
+
+
+AVFrame *
+FFmpeg::audio_frame (shared_ptr<FFmpegAudioStream> stream)
+{
+	auto iter = _audio_frame.find(stream);
+	if (iter != _audio_frame.end()) {
+		return iter->second;
+	}
+
+	auto frame = av_frame_alloc ();
+	if (frame == nullptr) {
+		throw std::bad_alloc();
+	}
+
+	_audio_frame[stream] = frame;
+	return frame;
+
+}
+
diff --git a/src/lib/ffmpeg.h b/src/lib/ffmpeg.h
index b0769971e..59b2dedd5 100644
--- a/src/lib/ffmpeg.h
+++ b/src/lib/ffmpeg.h
@@ -75,11 +75,14 @@ protected:
 	AVFormatContext* _format_context = nullptr;
 	std::vector<AVCodecContext*> _codec_context;
-	AVFrame* _frame = nullptr;
+	/** AVFrame used for decoding video */
+	AVFrame* _video_frame = nullptr;
 
 	/** Index of video stream within AVFormatContext */
 	boost::optional<int> _video_stream;
 
+	AVFrame* audio_frame (std::shared_ptr<FFmpegAudioStream> stream);
+
 	/* It would appear (though not completely verified) that one must have
 	   a mutex around calls to avcodec_open* and avcodec_close... and here
 	   it is.
@@ -92,6 +95,9 @@ private:
 	static void ffmpeg_log_callback (void* ptr, int level, const char* fmt, va_list vl);
 	static std::weak_ptr<Log> _ffmpeg_log;
+
+	/** AVFrames used for decoding audio streams; accessed with audio_frame() */
+	std::map<std::shared_ptr<FFmpegAudioStream>, AVFrame*> _audio_frame;
 };
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index ea961a894..e5685f661 100644
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -122,7 +122,7 @@ FFmpegDecoder::flush ()
 			/* EOF can happen if we've already sent a flush packet */
 			throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
 		}
-		r = avcodec_receive_frame (context, _frame);
+		r = avcodec_receive_frame (context, audio_frame(i));
 		if (r >= 0) {
 			process_audio_frame (i);
 			did_something = true;
@@ -447,10 +447,11 @@ FFmpegDecoder::audio_stream_from_index (int index) const
 void
 FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
 {
-	auto data = deinterleave_audio (_frame);
+	auto frame = audio_frame (stream);
+	auto data = deinterleave_audio (frame);
 
 	ContentTime ct;
-	if (_frame->pts == AV_NOPTS_VALUE) {
+	if (frame->pts == AV_NOPTS_VALUE) {
 		/* In some streams we see not every frame coming through with a timestamp; for those
 		   that have AV_NOPTS_VALUE we need to work out the timestamp ourselves.  This is
 		   particularly noticeable with TrueHD streams (see #1111).
@@ -460,7 +461,7 @@ FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
 		}
 	} else {
 		ct = ContentTime::from_seconds (
-			_frame->best_effort_timestamp *
+			frame->best_effort_timestamp *
 			av_q2d (stream->stream(_format_context)->time_base))
 			+ _pts_offset;
 	}
@@ -481,7 +482,7 @@ FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
 		to_string(ct),
 		data->frames(),
 		stream->id(),
-		_frame->best_effort_timestamp,
+		frame->best_effort_timestamp,
 		av_q2d(stream->stream(_format_context)->time_base),
 		to_string(_pts_offset)
 		);
@@ -503,13 +504,14 @@ FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
 	}
 
 	auto context = _codec_context[stream->index(_format_context)];
+	auto frame = audio_frame (stream);
 	int r = avcodec_send_packet (context, packet);
 	if (r < 0) {
 		LOG_WARNING("avcodec_send_packet returned %1 for an audio packet", r);
 	}
 	while (r >= 0) {
-		r = avcodec_receive_frame (context, _frame);
+		r = avcodec_receive_frame (context, frame);
 		if (r == AVERROR(EAGAIN)) {
 			/* More input is required */
 			return;
 		}
@@ -536,7 +538,7 @@ FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
 		LOG_WARNING("avcodec_send_packet returned %1 for a video packet", r);
 	}
 
-	r = avcodec_receive_frame (context, _frame);
+	r = avcodec_receive_frame (context, _video_frame);
 	if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
 		/* More input is required, no more frames are coming, or we are flushing and there was
 		 * some error which we just want to ignore.
 		 */
@@ -553,21 +555,21 @@ FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
 	shared_ptr<VideoFilterGraph> graph;
 
 	auto i = _filter_graphs.begin();
-	while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
+	while (i != _filter_graphs.end() && !(*i)->can_process(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format)) {
 		++i;
 	}
 
 	if (i == _filter_graphs.end ()) {
 		dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
-		graph = make_shared<VideoFilterGraph>(dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format, vfr);
+		graph = make_shared<VideoFilterGraph>(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format, vfr);
 		graph->setup (_ffmpeg_content->filters ());
 		_filter_graphs.push_back (graph);
-		LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
+		LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _video_frame->width, _video_frame->height, _video_frame->format);
 	} else {
 		graph = *i;
 	}
 
-	auto images = graph->process (_frame);
+	auto images = graph->process (_video_frame);
 
 	for (auto const& i: images) {
diff --git a/src/lib/ffmpeg_examiner.cc b/src/lib/ffmpeg_examiner.cc
index 0a236d836..5e53f0974 100644
--- a/src/lib/ffmpeg_examiner.cc
+++ b/src/lib/ffmpeg_examiner.cc
@@ -229,7 +229,7 @@ FFmpegExaminer::video_packet (AVCodecContext* context, string& temporal_referenc
 		throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegExaminer::video_packet"), r);
 	}
 
-	r = avcodec_receive_frame (context, _frame);
+	r = avcodec_receive_frame (context, _video_frame);
 	if (r == AVERROR(EAGAIN)) {
 		/* More input is required */
 		return true;
@@ -239,16 +239,17 @@ FFmpegExaminer::video_packet (AVCodecContext* context, string& temporal_referenc
 	}
 
 	if (!_first_video) {
-		_first_video = frame_time (_format_context->streams[_video_stream.get()]);
+		_first_video = frame_time (_video_frame, _format_context->streams[_video_stream.get()]);
 	}
 	if (_need_video_length) {
 		_video_length = frame_time (
+			_video_frame,
 			_format_context->streams[_video_stream.get()]
 			).get_value_or (ContentTime ()).frames_round (video_frame_rate().get ());
 	}
 
 	if (temporal_reference.size() < (PULLDOWN_CHECK_FRAMES * 2)) {
-		temporal_reference += (_frame->top_field_first ? "T" : "B");
-		temporal_reference += (_frame->repeat_pict ? "3" : "2");
+		temporal_reference += (_video_frame->top_field_first ? "T" : "B");
+		temporal_reference += (_video_frame->repeat_pict ? "3" : "2");
 	}
 
 	return true;
@@ -271,22 +272,24 @@ FFmpegExaminer::audio_packet (AVCodecContext* context, shared_ptr
-	stream->first_audio = frame_time (stream->stream(_format_context));
+	stream->first_audio = frame_time (frame, stream->stream(_format_context));
 }
 
 
 optional
-FFmpegExaminer::frame_time (AVStream* s) const
+FFmpegExaminer::frame_time (AVFrame* frame, AVStream* stream) const
 {
 	optional t;
 
-	int64_t const bet = _frame->best_effort_timestamp;
+	int64_t const bet = frame->best_effort_timestamp;
 	if (bet != AV_NOPTS_VALUE) {
-		t = ContentTime::from_seconds (bet * av_q2d (s->time_base));
+		t = ContentTime::from_seconds (bet * av_q2d(stream->time_base));
 	}
 
 	return t;
diff --git a/src/lib/ffmpeg_examiner.h b/src/lib/ffmpeg_examiner.h
index f978eb52b..36efc92b1 100644
--- a/src/lib/ffmpeg_examiner.h
+++ b/src/lib/ffmpeg_examiner.h
@@ -91,7 +91,7 @@ private:
 	std::string stream_name (AVStream* s) const;
 	std::string subtitle_stream_name (AVStream* s) const;
-	boost::optional frame_time (AVStream* s) const;
+	boost::optional frame_time (AVFrame* frame, AVStream* stream) const;
 
 	std::vector<std::shared_ptr<FFmpegSubtitleStream>> _subtitle_streams;
 	std::vector<std::shared_ptr<FFmpegAudioStream>> _audio_streams;
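
For reference, a minimal standalone sketch of the pattern this commit adopts: one lazily-allocated AVFrame per audio stream, used with FFmpeg's send/receive decode API. This is not dcpomatic code; StreamId, PerStreamFrames and decode_packet are illustrative stand-ins (the real code keys the map on shared_ptr<FFmpegAudioStream> via audio_frame()). The point is that a frame which may still be being assembled from several packets on one stream is never handed to another stream's decoder.

// Sketch only: error handling and logging are omitted for brevity.
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/frame.h>
}

#include <map>
#include <new>

// Illustrative stand-in for dcpomatic's shared_ptr<FFmpegAudioStream> key.
using StreamId = int;

class PerStreamFrames
{
public:
	~PerStreamFrames ()
	{
		for (auto& i: _frames) {
			av_frame_free (&i.second);
		}
	}

	/* Return the AVFrame belonging to this stream, allocating it on first use */
	AVFrame* frame (StreamId id)
	{
		auto iter = _frames.find (id);
		if (iter != _frames.end()) {
			return iter->second;
		}
		auto f = av_frame_alloc ();
		if (!f) {
			throw std::bad_alloc ();
		}
		_frames[id] = f;
		return f;
	}

private:
	std::map<StreamId, AVFrame*> _frames;
};

/* Drain one packet's worth of decoded audio into this stream's own frame */
void decode_packet (AVCodecContext* context, AVPacket* packet, StreamId id, PerStreamFrames& frames)
{
	int r = avcodec_send_packet (context, packet);
	while (r >= 0) {
		r = avcodec_receive_frame (context, frames.frame(id));
		if (r == AVERROR(EAGAIN) || r == AVERROR_EOF) {
			return;
		}
		/* ... frames.frame(id) now holds a complete decoded frame for this stream ... */
	}
}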