2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/** Construct an FFmpeg-based decoder for a film.
 *  @param f Film to decode.
 *  @param o Decode options.
 *  @param j Job that this decode is running within (may be 0 — TODO confirm).
 *  NOTE(review): the initialiser list and body are truncated in this view of
 *  the file; codec context pointers are zero-initialised here.
 */
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
64 	, VideoDecoder (f, o, j)
65 	, AudioDecoder (f, o, j)
69 	, _video_codec_context (0)
71 	, _audio_codec_context (0)
73 	, _subtitle_codec_context (0)
/** Destructor: close whichever codec contexts were opened (audio and subtitle
 *  streams are optional, so each pointer is checked) and then close the
 *  format context.
 */
82 FFmpegDecoder::~FFmpegDecoder ()
84 	if (_audio_codec_context) {
85 		avcodec_close (_audio_codec_context);
88 	if (_video_codec_context) {
89 		avcodec_close (_video_codec_context);
92 	if (_subtitle_codec_context) {
93 		avcodec_close (_subtitle_codec_context);
98 	avformat_close_input (&_format_context);
/** Open the film's content file with libavformat, read its stream
 *  information, catalogue the audio/subtitle streams, record the first video
 *  stream, and allocate the AVFrame used for decoding.
 *  @throws OpenFileError if the content cannot be opened.
 *  @throws DecodeError if stream info, a video stream or the frame cannot be
 *  obtained.
 */
102 FFmpegDecoder::setup_general ()
106 	if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
107 		throw OpenFileError (_film->content_path ());
110 	if (avformat_find_stream_info (_format_context, 0) < 0) {
111 		throw DecodeError ("could not find stream information");
114 	/* Find video, audio and subtitle streams and choose the first of each */
116 	for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
117 		AVStream* s = _format_context->streams[i];
118 		if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
120 		} else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
122 			/* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
123 			   so bodge it here.  No idea why we should have to do this.
126 			if (s->codec->channel_layout == 0) {
127 				s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
130 			_audio_streams.push_back (
131 				shared_ptr<AudioStream> (
132 					new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
136 		} else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
137 			_subtitle_streams.push_back (
138 				shared_ptr<SubtitleStream> (
139 					new SubtitleStream (stream_name (s), i)
145 	if (_video_stream < 0) {
146 		throw DecodeError ("could not find video stream");
149 	_frame = avcodec_alloc_frame ();
151 		throw DecodeError ("could not allocate frame");
/** Find and open the decoder for the chosen video stream, forcing
 *  single-threaded decode via the "threads" option.
 *  @throws DecodeError if no decoder is found or it cannot be opened.
 */
156 FFmpegDecoder::setup_video ()
158 	_video_codec_context = _format_context->streams[_video_stream]->codec;
159 	_video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
161 	if (_video_codec == 0) {
162 		throw DecodeError ("could not find video decoder");
165 	/* I think this prevents problems with green hash on decodes and
166 	   "changing frame properties on the fly is not supported by all filters"
167 	   messages with some content.  Although I'm not sure; needs checking.
169 	AVDictionary* opts = 0;
170 	av_dict_set (&opts, "threads", "1", 0);
172 	if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
173 		throw DecodeError ("could not open video decoder");
/** Find and open the decoder for the selected audio stream, if any.
 *  A no-op when there is no audio stream.
 *  @throws DecodeError if the decoder is missing or cannot be opened.
 */
178 FFmpegDecoder::setup_audio ()
180 	if (!_audio_stream) {
184 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
187 	_audio_codec_context = _format_context->streams[ffa->id()]->codec;
188 	_audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
190 	if (_audio_codec == 0) {
191 		throw DecodeError ("could not find audio decoder");
194 	if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
195 		throw DecodeError ("could not open audio decoder");
/** Find and open the decoder for the selected subtitle stream, if any.
 *  A no-op when there is no subtitle stream.
 *  @throws DecodeError if the decoder is missing or cannot be opened.
 */
200 FFmpegDecoder::setup_subtitle ()
202 	if (!_subtitle_stream) {
206 	_subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
207 	_subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
209 	if (_subtitle_codec == 0) {
210 		throw DecodeError ("could not find subtitle decoder");
213 	if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
214 		throw DecodeError ("could not open subtitle decoder");
/** Perform one pass of the decode: read one packet from the container and
 *  dispatch it to the video, audio or subtitle decoder as appropriate.
 *  On read failure the remaining buffered frames are drained from the
 *  decoders (flush) instead of decoding a new packet.
 *  NOTE(review): several lines (braces, buffer declaration, returns) are
 *  missing from this view of the file.
 */
220 FFmpegDecoder::pass ()
222 	int r = av_read_frame (_format_context, &_packet);
225 		if (r != AVERROR_EOF) {
226 			/* Maybe we should fail here, but for now we'll just finish off instead */
228 			av_strerror (r, buf, sizeof(buf));
229 			_film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
232 		/* Get any remaining frames */
237 		/* XXX: should we reset _packet.data and size after each *_decode_* call? */
			/* Drain buffered video frames until the decoder has no more */
241 		while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
242 			filter_and_emit_video (_frame);
245 		if (_audio_stream && _opt->decode_audio) {
			/* Drain buffered audio frames likewise */
246 			while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
247 				int const data_size = av_samples_get_buffer_size (
248 					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
251 				assert (_audio_codec_context->channels == _film->audio_channels());
252 				Audio (deinterleave_audio (_frame->data[0], data_size));
	/* Reset the shared frame before reusing it for this packet's decode */
259 	avcodec_get_frame_defaults (_frame);
261 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
263 	if (_packet.stream_index == _video_stream) {
266 		int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
267 		if (r >= 0 && frame_finished) {
269 			if (r != _packet.size) {
270 				_film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
			/* decoder_alignment selects between two emit paths — the other
			   branch is not visible in this view of the file */
273 			if (_opt->decoder_alignment) {
276 				filter_and_emit_video (_frame);
280 	} else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
283 		if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
285 			/* Where we are in the source, in seconds */
286 			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
287 				* av_frame_get_best_effort_timestamp(_frame);
289 			/* We only decode audio if we've had our first video packet through, and if it
290 			   was before this packet.  Until then audio is thrown away.
293 			if (_first_video && _first_video.get() <= source_pts_seconds) {
296 					_first_audio = source_pts_seconds;
298 					/* This is our first audio frame, and if we've arrived here we must have had our
299 					   first video frame.  Push some silence to make up any gap between our first
300 					   video frame and our first audio.
303 					/* frames of silence that we must push */
304 					int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
308 						"First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
309 						_first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
314 					shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
315 					audio->make_silent ();
320 				int const data_size = av_samples_get_buffer_size (
321 					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
324 				assert (_audio_codec_context->channels == _film->audio_channels());
325 				Audio (deinterleave_audio (_frame->data[0], data_size));
		/* Subtitles are only decoded once video has started (_first_video set) */
329 	} else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
333 		if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
334 			/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
335 			   indicate that the previous subtitle should stop.
337 			if (sub.num_rects > 0) {
338 				shared_ptr<TimedSubtitle> ts;
340 					emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
342 					/* some problem with the subtitle; we probably didn't understand it */
					/* Empty subtitle: emit a null to clear the current one */
345 				emit_subtitle (shared_ptr<TimedSubtitle> ());
347 			avsubtitle_free (&sub);
351 	av_free_packet (&_packet);
/** Convert a buffer of interleaved (or, for FLTP, planar) audio samples into
 *  per-channel float AudioBuffers.
 *  @param data Raw sample data from the decoder (_frame->data[0]).
 *  @param size Size of the data in bytes; must be a whole number of
 *  per-channel sample groups (asserted below).
 *  @return Deinterleaved float audio.
 *  NOTE(review): the channel/sample counter updates and some case endings are
 *  missing from this view of the file.
 */
355 shared_ptr<AudioBuffers>
356 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
358 	assert (_film->audio_channels());
359 	assert (bytes_per_audio_sample());
361 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
364 	/* Deinterleave and convert to float */
366 	assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
368 	int const total_samples = size / bytes_per_audio_sample();
369 	int const frames = total_samples / _film->audio_channels();
370 	shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
372 	switch (audio_sample_format()) {
373 	case AV_SAMPLE_FMT_S16:
		/* Interleaved signed 16-bit: scale by 2^15 to [-1, 1) */
375 		int16_t* p = (int16_t *) data;
378 		for (int i = 0; i < total_samples; ++i) {
379 			audio->data(channel)[sample] = float(*p++) / (1 << 15);
382 			if (channel == _film->audio_channels()) {
390 	case AV_SAMPLE_FMT_S32:
		/* Interleaved signed 32-bit: scale by 2^31 */
392 		int32_t* p = (int32_t *) data;
395 		for (int i = 0; i < total_samples; ++i) {
396 			audio->data(channel)[sample] = float(*p++) / (1 << 31);
399 			if (channel == _film->audio_channels()) {
406 	case AV_SAMPLE_FMT_FLTP:
		/* Planar float: one contiguous run of `frames` floats per channel */
408 		float* p = reinterpret_cast<float*> (data);
409 		for (int i = 0; i < _film->audio_channels(); ++i) {
410 			memcpy (audio->data(i), p, frames * sizeof(float));
/** @return Frame rate of the video stream: the stream's average frame rate
 *  when it is set, otherwise the container's r_frame_rate as a fallback.
 */
424 FFmpegDecoder::frames_per_second () const
426 	AVStream* s = _format_context->streams[_video_stream];
428 	if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
429 		return av_q2d (s->avg_frame_rate);
432 	return av_q2d (s->r_frame_rate);
/** @return Sample format of the audio codec context, or (AVSampleFormat) 0
 *  if audio has not been set up.
 */
436 FFmpegDecoder::audio_sample_format () const
438 	if (_audio_codec_context == 0) {
439 		return (AVSampleFormat) 0;
442 	return _audio_codec_context->sample_fmt;
/** @return Native width and height of the video, from the codec context. */
446 FFmpegDecoder::native_size () const
448 	return Size (_video_codec_context->width, _video_codec_context->height);
/** @return Pixel format of the decoded video. */
452 FFmpegDecoder::pixel_format () const
454 	return _video_codec_context->pix_fmt;
/** @return Numerator of the video codec context's time base. */
458 FFmpegDecoder::time_base_numerator () const
460 	return _video_codec_context->time_base.num;
/** @return Denominator of the video codec context's time base. */
464 FFmpegDecoder::time_base_denominator () const
466 	return _video_codec_context->time_base.den;
/** @return Numerator of the video's sample aspect ratio. */
470 FFmpegDecoder::sample_aspect_ratio_numerator () const
472 	return _video_codec_context->sample_aspect_ratio.num;
/** @return Denominator of the video's sample aspect ratio. */
476 FFmpegDecoder::sample_aspect_ratio_denominator () const
478 	return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for a stream from its "language" and "title"
 *  metadata entries, falling back when both are empty.
 *  NOTE(review): the lines composing the name into `n` and the fallback value
 *  are missing from this view of the file.
 */
482 FFmpegDecoder::stream_name (AVStream* s) const
486 	AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
491 	AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
493 		if (!n.str().empty()) {
499 	if (n.str().empty()) {
/** @return Number of bytes per audio sample for the current sample format. */
507 FFmpegDecoder::bytes_per_audio_sample () const
509 	return av_get_bytes_per_sample (audio_sample_format ());
/** Select an audio stream; delegates to the base class first.
 *  NOTE(review): further body lines are missing from this view of the file.
 */
513 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
515 	AudioDecoder::set_audio_stream (s);
/** Select a subtitle stream; delegates to the base class first.
 *  NOTE(review): further body lines are missing from this view of the file.
 */
520 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
522 	VideoDecoder::set_subtitle_stream (s);
/** Run a decoded frame through a FilterGraph matching its size and pixel
 *  format (creating and caching one if none exists yet) and emit the
 *  resulting images.
 *  NOTE(review): the graph-reuse branch and the per-image emit call are
 *  missing from this view of the file.
 */
527 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
529 	shared_ptr<FilterGraph> graph;
531 	list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
532 	while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
536 	if (i == _filter_graphs.end ()) {
537 		graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
538 		_filter_graphs.push_back (graph);
539 		_film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
544 	list<shared_ptr<Image> > images = graph->process (frame);
546 	for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
/** Seek the video stream to (approximately) the given source frame:
 *  the frame index is converted into the stream's time base units, then the
 *  video decoder's buffers are flushed so stale frames are discarded.
 */
552 FFmpegDecoder::seek (SourceFrame f)
554 	int64_t const t = static_cast<int64_t>(f) / (av_q2d (_format_context->streams[_video_stream]->time_base) * frames_per_second());
555 	int const r = av_seek_frame (_format_context, _video_stream, t, 0);
556 	avcodec_flush_buffers (_video_codec_context);
/** Factory: deserialise an FFmpegAudioStream from its string form.
 *  @param t Serialised representation.
 *  @param v Serialisation version; before version 1 there was no type tag in
 *  the string (only FFmpeg streams existed), so the string is accepted as-is.
 *  @return New stream, or a null pointer if the type tag is not "ffmpeg".
 */
560 shared_ptr<FFmpegAudioStream>
561 FFmpegAudioStream::create (string t, optional<int> v)
564 		/* version < 1; no type in the string, and there's only FFmpeg streams anyway */
565 		return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
571 	if (type != "ffmpeg") {
572 		return shared_ptr<FFmpegAudioStream> ();
575 	return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/** Construct from a serialised representation.
 *  Old (pre-version-1) strings carry id and channel count, from which a
 *  default channel layout is derived; version-1 strings carry a "ffmpeg"
 *  type tag, id, sample rate and channel layout.  The stream name is
 *  whatever remains after skipping `name_index` space-separated fields.
 *  NOTE(review): the version branch and name assignment lines are missing
 *  from this view of the file.
 */
578 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
586 		n >> _id >> channels;
587 		_channel_layout = av_get_default_channel_layout (channels);
591 		/* Current (marked version 1) */
592 		n >> type >> _id >> _sample_rate >> _channel_layout;
593 		assert (type == "ffmpeg");
596 	for (int i = 0; i < name_index; ++i) {
597 		size_t const s = t.find (' ');
598 		if (s != string::npos) {
599 			t = t.substr (s + 1);
/** @return Serialised form: "ffmpeg <id> <sample_rate> <channel_layout> <name>",
 *  the inverse of the deserialising constructor.
 */
607 FFmpegAudioStream::to_string () const
609 	return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
614 FFmpegDecoder::out_careful ()
616 /* Where we are in the output, in seconds */
617 double const out_pts_seconds = video_frame() / frames_per_second();
619 /* Where we are in the source, in seconds */
620 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
621 * av_frame_get_best_effort_timestamp(_frame);
624 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
629 _first_video = source_pts_seconds;
632 /* Difference between where we are and where we should be */
633 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
634 double const one_frame = 1 / frames_per_second();
636 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
637 if (delta > one_frame) {
638 int const extra = rint (delta / one_frame);
639 for (int i = 0; i < extra; ++i) {
640 repeat_last_video ();
643 "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
644 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
650 if (delta > -one_frame) {
651 /* Process this frame */
652 filter_and_emit_video (_frame);
654 /* Otherwise we are omitting a frame to keep things right */
655 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));