2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/// Construct a decoder from film f, decode options o and job j.
/// The same three arguments are forwarded to the VideoDecoder and AudioDecoder
/// bases, and the video, audio and subtitle codec contexts start out null.
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const DecodeOptions> o, Job* j)
64 , VideoDecoder (f, o, j)
65 , AudioDecoder (f, o, j)
69 , _video_codec_context (0)
71 , _audio_codec_context (0)
73 , _subtitle_codec_context (0)
// Bind film_changed to the Changed signal of the film and keep the returned
// connection in _film_connection.
81 _film_connection = f->Changed.connect (bind (&FFmpegDecoder::film_changed, this, _1));
/// Destructor. Closes whichever of the audio, video and subtitle codec
/// contexts are non-null, then closes the input behind _format_context.
84 FFmpegDecoder::~FFmpegDecoder ()
// A context that is still null is skipped rather than closed.
86 if (_audio_codec_context) {
87 avcodec_close (_audio_codec_context);
90 if (_video_codec_context) {
91 avcodec_close (_video_codec_context);
94 if (_subtitle_codec_context) {
95 avcodec_close (_subtitle_codec_context);
// The format context is closed after the codec contexts above.
100 avformat_close_input (&_format_context);
/// Open the film content with libavformat and enumerate its streams.
/// Every audio stream found is appended to _audio_streams and every subtitle
/// stream to _subtitle_streams; the decoding frame _frame is allocated last.
/// @throws OpenFileError if avformat_open_input fails on the content path.
/// @throws DecodeError if stream information cannot be found, if _video_stream
///         is still negative after the scan, or (going by the message) if the
///         frame cannot be allocated.
/// NOTE(review): AVStream::codec and avcodec_alloc_frame are deprecated in
/// later FFmpeg releases; confirm which FFmpeg version this code targets.
104 FFmpegDecoder::setup_general ()
108 if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
109 throw OpenFileError (_film->content_path ());
112 if (avformat_find_stream_info (_format_context, 0) < 0) {
113 throw DecodeError ("could not find stream information");
116 /* Find video, audio and subtitle streams and choose the first of each */
118 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
119 AVStream* s = _format_context->streams[i];
120 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
122 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
124 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
125 so bodge it here. No idea why we should have to do this.
128 if (s->codec->channel_layout == 0) {
129 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
// Describe this audio stream by name, stream index, sample rate and channel layout.
132 _audio_streams.push_back (
133 shared_ptr<AudioStream> (
134 new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
138 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
// Subtitle streams are described by name and stream index only.
139 _subtitle_streams.push_back (
140 shared_ptr<SubtitleStream> (
141 new SubtitleStream (stream_name (s), i)
// A negative _video_stream is treated as no video stream having been found.
147 if (_video_stream < 0) {
148 throw DecodeError ("could not find video stream");
151 _frame = avcodec_alloc_frame ();
153 throw DecodeError ("could not allocate frame");
/// Take the codec context of the stream at index _video_stream, look up a
/// decoder for its codec id and open it.
/// @throws DecodeError if no decoder is found or if avcodec_open2 fails.
158 FFmpegDecoder::setup_video ()
160 _video_codec_context = _format_context->streams[_video_stream]->codec;
161 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
163 if (_video_codec == 0) {
164 throw DecodeError ("could not find video decoder");
167 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
168 throw DecodeError ("could not open video decoder");
/// Take the codec context of the selected audio stream, look up a decoder for
/// its codec id and open it.
/// @throws DecodeError if no decoder is found or if avcodec_open2 fails.
173 FFmpegDecoder::setup_audio ()
// NOTE(review): presumably nothing is set up when no audio stream is selected; confirm.
175 if (!_audio_stream) {
// The stream index is taken from the FFmpegAudioStream view of _audio_stream.
179 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
182 _audio_codec_context = _format_context->streams[ffa->id()]->codec;
183 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
185 if (_audio_codec == 0) {
186 throw DecodeError ("could not find audio decoder");
189 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
190 throw DecodeError ("could not open audio decoder");
/// Take the codec context of the selected subtitle stream, look up a decoder
/// for its codec id and open it.
/// @throws DecodeError if no decoder is found or if avcodec_open2 fails.
195 FFmpegDecoder::setup_subtitle ()
// NOTE(review): presumably nothing is set up when no subtitle stream is selected; confirm.
197 if (!_subtitle_stream) {
201 _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
202 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
204 if (_subtitle_codec == 0) {
205 throw DecodeError ("could not find subtitle decoder");
208 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
209 throw DecodeError ("could not open subtitle decoder");
/// Read one packet from the input and decode it according to the stream it
/// belongs to: the video stream, the selected audio stream or the selected
/// subtitle stream.
/// The first part deals with the result of av_read_frame: a result other than
/// AVERROR_EOF is logged, and remaining frames are pulled from the video and
/// audio decoders.
/// NOTE(review): presumably that first part only runs when av_read_frame
/// fails or reaches the end of the input; confirm.
215 FFmpegDecoder::pass ()
// Pull the next packet from the demuxer into _packet.
217 int r = av_read_frame (_format_context, &_packet);
220 if (r != AVERROR_EOF) {
221 /* Maybe we should fail here, but for now we'll just finish off instead */
223 av_strerror (r, buf, sizeof(buf));
224 _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
227 /* Get any remaining frames */
232 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
// Keep emitting video for as long as the decoder reports a finished frame.
236 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
237 filter_and_emit_video (_frame);
// Likewise for audio, but only with a selected audio stream and audio decoding enabled.
240 if (_audio_stream && _opt->decode_audio) {
241 while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
242 int const data_size = av_samples_get_buffer_size (
243 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
// The codec channel count is required to match the channel count of the film.
246 assert (_audio_codec_context->channels == _film->audio_channels());
247 Audio (deinterleave_audio (_frame->data[0], data_size));
254 avcodec_get_frame_defaults (_frame);
256 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
// Case 1: the packet belongs to the video stream.
258 if (_packet.stream_index == _video_stream) {
261 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
262 if (r >= 0 && frame_finished) {
// A decoder that consumed less than the whole packet is only logged.
264 if (r != _packet.size) {
265 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
268 if (_opt->video_sync) {
271 filter_and_emit_video (_frame);
// Case 2: the packet belongs to the selected audio stream and audio decoding is enabled.
275 } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
// NOTE(review): only the sign of the result is tested here, so data left in a
// partially consumed packet would not be decoded; confirm this is acceptable.
278 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
280 /* Where we are in the source, in seconds */
281 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
282 * av_frame_get_best_effort_timestamp(_frame);
284 /* We only decode audio if we've had our first video packet through, and if it
285 was before this packet. Until then audio is thrown away.
288 if (_first_video && _first_video.get() <= source_pts_seconds) {
291 _first_audio = source_pts_seconds;
293 /* This is our first audio frame, and if we've arrived here we must have had our
294 first video frame. Push some silence to make up any gap between our first
295 video frame and our first audio.
298 /* frames of silence that we must push */
// Gap in seconds between first video and first audio, scaled by the stream sample rate.
299 int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
303 "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
304 _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
309 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
310 audio->make_silent ();
315 int const data_size = av_samples_get_buffer_size (
316 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
319 assert (_audio_codec_context->channels == _film->audio_channels());
320 Audio (deinterleave_audio (_frame->data[0], data_size));
// Case 3: the packet belongs to the selected subtitle stream; it is ignored until
// subtitle decoding is enabled and _first_video has been set.
324 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
// NOTE(review): the result of avcodec_decode_subtitle2 is tested for truthiness,
// so a negative error code passes this test and a result of 0 fails it, whereas
// the video and audio paths test for >= 0; confirm which is intended.
328 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
329 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
330 indicate that the previous subtitle should stop.
332 if (sub.num_rects > 0) {
333 shared_ptr<TimedSubtitle> ts;
335 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
337 /* some problem with the subtitle; we probably didn't understand it */
// An empty pointer is emitted here; NOTE(review): presumably this is how the
// stop-previous-subtitle case described above is signalled; confirm.
340 emit_subtitle (shared_ptr<TimedSubtitle> ());
342 avsubtitle_free (&sub);
// Release the packet that av_read_frame filled in.
346 av_free_packet (&_packet);
/// Convert a buffer of decoded audio into per-channel float buffers.
/// The handling depends on audio_sample_format(): S16 and S32 samples are read
/// one after another, cycling through the channels, and scaled to float; FLTP
/// data is copied per channel with memcpy.
/// @param data Decoded sample data.
/// @param size Size of data in bytes; asserted to be a whole multiple of
///        bytes_per_audio_sample() times the channel count of the stream.
/// @return AudioBuffers with one buffer per channel, each holding
///         size / bytes_per_audio_sample() / _film->audio_channels() frames.
/// NOTE(review): the channel count is read from ffa->channels() in some places
/// and from _film->audio_channels() in others; the code assumes they are
/// equal; confirm.
350 shared_ptr<AudioBuffers>
351 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
353 assert (_film->audio_channels());
354 assert (bytes_per_audio_sample());
356 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
359 /* Deinterleave and convert to float */
// The buffer must hold a whole number of frames for every channel.
361 assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
// total_samples counts samples across all channels; frames counts samples per channel.
363 int const total_samples = size / bytes_per_audio_sample();
364 int const frames = total_samples / _film->audio_channels();
365 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
367 switch (audio_sample_format()) {
368 case AV_SAMPLE_FMT_S16:
370 int16_t* p = (int16_t *) data;
373 for (int i = 0; i < total_samples; ++i) {
// Dividing a 16-bit sample by 2^15 maps it into the range [-1, 1).
374 audio->data(channel)[sample] = float(*p++) / (1 << 15);
// Reaching the channel count marks the end of one frame.
377 if (channel == _film->audio_channels()) {
385 case AV_SAMPLE_FMT_S32:
387 int32_t* p = (int32_t *) data;
390 for (int i = 0; i < total_samples; ++i) {
// NOTE(review): 1 << 31 does not fit in a 32-bit signed int, so the shift
// overflows (undefined behaviour before C++20) and in practice gives a negative
// divisor, which would invert the sign of every sample. A float constant such
// as 2147483648.0f would avoid this; confirm and fix.
391 audio->data(channel)[sample] = float(*p++) / (1 << 31);
394 if (channel == _film->audio_channels()) {
401 case AV_SAMPLE_FMT_FLTP:
// Planar float needs no conversion: each channel is copied as a run of frames floats.
// NOTE(review): p starts at data; confirm that it is advanced between channels
// so that each channel reads its own plane.
403 float* p = reinterpret_cast<float*> (data);
404 for (int i = 0; i < _film->audio_channels(); ++i) {
405 memcpy (audio->data(i), p, frames * sizeof(float));
/// @return Frame rate of the video stream: avg_frame_rate when both its
/// numerator and denominator are non-zero, otherwise r_frame_rate.
419 FFmpegDecoder::frames_per_second () const
421 AVStream* s = _format_context->streams[_video_stream];
// A zero numerator or denominator means avg_frame_rate cannot be used.
423 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
424 return av_q2d (s->avg_frame_rate);
427 return av_q2d (s->r_frame_rate);
/// @return Sample format of the audio codec context, or the AVSampleFormat
/// with value 0 when there is no audio codec context.
431 FFmpegDecoder::audio_sample_format () const
433 if (_audio_codec_context == 0) {
434 return (AVSampleFormat) 0;
437 return _audio_codec_context->sample_fmt;
/// @return Size built from the width and height of the video codec context.
441 FFmpegDecoder::native_size () const
443 return Size (_video_codec_context->width, _video_codec_context->height);
/// @return Pixel format (pix_fmt) of the video codec context.
447 FFmpegDecoder::pixel_format () const
449 return _video_codec_context->pix_fmt;
/// @return Numerator of the time base of the video codec context.
453 FFmpegDecoder::time_base_numerator () const
455 return _video_codec_context->time_base.num;
/// @return Denominator of the time base of the video codec context.
459 FFmpegDecoder::time_base_denominator () const
461 return _video_codec_context->time_base.den;
/// @return Numerator of the sample aspect ratio of the video codec context.
465 FFmpegDecoder::sample_aspect_ratio_numerator () const
467 return _video_codec_context->sample_aspect_ratio.num;
/// @return Denominator of the sample aspect ratio of the video codec context.
471 FFmpegDecoder::sample_aspect_ratio_denominator () const
473 return _video_codec_context->sample_aspect_ratio.den;
/// Build a name for stream s from its metadata dictionary.
/// The language and title entries are looked up with av_dict_get, and the
/// text is assembled in the stream n.
/// @param s Stream whose metadata is read.
477 FFmpegDecoder::stream_name (AVStream* s) const
481 AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
486 AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
// NOTE(review): presumably this separates the title from text already in n; confirm.
488 if (!n.str().empty()) {
// NOTE(review): presumably a fallback for a stream that yielded no text at all; confirm.
494 if (n.str().empty()) {
/// @return Result of av_get_bytes_per_sample for the current audio_sample_format().
502 FFmpegDecoder::bytes_per_audio_sample () const
504 return av_get_bytes_per_sample (audio_sample_format ());
/// Select audio stream s; the selection itself is delegated to
/// AudioDecoder::set_audio_stream.
/// @param s Audio stream to select.
508 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
510 AudioDecoder::set_audio_stream (s);
/// Select subtitle stream s; the selection itself is delegated to
/// VideoDecoder::set_subtitle_stream.
/// @param s Subtitle stream to select.
515 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
517 VideoDecoder::set_subtitle_stream (s);
/// Pass a decoded frame through a FilterGraph matching its size and pixel
/// format, then walk over the images the graph returns.
/// Existing graphs in _filter_graphs are searched first; when none can process
/// the frame a new graph is created, stored and logged.
/// @param frame Decoded video frame to filter.
522 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
// _filter_graphs is guarded by this mutex; film_changed takes the same lock before clearing the list.
524 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
526 shared_ptr<FilterGraph> graph;
// Look for a cached graph that accepts this frame size and pixel format.
528 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
529 while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
// No cached graph fits, so build one for this size and format and remember it.
533 if (i == _filter_graphs.end ()) {
534 graph.reset (new FilterGraph (_film, this, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
535 _filter_graphs.push_back (graph);
536 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
541 list<shared_ptr<Image> > images = graph->process (frame);
// Source position in frames: stream time base times best-effort timestamp times frame rate.
// NOTE(review): the timestamp is read from the member _frame rather than from the
// frame parameter; the callers in this file pass _frame, so the two coincide; confirm.
543 SourceFrame const sf = av_q2d (_format_context->streams[_video_stream]->time_base)
544 * av_frame_get_best_effort_timestamp(_frame) * frames_per_second();
546 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
/// Seek the input to source frame f on the video stream and flush the video
/// decoder buffers.
/// @param f Target position as a source frame index.
/// NOTE(review): only the video codec context is flushed here; confirm whether
/// the audio and subtitle decoders need flushing after a seek as well.
552 FFmpegDecoder::seek (SourceFrame f)
// Convert the frame index to stream time-base units: f / (time_base * frames per second).
554 int64_t const t = static_cast<int64_t>(f) / (av_q2d (_format_context->streams[_video_stream]->time_base) * frames_per_second());
555 int const r = av_seek_frame (_format_context, _video_stream, t, 0);
556 avcodec_flush_buffers (_video_codec_context);
/// Factory that builds an FFmpegAudioStream from its serialised form.
/// @param t Serialised stream description.
/// @param v Optional version of the serialised form.
/// @return A new stream built from (t, v), or an empty pointer when type is
///         not ffmpeg. NOTE(review): type is presumably read from the start
///         of t; confirm.
560 shared_ptr<FFmpegAudioStream>
561 FFmpegAudioStream::create (string t, optional<int> v)
564 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
565 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
// Any type other than ffmpeg is rejected with an empty pointer.
571 if (type != "ffmpeg") {
572 return shared_ptr<FFmpegAudioStream> ();
575 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/// Parse a stream from its serialised form t.
/// Two layouts are read: one holding the id and a channel count, from which a
/// default channel layout is derived, and the current one (marked version 1)
/// holding type, id, sample rate and channel layout.
/// @param t Serialised stream description.
/// @param version Optional version of the serialised form.
///        NOTE(review): presumably selects between the two layouts; confirm.
578 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
// Older layout: only a channel count is stored, so the layout is the FFmpeg default for it.
586 n >> _id >> channels;
587 _channel_layout = av_get_default_channel_layout (channels);
591 /* Current (marked version 1) */
592 n >> type >> _id >> _sample_rate >> _channel_layout;
593 assert (type == "ffmpeg");
// Drop up to name_index leading space-separated fields from t.
// NOTE(review): presumably what remains of t is the stream name; confirm.
596 for (int i = 0; i < name_index; ++i) {
597 size_t const s = t.find (' ');
598 if (s != string::npos) {
599 t = t.substr (s + 1);
/// @return Serialised form of this stream: the word ffmpeg followed by id,
/// sample rate, channel layout and name, separated by spaces. The first four
/// fields are in the order that the version 1 branch of the constructor reads.
607 FFmpegAudioStream::to_string () const
609 return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
/// Emit the decoded frame in _frame while keeping the output position in step
/// with the source timestamps.
/// The source position (relative to _first_video) is compared with the output
/// position. When the source is more than one frame period ahead, the last
/// video frame is repeated to fill the gap. The frame is then emitted unless
/// the source is a full frame period or more behind, in which case it is
/// dropped and the drop is logged.
613 FFmpegDecoder::out_with_sync ()
615 /* Where we are in the output, in seconds */
616 double const out_pts_seconds = video_frame() / frames_per_second();
618 /* Where we are in the source, in seconds */
619 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
620 * av_frame_get_best_effort_timestamp(_frame);
623 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
// NOTE(review): presumably _first_video is only assigned while it is still unset; confirm.
628 _first_video = source_pts_seconds;
631 /* Difference between where we are and where we should be */
632 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
633 double const one_frame = 1 / frames_per_second();
635 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
636 if (delta > one_frame) {
// The number of repeats is the gap rounded to a whole number of frame periods.
637 int const extra = rint (delta / one_frame);
638 for (int i = 0; i < extra; ++i) {
639 repeat_last_video ();
642 "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
643 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
// The frame is kept unless the source lags the output by one frame period or more.
649 if (delta > -one_frame) {
650 /* Process this frame */
651 filter_and_emit_video (_frame);
653 /* Otherwise we are omitting a frame to keep things right */
654 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
/// Handler bound to the Changed signal of the film in the constructor.
/// Takes _filter_graphs_mutex and discards all cached filter graphs.
/// @param p Property of the film that changed.
/// NOTE(review): presumably only some properties trigger the clear; confirm.
659 FFmpegDecoder::film_changed (Film::Property p)
// Same mutex as filter_and_emit_video, which reads and appends to the list.
664 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
665 _filter_graphs.clear ();
675 /** @return Length (in video frames) according to our content's header */
// Computed as the duration of the format context divided by AV_TIME_BASE
// (giving seconds), multiplied by frames_per_second().
// NOTE(review): FFmpeg can report an unknown duration as AV_NOPTS_VALUE;
// confirm whether that case needs handling here.
677 FFmpegDecoder::length () const
679 return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();