2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
58 using std::stringstream;
60 using boost::shared_ptr;
61 using boost::optional;
62 using boost::dynamic_pointer_cast;
/** Construct an FFmpeg decoder.
 *  @param f Film that the content belongs to.
 *  @param c Content to decode.
 *  @param o Decode options.
 *
 *  NOTE(review): the base Decoder initialiser and the constructor body
 *  (which presumably calls the setup_* methods) are not visible in this
 *  extract -- confirm against the full file.
 */
FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<FFmpegContent> c, DecodeOptions o)
	, VideoDecoder (f, c, o)
	, AudioDecoder (f, c, o)
	/* Codec contexts start null so the destructor can tell which were opened */
	, _video_codec_context (0)
	, _audio_codec_context (0)
	, _subtitle_codec_context (0)
/** Close whichever codec contexts were opened, then release the format
 *  context (which also frees it, as it was allocated by avformat_open_input).
 */
FFmpegDecoder::~FFmpegDecoder ()
	if (_audio_codec_context) {
		avcodec_close (_audio_codec_context);
	if (_video_codec_context) {
		avcodec_close (_video_codec_context);
	if (_subtitle_codec_context) {
		avcodec_close (_subtitle_codec_context);
	/* NOTE(review): _frame is allocated in setup_general; its av_free is
	   presumably on a line not shown here -- confirm it is freed. */
	avformat_close_input (&_format_context);
/** Open the content's file, discover its streams (recording the first video
 *  stream and all audio/subtitle streams) and allocate the decode frame.
 *  Throws OpenFileError if the file cannot be opened and DecodeError on
 *  other failures.
 */
FFmpegDecoder::setup_general ()
	if (avformat_open_input (&_format_context, _ffmpeg_content->file().string().c_str(), 0, 0) < 0) {
		throw OpenFileError (_ffmpeg_content->file().string ());
	if (avformat_find_stream_info (_format_context, 0) < 0) {
		throw DecodeError (_("could not find stream information"));

	/* Find video, audio and subtitle streams and choose the first of each */

	for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
		AVStream* s = _format_context->streams[i];
		if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
			/* presumably records i as _video_stream here (line not shown) */
		} else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {

			/* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
			   so bodge it here.  No idea why we should have to do this.
			*/
			if (s->codec->channel_layout == 0) {
				s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);

			_audio_streams.push_back (
				FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)

		} else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
			_subtitle_streams.push_back (FFmpegSubtitleStream (stream_name (s), i));

	/* A video stream is compulsory; audio/subtitles are optional */
	if (_video_stream < 0) {
		throw DecodeError (N_("could not find video stream"));

	/* Frame reused for every decode call */
	_frame = avcodec_alloc_frame ();
		throw DecodeError (N_("could not allocate frame"));
158 FFmpegDecoder::setup_video ()
160 _video_codec_context = _format_context->streams[_video_stream]->codec;
161 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
163 if (_video_codec == 0) {
164 throw DecodeError (_("could not find video decoder"));
167 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
168 throw DecodeError (N_("could not open video decoder"));
173 FFmpegDecoder::setup_audio ()
175 if (!_ffmpeg_content->audio_stream ()) {
179 _audio_codec_context = _format_context->streams[_ffmpeg_content->audio_stream()->id]->codec;
180 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
182 if (_audio_codec == 0) {
183 throw DecodeError (_("could not find audio decoder"));
186 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
187 throw DecodeError (N_("could not open audio decoder"));
192 FFmpegDecoder::setup_subtitle ()
194 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->id >= int (_format_context->nb_streams)) {
198 _subtitle_codec_context = _format_context->streams[_ffmpeg_content->subtitle_stream()->id]->codec;
199 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
201 if (_subtitle_codec == 0) {
202 throw DecodeError (_("could not find subtitle decoder"));
205 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
206 throw DecodeError (N_("could not open subtitle decoder"));
/** Read the next packet from the content and decode it, emitting any video,
 *  audio or subtitle data that results.  On end-of-file the decoders are
 *  drained of buffered frames.  (Return convention is not visible in this
 *  extract; presumably it signals whether decoding has finished -- confirm.)
 */
FFmpegDecoder::pass ()
	int r = av_read_frame (_format_context, &_packet);
	/* r < 0 means either a genuine error or end-of-file */
	if (r != AVERROR_EOF) {
		/* Maybe we should fail here, but for now we'll just finish off instead */
		av_strerror (r, buf, sizeof(buf));
		_film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));

	/* Get any remaining frames */

	/* XXX: should we reset _packet.data and size after each *_decode_* call? */

	if (_opt.decode_video) {
		/* Drain the video decoder: keep calling until no more complete frames emerge */
		while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
			filter_and_emit_video (_frame);

	if (_ffmpeg_content->audio_stream() && _opt.decode_audio) {
		decode_audio_packet ();

	/* Normal (non-EOF) path: reset the frame and dispatch the packet by stream */
	avcodec_get_frame_defaults (_frame);

	if (_packet.stream_index == _video_stream && _opt.decode_video) {

		int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
		if (r >= 0 && frame_finished) {

			/* A partially-consumed packet is unexpected; log it */
			if (r != _packet.size) {
				_film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));

			/* NOTE(review): with video_sync enabled the sync path (out_with_sync?)
			   presumably runs on a line not shown; otherwise the frame is emitted
			   directly -- confirm the hidden else-branch. */
			if (_opt.video_sync) {
				filter_and_emit_video (_frame);

	} else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->id && _opt.decode_audio) {
		decode_audio_packet ();
	} else if (_ffmpeg_content->subtitle_stream() && _packet.stream_index == _ffmpeg_content->subtitle_stream()->id && _opt.decode_subtitles && _first_video) {

		if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
			/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
			   indicate that the previous subtitle should stop.
			*/
			if (sub.num_rects > 0) {
				shared_ptr<TimedSubtitle> ts;
				emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
				/* some problem with the subtitle; we probably didn't understand it */
				emit_subtitle (shared_ptr<TimedSubtitle> ());
			avsubtitle_free (&sub);

	/* Release the packet's buffer whatever happened above */
	av_free_packet (&_packet);
293 /** @param data pointer to array of pointers to buffers.
294 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
296 shared_ptr<AudioBuffers>
297 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
299 assert (_ffmpeg_content->audio_channels());
300 assert (bytes_per_audio_sample());
302 /* Deinterleave and convert to float */
304 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
306 int const total_samples = size / bytes_per_audio_sample();
307 int const frames = total_samples / _ffmpeg_content->audio_channels();
308 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
310 switch (audio_sample_format()) {
311 case AV_SAMPLE_FMT_S16:
313 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
316 for (int i = 0; i < total_samples; ++i) {
317 audio->data(channel)[sample] = float(*p++) / (1 << 15);
320 if (channel == _ffmpeg_content->audio_channels()) {
328 case AV_SAMPLE_FMT_S16P:
330 int16_t** p = reinterpret_cast<int16_t **> (data);
331 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
332 for (int j = 0; j < frames; ++j) {
333 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
339 case AV_SAMPLE_FMT_S32:
341 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
344 for (int i = 0; i < total_samples; ++i) {
345 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
348 if (channel == _ffmpeg_content->audio_channels()) {
356 case AV_SAMPLE_FMT_FLT:
358 float* p = reinterpret_cast<float*> (data[0]);
361 for (int i = 0; i < total_samples; ++i) {
362 audio->data(channel)[sample] = *p++;
365 if (channel == _ffmpeg_content->audio_channels()) {
373 case AV_SAMPLE_FMT_FLTP:
375 float** p = reinterpret_cast<float**> (data);
376 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
377 memcpy (audio->data(i), p[i], frames * sizeof(float));
383 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
390 FFmpegDecoder::frames_per_second () const
392 AVStream* s = _format_context->streams[_video_stream];
394 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
395 return av_q2d (s->avg_frame_rate);
398 return av_q2d (s->r_frame_rate);
402 FFmpegDecoder::audio_sample_format () const
404 if (_audio_codec_context == 0) {
405 return (AVSampleFormat) 0;
408 return _audio_codec_context->sample_fmt;
412 FFmpegDecoder::native_size () const
414 return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
418 FFmpegDecoder::pixel_format () const
420 return _video_codec_context->pix_fmt;
424 FFmpegDecoder::time_base_numerator () const
426 return _video_codec_context->time_base.num;
430 FFmpegDecoder::time_base_denominator () const
432 return _video_codec_context->time_base.den;
436 FFmpegDecoder::sample_aspect_ratio_numerator () const
438 return _video_codec_context->sample_aspect_ratio.num;
442 FFmpegDecoder::sample_aspect_ratio_denominator () const
444 return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for a stream from its `language' and `title'
 *  metadata entries.
 *  @param s Stream.
 *
 *  NOTE(review): the stringstream `n' used below is declared, and the
 *  language/title values appended to it, on lines not visible in this
 *  extract -- confirm against the full file.
 */
FFmpegDecoder::stream_name (AVStream* s) const
	/* N_() marks the metadata keys for extraction without translating them;
	   they are FFmpeg dictionary keys, not user-visible text */
	AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
	AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
	/* Separate title from language if we already have something */
	if (!n.str().empty()) {
	/* Fall back to a default name when neither key was present */
	if (n.str().empty()) {
473 FFmpegDecoder::bytes_per_audio_sample () const
475 return av_get_bytes_per_sample (audio_sample_format ());
479 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
481 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
483 shared_ptr<FilterGraph> graph;
485 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
486 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
490 if (i == _filter_graphs.end ()) {
491 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
492 _filter_graphs.push_back (graph);
493 _film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
498 list<shared_ptr<Image> > images = graph->process (frame);
500 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
501 emit_video (*i, frame_time ());
506 FFmpegDecoder::seek (double p)
508 return do_seek (p, false);
512 FFmpegDecoder::seek_to_last ()
514 /* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
515 (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
516 staying in the same place.
518 return do_seek (last_source_time(), true);
/** Seek the format context to a given time and flush decoder buffers so that
 *  stale frames are not emitted afterwards.
 *  @param p Position in seconds.
 *  @param backwards true to pass AVSEEK_FLAG_BACKWARD to av_seek_frame.
 *
 *  (The return statement is not visible in this extract; presumably it
 *  reports whether av_seek_frame failed -- confirm.)
 */
FFmpegDecoder::do_seek (double p, bool backwards)
	/* Convert seconds into the video stream's time base units */
	int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);

	int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);

	/* NOTE(review): only the video and subtitle decoders are flushed here;
	   _audio_codec_context is not -- confirm whether that is deliberate. */
	avcodec_flush_buffers (_video_codec_context);
	if (_subtitle_codec_context) {
		avcodec_flush_buffers (_subtitle_codec_context);
/** Emit the video frame currently in _frame, repeating or dropping frames as
 *  required to keep the output frame count in step with the source PTS.
 */
FFmpegDecoder::out_with_sync ()
	/* Where we are in the output, in seconds */
	double const out_pts_seconds = video_frame() / frames_per_second();

	/* Where we are in the source, in seconds */
	double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
		* av_frame_get_best_effort_timestamp(_frame);

		String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),

		/* First frame seen: its PTS becomes the sync reference point */
		_first_video = source_pts_seconds;

	/* Difference between where we are and where we should be */
	double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
	double const one_frame = 1 / frames_per_second();

	/* Insert frames if required to get out_pts_seconds up to pts_seconds */
	if (delta > one_frame) {
		int const extra = rint (delta / one_frame);
		for (int i = 0; i < extra; ++i) {
			repeat_last_video ();
				N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
				out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()

	/* Within one frame of the right place: emit; more than one frame ahead: drop */
	if (delta > -one_frame) {
		/* Process this frame */
		filter_and_emit_video (_frame);
		/* Otherwise we are omitting a frame to keep things right */
		_film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
/** Handle a change to one of our film's properties.
 *  @param p Property that changed.
 *
 *  NOTE(review): the enclosing switch/case on `p' is not visible in this
 *  extract; the visible effect is that cached filter graphs are discarded
 *  (so they will be rebuilt with new settings) -- confirm which properties
 *  trigger this.
 */
FFmpegDecoder::film_changed (Film::Property p)
		boost::mutex::scoped_lock lm (_filter_graphs_mutex);
		_filter_graphs.clear ();
/** @return Length (in video frames) according to our content's header */
FFmpegDecoder::video_length () const
	/* duration is in AV_TIME_BASE units; convert to seconds, then to frames */
	return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
/** @return best-effort presentation time of the frame currently in _frame,
 *  in seconds of the video stream's time base.
 */
FFmpegDecoder::frame_time () const
	return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);
/** Decode the audio packet currently in _packet, emitting the resulting
 *  audio.  (This definition continues beyond the end of this extract; only
 *  the visible part is annotated here.)
 */
FFmpegDecoder::decode_audio_packet ()
	/* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
	   several times.  Work on a copy so that _packet itself is untouched.
	*/
	AVPacket copy_packet = _packet;

	while (copy_packet.size > 0) {

		/* NOTE(review): `&copy_packet' was mangled to an HTML entity in this
		   extract; restored here. */
		int const decode_result = avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &copy_packet);
		if (decode_result >= 0 && frame_finished) {

			/* Where we are in the source, in seconds */
			double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
				* av_frame_get_best_effort_timestamp(_frame);

			/* We only decode audio if we've had our first video packet through, and if it
			   was before this packet.  Until then audio is thrown away.
			*/
			if ((_first_video && _first_video.get() <= source_pts_seconds) || !_opt.decode_video) {

				if (!_first_audio && _opt.decode_video) {
					_first_audio = source_pts_seconds;

					/* This is our first audio frame, and if we've arrived here we must have had our
					   first video frame.  Push some silence to make up any gap between our first
					   video frame and our first audio.
					*/

					/* frames of silence that we must push */
					int const s = rint ((_first_audio.get() - _first_video.get()) * _ffmpeg_content->audio_frame_rate ());

						N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
						_first_video.get(), _first_audio.get(), s, _ffmpeg_content->audio_channels(), bytes_per_audio_sample()

					shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), s));
					audio->make_silent ();

				/* Size in bytes of the decoded data for this call */
				int const data_size = av_samples_get_buffer_size (
					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1

				assert (_audio_codec_context->channels == _ffmpeg_content->audio_channels());
				Audio (deinterleave_audio (_frame->data, data_size));

		/* Advance past however many bytes the decoder consumed */
		if (decode_result >= 0) {
			copy_packet.data += decode_result;
			copy_packet.size -= decode_result;