2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
44 #include "exceptions.h"
48 #include "ffmpeg_decoder.h"
49 #include "filter_graph.h"
57 using std::stringstream;
59 using boost::shared_ptr;
60 using boost::optional;
61 using boost::dynamic_pointer_cast;
/* Construct a decoder for the FFmpeg content `c` of film `f`.
 * video/audio/subtitles select which stream types this decoder will emit;
 * video_sync enables PTS-based frame insertion/removal (see out_with_sync()).
 * NOTE(review): this listing elides source lines; the initialiser list and
 * constructor body are incomplete here.
 */
64 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio, bool subtitles, bool video_sync)
/* Codec contexts start null so the destructor can close only the ones that
   the setup_*() methods actually opened. */
72 , _video_codec_context (0)
74 , _audio_codec_context (0)
76 , _subtitle_codec_context (0)
78 , _decode_video (video)
79 , _decode_audio (audio)
80 , _decode_subtitles (subtitles)
81 , _video_sync (video_sync)
/* Close whichever codec contexts were opened (audio, video and subtitle are
 * each optional, so each close is guarded) and then close the demuxer's
 * format context.
 */
93 FFmpegDecoder::~FFmpegDecoder ()
95 if (_audio_codec_context) {
96 avcodec_close (_audio_codec_context);
99 if (_video_codec_context) {
100 avcodec_close (_video_codec_context);
103 if (_subtitle_codec_context) {
104 avcodec_close (_subtitle_codec_context);
/* Releases _format_context and sets it to 0. */
109 avformat_close_input (&_format_context);
/* Open the content file with libavformat, probe it, and classify its streams.
 * Records every audio stream (as FFmpegAudioStream) and every subtitle stream
 * (as FFmpegSubtitleStream); requires at least one video stream.  Also
 * allocates the shared AVFrame used by all decode paths.
 * @throws OpenFileError if the file cannot be opened.
 * @throws DecodeError if stream info, a video stream or the frame cannot be
 * obtained.
 */
113 FFmpegDecoder::setup_general ()
117 if (avformat_open_input (&_format_context, _ffmpeg_content->file().string().c_str(), 0, 0) < 0) {
118 throw OpenFileError (_ffmpeg_content->file().string ())
121 if (avformat_find_stream_info (_format_context, 0) < 0) {
122 throw DecodeError (_("could not find stream information"));
125 /* Find video, audio and subtitle streams and choose the first of each */
127 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
128 AVStream* s = _format_context->streams[i];
129 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
/* NOTE(review): the assignment of _video_stream for the video case is elided
   from this listing; presumably it records `i` — confirm against full file. */
131 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
133 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
134 so bodge it here. No idea why we should have to do this.
/* Derive a default layout from the channel count when the demuxer left
   channel_layout at 0, so deinterleave_audio() has something to work with. */
137 if (s->codec->channel_layout == 0) {
138 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
141 _audio_streams.push_back (
142 FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
145 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
146 _subtitle_streams.push_back (FFmpegSubtitleStream (stream_name (s), i));
/* Video is mandatory; audio/subtitles are optional. */
150 if (_video_stream < 0) {
151 throw DecodeError (N_("could not find video stream"));
154 _frame = avcodec_alloc_frame ();
156 throw DecodeError (N_("could not allocate frame"));
/* Find and open a decoder for the chosen video stream; on success
 * _video_codec_context / _video_codec are usable by pass() and the
 * accessor methods below.
 * @throws DecodeError if no decoder is found or it cannot be opened.
 */
161 FFmpegDecoder::setup_video ()
163 _video_codec_context = _format_context->streams[_video_stream]->codec;
164 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
166 if (_video_codec == 0) {
167 throw DecodeError (_("could not find video decoder"));
170 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
171 throw DecodeError (N_("could not open video decoder"));
/* Find and open a decoder for the content's selected audio stream.
 * A no-op when the content has no audio stream selected.
 * @throws DecodeError if no decoder is found or it cannot be opened.
 */
176 FFmpegDecoder::setup_audio ()
178 if (!_ffmpeg_content->audio_stream ()) {
182 _audio_codec_context = _format_context->streams[_ffmpeg_content->audio_stream()->id]->codec;
183 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
185 if (_audio_codec == 0) {
186 throw DecodeError (_("could not find audio decoder"));
189 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
190 throw DecodeError (N_("could not open audio decoder"));
/* Find and open a decoder for the content's selected subtitle stream.
 * A no-op when no subtitle stream is selected, or when the stored stream id
 * is out of range for this file (the selection may have come from different
 * content).
 * @throws DecodeError if no decoder is found or it cannot be opened.
 */
195 FFmpegDecoder::setup_subtitle ()
197 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->id >= int (_format_context->nb_streams)) {
201 _subtitle_codec_context = _format_context->streams[_ffmpeg_content->subtitle_stream()->id]->codec;
202 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
204 if (_subtitle_codec == 0) {
205 throw DecodeError (_("could not find subtitle decoder"));
208 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
209 throw DecodeError (N_("could not open subtitle decoder"));
/* Read the next packet from the demuxer and dispatch it to the video, audio
 * or subtitle decode path according to its stream index and the _decode_*
 * flags.  On read failure (including EOF) the decoders are drained of any
 * buffered frames instead.
 * NOTE(review): this listing elides lines (declarations of `buf`,
 * `frame_finished`, `sub`, `got_subtitle`, returns and closing braces are
 * missing here) — consult the full file before editing.
 */
215 FFmpegDecoder::pass ()
217 int r = av_read_frame (_format_context, &_packet);
220 if (r != AVERROR_EOF) {
221 /* Maybe we should fail here, but for now we'll just finish off instead */
223 av_strerror (r, buf, sizeof(buf));
224 _film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
227 /* Get any remaining frames */
232 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Drain loop: keep decoding until the codec stops producing frames. */
237 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
238 filter_and_emit_video (_frame);
242 if (_ffmpeg_content->audio_stream() && _decode_audio) {
243 decode_audio_packet ();
249 avcodec_get_frame_defaults (_frame);
251 if (_packet.stream_index == _video_stream && _decode_video) {
254 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
255 if (r >= 0 && frame_finished) {
/* A decoder consuming fewer bytes than the packet held is unusual enough
   to be worth logging. */
257 if (r != _packet.size) {
258 _film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));
264 filter_and_emit_video (_frame);
268 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->id && _decode_audio) {
269 decode_audio_packet ();
/* Subtitles are ignored until the first video frame has arrived
   (_first_video) since they are timed relative to it. */
270 } else if (_ffmpeg_content->subtitle_stream() && _packet.stream_index == _ffmpeg_content->subtitle_stream()->id && _decode_subtitles && _first_video) {
274 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
275 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
276 indicate that the previous subtitle should stop.
278 if (sub.num_rects > 0) {
279 shared_ptr<TimedSubtitle> ts;
281 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
283 /* some problem with the subtitle; we probably didn't understand it */
/* Empty subtitle => tell listeners to clear the current subtitle. */
286 emit_subtitle (shared_ptr<TimedSubtitle> ());
288 avsubtitle_free (&sub);
292 av_free_packet (&_packet);
296 /** @param data pointer to array of pointers to buffers.
297 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* Convert `size` bytes of decoded audio at `data` into a float AudioBuffers
 * object, one plane per channel.  Handles packed S16/S32/FLT (interleaved in
 * data[0]) and planar S16P/FLTP (one pointer per channel in data[]).
 * @throws DecodeError for any other sample format.
 * NOTE(review): the channel/sample bookkeeping lines of the packed-format
 * loops are elided from this listing.
 * NOTE(review): in the S32 case, (1 << 31) overflows a 32-bit signed int
 * (undefined behaviour); it should be the floating constant 2147483648.0f —
 * fix when editing the full file.
 */
299 shared_ptr<AudioBuffers>
300 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
302 assert (_ffmpeg_content->audio_channels());
303 assert (bytes_per_audio_sample());
305 /* Deinterleave and convert to float */
/* size must be a whole number of frames (all channels present). */
307 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
309 int const total_samples = size / bytes_per_audio_sample();
310 int const frames = total_samples / _ffmpeg_content->audio_channels();
311 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
313 switch (audio_sample_format()) {
/* Packed 16-bit: walk the interleaved samples, scaling to [-1, 1). */
314 case AV_SAMPLE_FMT_S16:
316 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
319 for (int i = 0; i < total_samples; ++i) {
320 audio->data(channel)[sample] = float(*p++) / (1 << 15);
323 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar 16-bit: one source plane per channel. */
331 case AV_SAMPLE_FMT_S16P:
333 int16_t** p = reinterpret_cast<int16_t **> (data);
334 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
335 for (int j = 0; j < frames; ++j) {
336 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Packed 32-bit. */
342 case AV_SAMPLE_FMT_S32:
344 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
347 for (int i = 0; i < total_samples; ++i) {
348 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
351 if (channel == _ffmpeg_content->audio_channels()) {
/* Packed float: already in the target representation, just deinterleave. */
359 case AV_SAMPLE_FMT_FLT:
361 float* p = reinterpret_cast<float*> (data[0]);
364 for (int i = 0; i < total_samples; ++i) {
365 audio->data(channel)[sample] = *p++;
368 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: a straight per-channel memcpy. */
376 case AV_SAMPLE_FMT_FLTP:
378 float** p = reinterpret_cast<float**> (data);
379 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
380 memcpy (audio->data(i), p[i], frames * sizeof(float));
386 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* @return The video frame rate, preferring the demuxer's averaged rate
 * (avg_frame_rate) and falling back to the "real" base rate (r_frame_rate)
 * when the average is unset.
 */
393 FFmpegDecoder::frames_per_second () const
395 AVStream* s = _format_context->streams[_video_stream];
397 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
398 return av_q2d (s->avg_frame_rate);
401 return av_q2d (s->r_frame_rate);
/* @return The sample format of the open audio codec, or (AVSampleFormat) 0
 * if no audio codec context has been set up.
 */
405 FFmpegDecoder::audio_sample_format () const
407 if (_audio_codec_context == 0) {
408 return (AVSampleFormat) 0;
411 return _audio_codec_context->sample_fmt;
/* @return The video's native size in pixels, from the video codec context. */
415 FFmpegDecoder::native_size () const
417 return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
/* @return The video's pixel format, from the video codec context. */
421 FFmpegDecoder::pixel_format () const
423 return _video_codec_context->pix_fmt;
/* @return Numerator of the video codec's time base. */
427 FFmpegDecoder::time_base_numerator () const
429 return _video_codec_context->time_base.num;
/* @return Denominator of the video codec's time base. */
433 FFmpegDecoder::time_base_denominator () const
435 return _video_codec_context->time_base.den;
/* @return Numerator of the video's sample (pixel) aspect ratio. */
439 FFmpegDecoder::sample_aspect_ratio_numerator () const
441 return _video_codec_context->sample_aspect_ratio.num;
/* @return Denominator of the video's sample (pixel) aspect ratio. */
445 FFmpegDecoder::sample_aspect_ratio_denominator () const
447 return _video_codec_context->sample_aspect_ratio.den;
/* Build a human-readable name for stream `s` from its metadata, using the
 * "language" and "title" entries when present.
 * NOTE(review): the lines that append lang/title to the stream `n` and the
 * fallback for a fully-empty name are elided from this listing.
 */
451 FFmpegDecoder::stream_name (AVStream* s) const
455 AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
460 AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
462 if (!n.str().empty()) {
468 if (n.str().empty()) {
/* @return Size in bytes of one audio sample in the current sample format. */
476 FFmpegDecoder::bytes_per_audio_sample () const
478 return av_get_bytes_per_sample (audio_sample_format ());
/* Push `frame` through a FilterGraph matching its size and pixel format,
 * creating (and caching) a new graph on first sight of a given
 * size/format combination, then emit every resulting image.
 * The graph list is shared state, guarded by _filter_graphs_mutex.
 */
482 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
484 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
486 shared_ptr<FilterGraph> graph;
/* Linear search for an existing graph that can handle this frame. */
488 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
489 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
493 if (i == _filter_graphs.end ()) {
494 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
495 _filter_graphs.push_back (graph);
496 _film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
/* One input frame may yield zero or more filtered images. */
501 list<shared_ptr<Image> > images = graph->process (frame);
503 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
504 emit_video (*i, frame_time ());
/* Seek to `p` seconds, forwards-biased (no AVSEEK_FLAG_BACKWARD). */
509 FFmpegDecoder::seek (double p)
511 return do_seek (p, false);
/* Re-seek to the last-decoded position, backwards-biased so the same frame
 * is fetched again (used after decoder parameters change).
 */
515 FFmpegDecoder::seek_to_last ()
517 /* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
518 (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
519 staying in the same place.
521 return do_seek (last_source_time(), true);
/* Seek the video stream to `p` seconds (converted into the stream's time
 * base) and flush the decoders so no stale frames survive the jump.
 * `backwards` requests AVSEEK_FLAG_BACKWARD (land at or before `p`).
 * NOTE(review): only the video and subtitle contexts are flushed here; the
 * audio context flush, if any, is on lines elided from this listing — confirm.
 */
525 FFmpegDecoder::do_seek (double p, bool backwards)
527 int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
529 int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
531 avcodec_flush_buffers (_video_codec_context);
532 if (_subtitle_codec_context) {
533 avcodec_flush_buffers (_subtitle_codec_context);
/* Emit the current video frame while keeping output time locked to source
 * PTS: if the source has run ahead by more than a frame, repeat the last
 * frame to fill the gap; if it has fallen behind by more than a frame, drop
 * this one; otherwise emit normally.  The first frame's PTS is recorded in
 * _first_video and used as the time origin.
 * NOTE(review): several lines (log call wrappers, the _first_video guard,
 * braces) are elided from this listing.
 */
540 FFmpegDecoder::out_with_sync ()
542 /* Where we are in the output, in seconds */
543 double const out_pts_seconds = video_frame() / frames_per_second();
545 /* Where we are in the source, in seconds */
546 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
547 * av_frame_get_best_effort_timestamp(_frame);
550 String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),
555 _first_video = source_pts_seconds;
558 /* Difference between where we are and where we should be */
559 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
560 double const one_frame = 1 / frames_per_second();
562 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
563 if (delta > one_frame) {
564 int const extra = rint (delta / one_frame);
565 for (int i = 0; i < extra; ++i) {
566 repeat_last_video ();
569 N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
570 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
576 if (delta > -one_frame) {
577 /* Process this frame */
578 filter_and_emit_video (_frame);
580 /* Otherwise we are omitting a frame to keep things right */
581 _film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
/* React to a change of film property `p` by discarding the cached filter
 * graphs (they bake in film settings and must be rebuilt).
 * NOTE(review): the switch/if on `p` selecting which properties trigger this
 * is elided from this listing.
 */
586 FFmpegDecoder::film_changed (Film::Property p)
592 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
593 _filter_graphs.clear ();
603 /** @return Length (in video frames) according to our content's header;
 * computed as container duration (in AV_TIME_BASE units) times frame rate. */
605 FFmpegDecoder::video_length () const
607 return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
/* @return Best-effort timestamp of the current frame, converted from the
 * video stream's time base into seconds. */
611 FFmpegDecoder::frame_time () const
613 return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);
/* Decode the current _packet's audio, which may span several audio frames,
 * advancing through a working copy of the packet until it is exhausted.
 * Audio before the first video frame is discarded (when video is being
 * decoded); the very first emitted audio is preceded by enough silence to
 * cover the gap between first video and first audio.
 * NOTE(review): this function is truncated in this listing (it continues
 * past the visible end of the file) and several lines are elided.
 * NOTE(review): "©_packet" below is mojibake — "&copy" + ";" was decoded as
 * the © entity; the argument should read "&copy_packet".  Fix the encoding
 * damage when editing the full file.
 */
617 FFmpegDecoder::decode_audio_packet ()
619 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
623 AVPacket copy_packet = _packet;
625 while (copy_packet.size > 0) {
628 int const decode_result = avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, ©_packet);
629 if (decode_result >= 0 && frame_finished) {
631 /* Where we are in the source, in seconds */
632 double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
633 * av_frame_get_best_effort_timestamp(_frame);
635 /* We only decode audio if we've had our first video packet through, and if it
636 was before this packet. Until then audio is thrown away.
639 if ((_first_video && _first_video.get() <= source_pts_seconds) || !_decode_video) {
641 if (!_first_audio && _decode_video) {
642 _first_audio = source_pts_seconds;
644 /* This is our first audio frame, and if we've arrived here we must have had our
645 first video frame. Push some silence to make up any gap between our first
646 video frame and our first audio.
649 /* frames of silence that we must push */
650 int const s = rint ((_first_audio.get() - _first_video.get()) * _ffmpeg_content->audio_frame_rate ());
654 N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
655 _first_video.get(), _first_audio.get(), s, _ffmpeg_content->audio_channels(), bytes_per_audio_sample()
660 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), s));
661 audio->make_silent ();
/* Size in bytes of the decoded data, for deinterleave_audio(). */
666 int const data_size = av_samples_get_buffer_size (
667 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
670 assert (_audio_codec_context->channels == _ffmpeg_content->audio_channels());
671 Audio (deinterleave_audio (_frame->data, data_size));
/* Advance the working copy past the bytes the decoder consumed. */
675 if (decode_result >= 0) {
676 copy_packet.data += decode_result;
677 copy_packet.size -= decode_result;