2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/* NOTE(review): this chunk is extraction-garbled — each line carries a fused
   original line number and several lines are missing (the base-class
   initialisers before _subtitle_codec_context, the opening brace, and the
   closing braces of the if-blocks).  Code left byte-identical; comments only.
*/
/** Construct an FFmpegDecoder and compute PTS offsets so that (a) one of
 *  video/audio starts at time 0 and (b) the first video PTS lands on a
 *  frame boundary.
 *  @param f Film (held weakly elsewhere; used for logging).
 *  @param c Content to decode.
 *  @param video true to decode video.
 *  @param audio true to decode audio.
 */
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
/* Base-class initialisers are missing from this view of the file */
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
/* Offsets start at 0 and are only adjusted below when first-PTS data exists */
71 , _video_pts_offset (0)
72 , _audio_pts_offset (0)
76 /* Audio and video frame PTS values may not start with 0. We want
77 to fiddle them so that:
79 1. One of them starts at time 0.
80 2. The first video PTS value ends up on a frame boundary.
82 Then we remove big initial gaps in PTS and we allow our
83 insertion of black frames to work.
86 audio_pts_to_use = audio_pts_from_ffmpeg + audio_pts_offset;
87 video_pts_to_use = video_pts_from_ffmpeg + video_pts_offset;
/* first_video() / first_audio are optionals: only usable if present */
90 bool const have_video = video && c->first_video();
91 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
93 /* First, make one of them start at 0 */
94 
95 if (have_audio && have_video) {
/* Shift both by the same amount so their relative sync is preserved */
96 _video_pts_offset = _audio_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
97 } else if (have_video) {
98 _video_pts_offset = - c->first_video().get();
99 } else if (have_audio) {
100 _audio_pts_offset = - c->audio_stream()->first_audio.get();
103 /* Now adjust both so that the video pts starts on a frame */
104 if (have_video && have_audio) {
105 double first_video = c->first_video().get() + _video_pts_offset;
106 double const old_first_video = first_video;
108 /* Round the first video up to a frame boundary */
/* The 1e-6 tolerance avoids rounding when we are already (numerically) on a boundary */
109 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
110 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Apply the same correction to both streams to keep them in sync */
113 _video_pts_offset += first_video - old_first_video;
114 _audio_pts_offset += first_video - old_first_video;
118 FFmpegDecoder::~FFmpegDecoder ()
120 boost::mutex::scoped_lock lm (_mutex);
122 if (_subtitle_codec_context) {
123 avcodec_close (_subtitle_codec_context);
128 FFmpegDecoder::flush ()
130 /* Get any remaining frames */
135 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
138 while (decode_video_packet ()) {}
141 if (_ffmpeg_content->audio_stream() && _decode_audio) {
142 decode_audio_packet ();
147 FFmpegDecoder::pass ()
149 int r = av_read_frame (_format_context, &_packet);
152 if (r != AVERROR_EOF) {
153 /* Maybe we should fail here, but for now we'll just finish off instead */
155 av_strerror (r, buf, sizeof(buf));
156 shared_ptr<const Film> film = _film.lock ();
158 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
165 avcodec_get_frame_defaults (_frame);
167 shared_ptr<const Film> film = _film.lock ();
170 int const si = _packet.stream_index;
172 if (si == _video_stream && _decode_video) {
173 decode_video_packet ();
174 } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
175 decode_audio_packet ();
176 } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
177 decode_subtitle_packet ();
180 av_free_packet (&_packet);
184 /** @param data pointer to array of pointers to buffers.
185 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
187 shared_ptr<AudioBuffers>
188 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
190 assert (_ffmpeg_content->audio_channels());
191 assert (bytes_per_audio_sample());
193 /* Deinterleave and convert to float */
195 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
197 int const total_samples = size / bytes_per_audio_sample();
198 int const frames = total_samples / _ffmpeg_content->audio_channels();
199 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
201 switch (audio_sample_format()) {
202 case AV_SAMPLE_FMT_S16:
204 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
207 for (int i = 0; i < total_samples; ++i) {
208 audio->data(channel)[sample] = float(*p++) / (1 << 15);
211 if (channel == _ffmpeg_content->audio_channels()) {
219 case AV_SAMPLE_FMT_S16P:
221 int16_t** p = reinterpret_cast<int16_t **> (data);
222 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
223 for (int j = 0; j < frames; ++j) {
224 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
230 case AV_SAMPLE_FMT_S32:
232 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
235 for (int i = 0; i < total_samples; ++i) {
236 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
239 if (channel == _ffmpeg_content->audio_channels()) {
247 case AV_SAMPLE_FMT_FLT:
249 float* p = reinterpret_cast<float*> (data[0]);
252 for (int i = 0; i < total_samples; ++i) {
253 audio->data(channel)[sample] = *p++;
256 if (channel == _ffmpeg_content->audio_channels()) {
264 case AV_SAMPLE_FMT_FLTP:
266 float** p = reinterpret_cast<float**> (data);
267 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
268 memcpy (audio->data(i), p[i], frames * sizeof(float));
274 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
281 FFmpegDecoder::audio_sample_format () const
283 if (!_ffmpeg_content->audio_stream()) {
284 return (AVSampleFormat) 0;
287 return audio_codec_context()->sample_fmt;
291 FFmpegDecoder::bytes_per_audio_sample () const
293 return av_get_bytes_per_sample (audio_sample_format ());
/* NOTE(review): extraction-garbled block — fused line numbers; missing lines
   include the return type, frames_read declaration, the flushing handling
   around lines 303/310-316, the local `int finished` declarations and most
   closing braces.  Code left byte-identical; comments only.
*/
/** Decode packets with minimal processing until a caller-supplied predicate
 *  says we are done; used to step through content during accurate seeks.
 *  The predicate receives the last video time, last audio time and the
 *  number of packets read so far.
 */
297 FFmpegDecoder::minimal_run (boost::function<bool (ContentTime, ContentTime, int)> finished)
/* Times of the most recently decoded video/audio frames, in TIME_HZ units */
300 ContentTime last_video = 0;
301 ContentTime last_audio = 0;
302 bool flushing = false;
304 while (!finished (last_video, last_audio, frames_read)) {
305 int r = av_read_frame (_format_context, &_packet);
307 /* We should flush our decoders here, possibly yielding a few more frames,
308 but the consequence of having to do that is too hideous to contemplate.
309 Instead we give up and say that you can't seek too close to the end
/* Convert this stream's timestamps to seconds */
317 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
319 if (_packet.stream_index == _video_stream) {
321 avcodec_get_frame_defaults (_frame);
324 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
325 if (r >= 0 && finished) {
/* Apply our PTS offset so times match what the rest of the decoder emits */
327 (av_frame_get_best_effort_timestamp (_frame) * time_base + _video_pts_offset) * TIME_HZ
331 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
/* Audio packets may contain several frames, so decode in a loop over a copy */
332 AVPacket copy_packet = _packet;
333 while (copy_packet.size > 0) {
/* NOTE(review): this passes &_packet but advances copy_packet below;
   decode_audio_packet() passes &copy_packet here — this looks like a bug
   (the same packet start would be decoded repeatedly).  Confirm against
   upstream and change to &copy_packet if so. */
336 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
337 if (r >= 0 && finished) {
339 (av_frame_get_best_effort_timestamp (_frame) * time_base + _audio_pts_offset) * TIME_HZ
/* avcodec_decode_audio4 returns the number of bytes consumed */
343 copy_packet.data += r;
344 copy_packet.size -= r;
348 av_free_packet (&_packet);
355 FFmpegDecoder::seek_overrun_finished (ContentTime seek, ContentTime last_video, ContentTime last_audio) const
357 return last_video >= seek || last_audio >= seek;
361 FFmpegDecoder::seek_final_finished (int n, int done) const
367 FFmpegDecoder::seek_and_flush (ContentTime t)
369 int64_t const initial_v = ((double (t) / TIME_HZ) - _video_pts_offset) /
370 av_q2d (_format_context->streams[_video_stream]->time_base);
372 av_seek_frame (_format_context, _video_stream, initial_v, AVSEEK_FLAG_BACKWARD);
374 shared_ptr<FFmpegAudioStream> as = _ffmpeg_content->audio_stream ();
376 int64_t initial_a = ((double (t) / TIME_HZ) - _audio_pts_offset) /
377 av_q2d (as->stream(_format_context)->time_base);
379 av_seek_frame (_format_context, as->index (_format_context), initial_a, AVSEEK_FLAG_BACKWARD);
382 avcodec_flush_buffers (video_codec_context());
383 if (audio_codec_context ()) {
384 avcodec_flush_buffers (audio_codec_context ());
386 if (_subtitle_codec_context) {
387 avcodec_flush_buffers (_subtitle_codec_context);
/* NOTE(review): extraction-garbled block — fused line numbers; missing lines
   include the return type, `initial_seek = 0;`, the `return;` after the
   "already there" comment, and possibly a guard around the final
   minimal_run (line 419 is not visible).  Code left byte-identical.
*/
/** Seek within the content.
 *  @param time Target time in TIME_HZ units.
 *  @param accurate true to seek precisely (slower: overshoots backwards by
 *  200ms then steps forward), false to accept the nearest seek point.
 */
392 FFmpegDecoder::seek (ContentTime time, bool accurate)
394 Decoder::seek (time, accurate);
396 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
397 a number plucked from the air) earlier than we want to end up. The loop below
398 will hopefully then step through to where we want to be.
401 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
402 ContentTime initial_seek = time - pre_roll;
/* Clamp to the start of the content */
403 if (initial_seek < 0) {
407 /* Initial seek time in the video stream's timebase */
409 seek_and_flush (initial_seek);
411 if (time == 0 || !accurate) {
412 /* We're already there, or we're as close as we need to be */
/* Count how many packets it takes to overrun the target... */
416 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then seek back and replay to just before the overrun */
418 seek_and_flush (initial_seek);
420 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
425 FFmpegDecoder::decode_audio_packet ()
427 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
431 AVPacket copy_packet = _packet;
433 while (copy_packet.size > 0) {
436 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
438 if (decode_result < 0) {
439 shared_ptr<const Film> film = _film.lock ();
441 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
445 if (frame_finished) {
446 ContentTime const t = rint (
447 (av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
448 * av_frame_get_best_effort_timestamp(_frame) + _audio_pts_offset) * TIME_HZ
451 int const data_size = av_samples_get_buffer_size (
452 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
455 audio (deinterleave_audio (_frame->data, data_size), t);
458 copy_packet.data += decode_result;
459 copy_packet.size -= decode_result;
464 FFmpegDecoder::decode_video_packet ()
467 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
471 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
473 shared_ptr<FilterGraph> graph;
475 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
476 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
480 if (i == _filter_graphs.end ()) {
481 shared_ptr<const Film> film = _film.lock ();
484 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
485 _filter_graphs.push_back (graph);
487 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
492 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
494 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
496 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
498 shared_ptr<Image> image = i->first;
499 if (!post_process.empty ()) {
500 image = image->post_process (post_process, true);
503 if (i->second != AV_NOPTS_VALUE) {
504 ContentTime const t = rint ((i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _video_pts_offset) * TIME_HZ);
505 video (image, false, t);
507 shared_ptr<const Film> film = _film.lock ();
509 film->log()->log ("Dropping frame without PTS");
518 FFmpegDecoder::setup_subtitle ()
520 boost::mutex::scoped_lock lm (_mutex);
522 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
526 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
527 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
529 if (_subtitle_codec == 0) {
530 throw DecodeError (_("could not find subtitle decoder"));
533 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
534 throw DecodeError (N_("could not open subtitle decoder"));
539 FFmpegDecoder::decode_subtitle_packet ()
543 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
547 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
548 indicate that the previous subtitle should stop.
550 if (sub.num_rects <= 0) {
551 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
553 } else if (sub.num_rects > 1) {
554 throw DecodeError (_("multi-part subtitles not yet supported"));
557 /* Subtitle PTS in seconds (within the source, not taking into account any of the
558 source that we may have chopped off for the DCP)
560 double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;
562 /* hence start time for this sub */
563 DCPTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
564 DCPTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
566 AVSubtitleRect const * rect = sub.rects[0];
568 if (rect->type != SUBTITLE_BITMAP) {
569 throw DecodeError (_("non-bitmap subtitles not yet supported"));
572 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
574 /* Start of the first line in the subtitle */
575 uint8_t* sub_p = rect->pict.data[0];
576 /* sub_p looks up into a RGB palette which is here */
577 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
578 /* Start of the output data */
579 uint32_t* out_p = (uint32_t *) image->data()[0];
581 for (int y = 0; y < rect->h; ++y) {
582 uint8_t* sub_line_p = sub_p;
583 uint32_t* out_line_p = out_p;
584 for (int x = 0; x < rect->w; ++x) {
585 *out_line_p++ = palette[*sub_line_p++];
587 sub_p += rect->pict.linesize[0];
588 out_p += image->stride()[0] / sizeof (uint32_t);
591 libdcp::Size const vs = _ffmpeg_content->video_size ();
595 dcpomatic::Rect<double> (
596 static_cast<double> (rect->x) / vs.width,
597 static_cast<double> (rect->y) / vs.height,
598 static_cast<double> (rect->w) / vs.width,
599 static_cast<double> (rect->h) / vs.height
606 avsubtitle_free (&sub);