2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/** Construct an FFmpeg decoder.
 *  @param f Film that this content is part of.
 *  @param c FFmpeg content to decode.
 *  @param video true to decode the video stream.
 *  @param audio true to decode the audio stream.
 */
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
71 , _video_pts_offset (0)
72 , _audio_pts_offset (0)
76 /* Audio and video frame PTS values may not start with 0. We want
77 to fiddle them so that:
79 1. One of them starts at time 0.
80 2. The first video PTS value ends up on a frame boundary.
82 Then we remove big initial gaps in PTS and we allow our
83 insertion of black frames to work.
86 audio_pts_to_use = audio_pts_from_ffmpeg + audio_pts_offset;
87 video_pts_to_use = video_pts_from_ffmpeg + video_pts_offset;
/* We can only compute an offset for a stream whose first PTS is known */
90 bool const have_video = video && c->first_video();
91 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
93 /* First, make one of them start at 0 */
95 if (have_audio && have_video) {
/* Shift both streams by the same amount, so the earlier of the two lands at 0 */
96 _video_pts_offset = _audio_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
97 } else if (have_video) {
98 _video_pts_offset = - c->first_video().get();
99 } else if (have_audio) {
100 _audio_pts_offset = - c->audio_stream()->first_audio.get();
103 /* Now adjust both so that the video pts starts on a frame */
104 if (have_video && have_audio) {
105 double first_video = c->first_video().get() + _video_pts_offset;
106 double const old_first_video = first_video;
108 /* Round the first video up to a frame boundary */
/* Only adjust when we are not already within a tiny tolerance of a boundary */
109 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
110 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Apply the same correction to both offsets so audio/video stay in sync */
113 _video_pts_offset += first_video - old_first_video;
114 _audio_pts_offset += first_video - old_first_video;
/** Destructor.  Closes the subtitle codec context, if one was opened. */
118 FFmpegDecoder::~FFmpegDecoder ()
/* Guard codec-context state against concurrent use */
120 boost::mutex::scoped_lock lm (_mutex);
122 if (_subtitle_codec_context) {
123 avcodec_close (_subtitle_codec_context);
/** Flush the decoders at end-of-stream, emitting any frames they are still
 *  holding, then mark our positions at the end so we are not asked for more data.
 */
128 FFmpegDecoder::flush ()
130 /* Get any remaining frames */
135 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Drain the video decoder until it yields no more frames */
138 while (decode_video_packet ()) {}
141 if (_ffmpeg_content->audio_stream() && _decode_audio) {
142 decode_audio_packet ();
147 /* Stop us being asked for any more data */
148 _video_position = _ffmpeg_content->video_length ();
149 _audio_position = _ffmpeg_content->audio_length ();
/** Read one packet from the content and dispatch it to the video, audio or
 *  subtitle decoder depending on its stream index.
 */
154 FFmpegDecoder::pass ()
156 int r = av_read_frame (_format_context, &_packet);
/* Any non-EOF error is logged but otherwise treated like end-of-stream */
159 if (r != AVERROR_EOF) {
160 /* Maybe we should fail here, but for now we'll just finish off instead */
162 av_strerror (r, buf, sizeof(buf));
163 shared_ptr<const Film> film = _film.lock ();
165 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
/* Reset _frame before it is re-used by the decode calls below */
172 avcodec_get_frame_defaults (_frame);
174 shared_ptr<const Film> film = _film.lock ();
177 int const si = _packet.stream_index;
/* Subtitles are only decoded when the film asks for them */
179 if (si == _video_stream && _decode_video) {
180 decode_video_packet ();
181 } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
182 decode_audio_packet ();
183 } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
184 decode_subtitle_packet ();
187 av_free_packet (&_packet);
191 /** @param data pointer to array of pointers to buffers.
192 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 * @param size total size of the audio data, in bytes.
 * @return de-interleaved audio converted to float samples in roughly [-1, 1).
 */
194 shared_ptr<AudioBuffers>
195 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
197 assert (_ffmpeg_content->audio_channels());
198 assert (bytes_per_audio_sample());
200 /* Deinterleave and convert to float */
/* size must be a whole number of frames (one sample per channel per frame) */
202 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
204 int const total_samples = size / bytes_per_audio_sample();
205 int const frames = total_samples / _ffmpeg_content->audio_channels();
206 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
208 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: scale by 2^15 */
209 case AV_SAMPLE_FMT_S16:
211 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
214 for (int i = 0; i < total_samples; ++i) {
215 audio->data(channel)[sample] = float(*p++) / (1 << 15);
/* Move on to the next frame once every channel has been filled */
218 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar signed 16-bit: one input buffer per channel */
226 case AV_SAMPLE_FMT_S16P:
228 int16_t** p = reinterpret_cast<int16_t **> (data);
229 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
230 for (int j = 0; j < frames; ++j) {
231 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit: scale by 2^31.  Note that (1 << 31) cannot be
   used here: shifting 1 into the sign bit of a 32-bit int is undefined
   behaviour, and on common implementations yields INT_MIN (a negative
   divisor), which would flip the sign of every sample.  Use the float
   constant 2^31 instead. */
237 case AV_SAMPLE_FMT_S32:
239 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
242 for (int i = 0; i < total_samples; ++i) {
243 audio->data(channel)[sample] = static_cast<float>(*p++) / 2147483648.0f;
246 if (channel == _ffmpeg_content->audio_channels()) {
/* Interleaved float: copy straight over */
254 case AV_SAMPLE_FMT_FLT:
256 float* p = reinterpret_cast<float*> (data[0]);
259 for (int i = 0; i < total_samples; ++i) {
260 audio->data(channel)[sample] = *p++;
263 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: a whole channel can be memcpy'd at once */
271 case AV_SAMPLE_FMT_FLTP:
273 float** p = reinterpret_cast<float**> (data);
274 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
275 memcpy (audio->data(i), p[i], frames * sizeof(float));
281 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return sample format of our content's audio stream, or (AVSampleFormat) 0
 *  if there is no audio stream.
 */
288 FFmpegDecoder::audio_sample_format () const
290 if (!_ffmpeg_content->audio_stream()) {
291 return (AVSampleFormat) 0;
294 return audio_codec_context()->sample_fmt;
/** @return number of bytes per sample for our audio sample format */
298 FFmpegDecoder::bytes_per_audio_sample () const
300 return av_get_bytes_per_sample (audio_sample_format ());
/** Decode packets with the minimum amount of processing until `finished' says
 *  to stop.  `finished' is called with the ContentTime of the last video and
 *  audio frames seen, plus the number of frames read so far.
 */
304 FFmpegDecoder::minimal_run (boost::function<bool (ContentTime, ContentTime, int)> finished)
307 ContentTime last_video = 0;
308 ContentTime last_audio = 0;
309 bool flushing = false;
311 while (!finished (last_video, last_audio, frames_read)) {
312 int r = av_read_frame (_format_context, &_packet);
314 /* We should flush our decoders here, possibly yielding a few more frames,
315 but the consequence of having to do that is too hideous to contemplate.
316 Instead we give up and say that you can't seek too close to the end
324 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
326 if (_packet.stream_index == _video_stream) {
328 avcodec_get_frame_defaults (_frame);
331 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
332 if (r >= 0 && finished) {
334 (av_frame_get_best_effort_timestamp (_frame) * time_base + _video_pts_offset) * TIME_HZ
338 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
339 AVPacket copy_packet = _packet;
340 while (copy_packet.size > 0) {
/* Decode from copy_packet, not _packet: the loop consumes the packet by
   advancing copy_packet's data/size below, so decoding _packet would
   re-decode the same bytes on every iteration. */
343 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &copy_packet);
344 if (r >= 0 && finished) {
346 (av_frame_get_best_effort_timestamp (_frame) * time_base + _audio_pts_offset) * TIME_HZ
/* Step over the bytes that the decoder consumed */
350 copy_packet.data += r;
351 copy_packet.size -= r;
355 av_free_packet (&_packet);
/** minimal_run predicate: true once either stream has decoded up to (or past)
 *  the seek point.
 */
362 FFmpegDecoder::seek_overrun_finished (ContentTime seek, ContentTime last_video, ContentTime last_audio) const
364 return last_video >= seek || last_audio >= seek;
368 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the format context to a time and flush all codec buffers, so that no
 *  frames from before the seek leak through.
 *  @param t Seek time in ContentTime (TIME_HZ units).
 */
374 FFmpegDecoder::seek_and_flush (ContentTime t)
/* Convert t to the video stream's timebase, undoing our PTS offset */
376 int64_t const initial_v = ((double (t) / TIME_HZ) - _video_pts_offset) /
377 av_q2d (_format_context->streams[_video_stream]->time_base);
379 av_seek_frame (_format_context, _video_stream, initial_v, AVSEEK_FLAG_BACKWARD);
381 shared_ptr<FFmpegAudioStream> as = _ffmpeg_content->audio_stream ();
/* Likewise in the audio stream's timebase */
383 int64_t initial_a = ((double (t) / TIME_HZ) - _audio_pts_offset) /
384 av_q2d (as->stream(_format_context)->time_base);
386 av_seek_frame (_format_context, as->index (_format_context), initial_a, AVSEEK_FLAG_BACKWARD);
/* Drop any buffered decoder state */
389 avcodec_flush_buffers (video_codec_context());
390 if (audio_codec_context ()) {
391 avcodec_flush_buffers (audio_codec_context ());
393 if (_subtitle_codec_context) {
394 avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to a time in the content.
 *  @param time Time to seek to.
 *  @param accurate true to try to land exactly on `time'; false to accept the
 *  nearest seekable point at or before it.
 */
399 FFmpegDecoder::seek (ContentTime time, bool accurate)
401 Decoder::seek (time, accurate);
403 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
404 a number plucked from the air) earlier than we want to end up. The loop below
405 will hopefully then step through to where we want to be.
408 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
409 ContentTime initial_seek = time - pre_roll;
410 if (initial_seek < 0) {
414 /* Initial seek time in the video stream's timebase */
416 seek_and_flush (initial_seek);
418 if (time == 0 || !accurate) {
419 /* We're already there, or we're as close as we need to be */
/* Count frames between the rough seek point and the target... */
423 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then seek again and step through all but the last of them */
425 seek_and_flush (initial_seek);
427 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
/** Decode the audio in _packet, passing each complete frame up via audio().
 *  A single packet may hold several audio frames, so we loop, consuming the
 *  packet a frame at a time.
 */
432 FFmpegDecoder::decode_audio_packet ()
434 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
438 AVPacket copy_packet = _packet;
440 while (copy_packet.size > 0) {
/* `&copy_packet' restored here: it had been corrupted to `©_packet' by an
   HTML-entity mangling of "&copy", which does not compile. */
443 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
445 if (decode_result < 0) {
446 shared_ptr<const Film> film = _film.lock ();
448 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
452 if (frame_finished) {
/* Frame timestamp converted to ContentTime, with our PTS offset applied */
453 ContentTime const t = rint (
454 (av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
455 * av_frame_get_best_effort_timestamp(_frame) + _audio_pts_offset) * TIME_HZ
458 int const data_size = av_samples_get_buffer_size (
459 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
462 audio (deinterleave_audio (_frame->data, data_size), t);
/* Step over the bytes that the decoder consumed */
465 copy_packet.data += decode_result;
466 copy_packet.size -= decode_result;
/** Decode the video data in _packet, running the result through a filter graph
 *  and any post-process, then passing images up via video().  The return value
 *  is used by flush() as a "keep draining" condition -- presumably true when a
 *  frame was produced (the early-exit return is not visible here; confirm).
 */
471 FFmpegDecoder::decode_video_packet ()
474 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
478 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
480 shared_ptr<FilterGraph> graph;
/* Look for an existing filter graph that can handle this frame's size and pixel format */
482 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
483 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
487 if (i == _filter_graphs.end ()) {
488 shared_ptr<const Film> film = _film.lock ();
/* None suitable: create one for this size/format and cache it for later frames */
491 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
492 _filter_graphs.push_back (graph);
494 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* The graph may emit zero or more images per input frame, each with its own PTS */
499 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
501 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
503 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
505 shared_ptr<Image> image = i->first;
506 if (!post_process.empty ()) {
507 image = image->post_process (post_process, true);
/* Only emit images which carry a usable PTS; others are dropped with a log message */
510 if (i->second != AV_NOPTS_VALUE) {
511 ContentTime const t = rint ((i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _video_pts_offset) * TIME_HZ);
512 video (image, false, t);
514 shared_ptr<const Film> film = _film.lock ();
516 film->log()->log ("Dropping frame without PTS");
/** Find and open a decoder for our content's subtitle stream, if it has one.
 *  @throw DecodeError if a decoder cannot be found or opened.
 */
525 FFmpegDecoder::setup_subtitle ()
527 boost::mutex::scoped_lock lm (_mutex);
/* Nothing to do if there is no subtitle stream, or its index is out of range
   for this format context */
529 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
533 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
534 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
536 if (_subtitle_codec == 0) {
537 throw DecodeError (_("could not find subtitle decoder"));
540 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
541 throw DecodeError (N_("could not open subtitle decoder"));
/** Decode the subtitle in _packet, converting its single bitmap rect to an
 *  RGBA image and passing it up via subtitle() with its position (as a
 *  proportion of the video frame) and display period.
 *  @throw DecodeError on multi-part or non-bitmap subtitles.
 */
546 FFmpegDecoder::decode_subtitle_packet ()
550 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
554 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
555 indicate that the previous subtitle should stop.
557 if (sub.num_rects <= 0) {
/* Emit a null image to clear the previous subtitle */
558 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
560 } else if (sub.num_rects > 1) {
561 throw DecodeError (_("multi-part subtitles not yet supported"));
564 /* Subtitle PTS in seconds (within the source, not taking into account any of the
565 source that we may have chopped off for the DCP)
567 double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;
569 /* hence start time for this sub */
570 DCPTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
/* ...and the time at which it should stop being shown */
571 DCPTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
573 AVSubtitleRect const * rect = sub.rects[0];
575 if (rect->type != SUBTITLE_BITMAP) {
576 throw DecodeError (_("non-bitmap subtitles not yet supported"));
579 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
581 /* Start of the first line in the subtitle */
582 uint8_t* sub_p = rect->pict.data[0];
583 /* sub_p looks up into a RGB palette which is here */
584 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
585 /* Start of the output data */
586 uint32_t* out_p = (uint32_t *) image->data()[0];
/* Expand each paletted pixel to a 32-bit RGBA value, row by row */
588 for (int y = 0; y < rect->h; ++y) {
589 uint8_t* sub_line_p = sub_p;
590 uint32_t* out_line_p = out_p;
591 for (int x = 0; x < rect->w; ++x) {
592 *out_line_p++ = palette[*sub_line_p++];
/* Advance input by its linesize and output by its stride (both may exceed width) */
594 sub_p += rect->pict.linesize[0];
595 out_p += image->stride()[0] / sizeof (uint32_t);
598 libdcp::Size const vs = _ffmpeg_content->video_size ();
/* Position and size of the subtitle, as proportions of the video frame */
602 dcpomatic::Rect<double> (
603 static_cast<double> (rect->x) / vs.width,
604 static_cast<double> (rect->y) / vs.height,
605 static_cast<double> (rect->w) / vs.width,
606 static_cast<double> (rect->h) / vs.height
613 avsubtitle_free (&sub);