/*
    Copyright (C) 2012 Carl Hetherington <cth@carlh.net>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
/** @file  src/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
75 /* Audio and video frame PTS values may not start with 0. We want
76 to fiddle them so that:
78 1. One of them starts at time 0.
79 2. The first video PTS value ends up on a frame boundary.
81 Then we remove big initial gaps in PTS and we allow our
82 insertion of black frames to work.
84 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
87 bool const have_video = video && c->first_video();
88 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
90 /* First, make one of them start at 0 */
92 if (have_audio && have_video) {
93 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
94 } else if (have_video) {
95 _pts_offset = - c->first_video().get();
96 } else if (have_audio) {
97 _pts_offset = - c->audio_stream()->first_audio.get();
100 /* Now adjust both so that the video pts starts on a frame */
101 if (have_video && have_audio) {
102 double first_video = c->first_video().get() + _pts_offset;
103 double const old_first_video = first_video;
105 /* Round the first video up to a frame boundary */
106 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
107 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
110 _pts_offset += first_video - old_first_video;
114 FFmpegDecoder::~FFmpegDecoder ()
116 boost::mutex::scoped_lock lm (_mutex);
118 if (_subtitle_codec_context) {
119 avcodec_close (_subtitle_codec_context);
124 FFmpegDecoder::flush ()
126 /* Get any remaining frames */
131 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
134 while (decode_video_packet ()) {}
137 if (_ffmpeg_content->audio_stream() && _decode_audio) {
138 decode_audio_packet ();
141 AudioDecoder::flush ();
145 FFmpegDecoder::pass ()
147 int r = av_read_frame (_format_context, &_packet);
150 if (r != AVERROR_EOF) {
151 /* Maybe we should fail here, but for now we'll just finish off instead */
153 av_strerror (r, buf, sizeof(buf));
154 shared_ptr<const Film> film = _film.lock ();
156 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
163 avcodec_get_frame_defaults (_frame);
165 shared_ptr<const Film> film = _film.lock ();
168 int const si = _packet.stream_index;
170 if (si == _video_stream && _decode_video) {
171 decode_video_packet ();
172 } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
173 decode_audio_packet ();
174 } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
175 decode_subtitle_packet ();
178 av_free_packet (&_packet);
182 /** @param data pointer to array of pointers to buffers.
183 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
185 shared_ptr<AudioBuffers>
186 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
188 assert (_ffmpeg_content->audio_channels());
189 assert (bytes_per_audio_sample());
191 /* Deinterleave and convert to float */
193 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
195 int const total_samples = size / bytes_per_audio_sample();
196 int const frames = total_samples / _ffmpeg_content->audio_channels();
197 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
199 switch (audio_sample_format()) {
200 case AV_SAMPLE_FMT_S16:
202 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
205 for (int i = 0; i < total_samples; ++i) {
206 audio->data(channel)[sample] = float(*p++) / (1 << 15);
209 if (channel == _ffmpeg_content->audio_channels()) {
217 case AV_SAMPLE_FMT_S16P:
219 int16_t** p = reinterpret_cast<int16_t **> (data);
220 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
221 for (int j = 0; j < frames; ++j) {
222 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
228 case AV_SAMPLE_FMT_S32:
230 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
233 for (int i = 0; i < total_samples; ++i) {
234 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
237 if (channel == _ffmpeg_content->audio_channels()) {
245 case AV_SAMPLE_FMT_FLT:
247 float* p = reinterpret_cast<float*> (data[0]);
250 for (int i = 0; i < total_samples; ++i) {
251 audio->data(channel)[sample] = *p++;
254 if (channel == _ffmpeg_content->audio_channels()) {
262 case AV_SAMPLE_FMT_FLTP:
264 float** p = reinterpret_cast<float**> (data);
265 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
266 memcpy (audio->data(i), p[i], frames * sizeof(float));
272 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
279 FFmpegDecoder::audio_sample_format () const
281 if (!_ffmpeg_content->audio_stream()) {
282 return (AVSampleFormat) 0;
285 return audio_codec_context()->sample_fmt;
289 FFmpegDecoder::bytes_per_audio_sample () const
291 return av_get_bytes_per_sample (audio_sample_format ());
295 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
298 optional<ContentTime> last_video;
299 optional<ContentTime> last_audio;
301 while (!finished (last_video, last_audio, frames_read)) {
302 int r = av_read_frame (_format_context, &_packet);
304 /* We should flush our decoders here, possibly yielding a few more frames,
305 but the consequence of having to do that is too hideous to contemplate.
306 Instead we give up and say that you can't seek too close to the end
314 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
316 if (_packet.stream_index == _video_stream) {
318 avcodec_get_frame_defaults (_frame);
321 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
322 if (r >= 0 && finished) {
324 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
328 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
329 AVPacket copy_packet = _packet;
330 while (copy_packet.size > 0) {
333 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
334 if (r >= 0 && finished) {
336 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
340 copy_packet.data += r;
341 copy_packet.size -= r;
345 av_free_packet (&_packet);
352 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
354 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
358 FFmpegDecoder::seek_final_finished (int n, int done) const
364 FFmpegDecoder::seek_and_flush (ContentTime t)
366 int64_t s = ((double (t) / TIME_HZ) - _pts_offset) /
367 av_q2d (_format_context->streams[_video_stream]->time_base);
369 if (_ffmpeg_content->audio_stream ()) {
372 ((double (t) / TIME_HZ) - _pts_offset) /
373 av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base)
378 /* Ridiculous empirical hack */
381 av_seek_frame (_format_context, _video_stream, s, AVSEEK_FLAG_BACKWARD);
383 avcodec_flush_buffers (video_codec_context());
384 if (audio_codec_context ()) {
385 avcodec_flush_buffers (audio_codec_context ());
387 if (_subtitle_codec_context) {
388 avcodec_flush_buffers (_subtitle_codec_context);
393 FFmpegDecoder::seek (ContentTime time, bool accurate)
395 Decoder::seek (time, accurate);
396 AudioDecoder::seek (time, accurate);
398 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
399 a number plucked from the air) earlier than we want to end up. The loop below
400 will hopefully then step through to where we want to be.
403 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
404 ContentTime initial_seek = time - pre_roll;
405 if (initial_seek < 0) {
409 /* Initial seek time in the video stream's timebase */
411 seek_and_flush (initial_seek);
418 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
420 seek_and_flush (initial_seek);
422 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
427 FFmpegDecoder::decode_audio_packet ()
429 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
433 AVPacket copy_packet = _packet;
435 while (copy_packet.size > 0) {
438 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
440 if (decode_result < 0) {
441 shared_ptr<const Film> film = _film.lock ();
443 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
447 if (frame_finished) {
448 int const data_size = av_samples_get_buffer_size (
449 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
452 audio (deinterleave_audio (_frame->data, data_size));
455 copy_packet.data += decode_result;
456 copy_packet.size -= decode_result;
461 FFmpegDecoder::decode_video_packet ()
464 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
468 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
470 shared_ptr<FilterGraph> graph;
472 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
473 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
477 if (i == _filter_graphs.end ()) {
478 shared_ptr<const Film> film = _film.lock ();
481 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
482 _filter_graphs.push_back (graph);
484 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
489 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
491 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
493 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
495 shared_ptr<Image> image = i->first;
496 if (!post_process.empty ()) {
497 image = image->post_process (post_process, true);
500 if (i->second != AV_NOPTS_VALUE) {
501 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset;
502 VideoFrame const f = rint (pts * _ffmpeg_content->video_frame_rate ());
503 video (image, false, f);
505 shared_ptr<const Film> film = _film.lock ();
507 film->log()->log ("Dropping frame without PTS");
516 FFmpegDecoder::setup_subtitle ()
518 boost::mutex::scoped_lock lm (_mutex);
520 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
524 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
525 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
527 if (_subtitle_codec == 0) {
528 throw DecodeError (_("could not find subtitle decoder"));
531 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
532 throw DecodeError (N_("could not open subtitle decoder"));
537 FFmpegDecoder::decode_subtitle_packet ()
541 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
545 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
546 indicate that the previous subtitle should stop.
548 if (sub.num_rects <= 0) {
549 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
551 } else if (sub.num_rects > 1) {
552 throw DecodeError (_("multi-part subtitles not yet supported"));
555 /* Subtitle PTS in seconds (within the source, not taking into account any of the
556 source that we may have chopped off for the DCP)
558 double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;
560 /* hence start time for this sub */
561 ContentTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
562 ContentTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
564 AVSubtitleRect const * rect = sub.rects[0];
566 if (rect->type != SUBTITLE_BITMAP) {
567 throw DecodeError (_("non-bitmap subtitles not yet supported"));
570 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
572 /* Start of the first line in the subtitle */
573 uint8_t* sub_p = rect->pict.data[0];
574 /* sub_p looks up into a RGB palette which is here */
575 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
576 /* Start of the output data */
577 uint32_t* out_p = (uint32_t *) image->data()[0];
579 for (int y = 0; y < rect->h; ++y) {
580 uint8_t* sub_line_p = sub_p;
581 uint32_t* out_line_p = out_p;
582 for (int x = 0; x < rect->w; ++x) {
583 *out_line_p++ = palette[*sub_line_p++];
585 sub_p += rect->pict.linesize[0];
586 out_p += image->stride()[0] / sizeof (uint32_t);
589 libdcp::Size const vs = _ffmpeg_content->video_size ();
593 dcpomatic::Rect<double> (
594 static_cast<double> (rect->x) / vs.width,
595 static_cast<double> (rect->y) / vs.height,
596 static_cast<double> (rect->w) / vs.width,
597 static_cast<double> (rect->h) / vs.height
604 avsubtitle_free (&sub);
608 FFmpegDecoder::first_audio () const
610 if (!_ffmpeg_content->audio_stream ()) {
614 return _ffmpeg_content->audio_stream()->first_audio.get_value_or(0) + _pts_offset;