2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder.
 *  @param f Film that the content is part of.
 *  @param c FFmpeg content to decode.
 *  @param video true if the video stream should be decoded.
 *  @param audio true if the audio stream should be decoded.
 *
 *  NOTE(review): this extract appears to be missing some lines here (other
 *  base-class/member initialisers and closing braces); the comments below
 *  describe only the code that is visible.
 */
FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
	/* No subtitle codec context until setup_subtitle() finds one */
	, _subtitle_codec_context (0)
	, _decode_video (video)
	, _decode_audio (audio)

	/* Audio and video frame PTS values may not start with 0.  We want
	   to fiddle them so that:

	   1.  One of them starts at time 0.
	   2.  The first video PTS value ends up on a frame boundary.

	   Then we remove big initial gaps in PTS and we allow our
	   insertion of black frames to work.

	   We will do pts_to_use = pts_from_ffmpeg + pts_offset;
	*/

	/* Only count a stream if we have been asked to decode it and it has
	   actually reported a first timestamp.
	*/
	bool const have_video = video && c->first_video();
	bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;

	/* First, make one of them start at 0 */

	if (have_audio && have_video) {
		/* Shift whichever stream starts earlier back to time 0 */
		_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
	} else if (have_video) {
		_pts_offset = - c->first_video().get();
	} else if (have_audio) {
		_pts_offset = - c->audio_stream()->first_audio.get();

	/* Now adjust both so that the video pts starts on a frame */
	if (have_video && have_audio) {
		ContentTime first_video = c->first_video().get() + _pts_offset;
		ContentTime const old_first_video = first_video;
		/* Add on however far rounding the first video time up to a frame
		   boundary moves it.
		*/
		_pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
/** Close the subtitle codec context, if one was opened by setup_subtitle(). */
FFmpegDecoder::~FFmpegDecoder ()
	/* Serialise with other users of the codec contexts */
	boost::mutex::scoped_lock lm (_mutex);

	if (_subtitle_codec_context) {
		avcodec_close (_subtitle_codec_context);
/** Flush any remaining frames out of the video and audio decoders at
 *  end-of-stream.
 */
FFmpegDecoder::flush ()
	/* Get any remaining frames */

	/* XXX: should we reset _packet.data and size after each *_decode_* call? */

	/* Drain the video decoder until it yields no more frames */
	while (decode_video_packet ()) {}

	if (_ffmpeg_content->audio_stream() && _decode_audio) {
		/* Drain the audio decoder, then let the base class flush whatever
		   it has buffered.
		*/
		decode_audio_packet ();
		AudioDecoder::flush ();
/** Read the next packet from the file and dispatch it to the video, audio
 *  or subtitle decode function depending on which stream it belongs to.
 *  NOTE(review): some lines (e.g. the error-path wrapper and closing braces)
 *  appear to be missing from this extract.
 */
FFmpegDecoder::pass ()
	int r = av_read_frame (_format_context, &_packet);

	if (r != AVERROR_EOF) {
		/* Maybe we should fail here, but for now we'll just finish off instead */
		av_strerror (r, buf, sizeof(buf));
		shared_ptr<const Film> film = _film.lock ();
		film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));

	shared_ptr<const Film> film = _film.lock ();

	int const si = _packet.stream_index;

	if (si == _video_stream && _decode_video) {
		decode_video_packet ();
	} else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
		decode_audio_packet ();
	} else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && film->with_subtitles ()) {
		/* Subtitles are only decoded if the film asks for them */
		decode_subtitle_packet ();

	/* We are finished with this packet whatever happened to it */
	av_free_packet (&_packet);
173 /** @param data pointer to array of pointers to buffers.
174 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
176 shared_ptr<AudioBuffers>
177 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
179 assert (_ffmpeg_content->audio_channels());
180 assert (bytes_per_audio_sample());
182 /* Deinterleave and convert to float */
184 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
186 int const total_samples = size / bytes_per_audio_sample();
187 int const frames = total_samples / _ffmpeg_content->audio_channels();
188 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
190 switch (audio_sample_format()) {
191 case AV_SAMPLE_FMT_S16:
193 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
196 for (int i = 0; i < total_samples; ++i) {
197 audio->data(channel)[sample] = float(*p++) / (1 << 15);
200 if (channel == _ffmpeg_content->audio_channels()) {
208 case AV_SAMPLE_FMT_S16P:
210 int16_t** p = reinterpret_cast<int16_t **> (data);
211 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
212 for (int j = 0; j < frames; ++j) {
213 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
219 case AV_SAMPLE_FMT_S32:
221 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
224 for (int i = 0; i < total_samples; ++i) {
225 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
228 if (channel == _ffmpeg_content->audio_channels()) {
236 case AV_SAMPLE_FMT_FLT:
238 float* p = reinterpret_cast<float*> (data[0]);
241 for (int i = 0; i < total_samples; ++i) {
242 audio->data(channel)[sample] = *p++;
245 if (channel == _ffmpeg_content->audio_channels()) {
253 case AV_SAMPLE_FMT_FLTP:
255 float** p = reinterpret_cast<float**> (data);
256 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
257 memcpy (audio->data(i), p[i], frames * sizeof(float));
263 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
270 FFmpegDecoder::audio_sample_format () const
272 if (!_ffmpeg_content->audio_stream()) {
273 return (AVSampleFormat) 0;
276 return audio_codec_context()->sample_fmt;
/** @return Size, in bytes, of one sample (for a single channel) of the
 *  content's audio sample format.
 */
FFmpegDecoder::bytes_per_audio_sample () const
	return av_get_bytes_per_sample (audio_sample_format ());
286 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
289 optional<ContentTime> last_video;
290 optional<ContentTime> last_audio;
292 while (!finished (last_video, last_audio, frames_read)) {
293 int r = av_read_frame (_format_context, &_packet);
295 /* We should flush our decoders here, possibly yielding a few more frames,
296 but the consequence of having to do that is too hideous to contemplate.
297 Instead we give up and say that you can't seek too close to the end
305 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
307 if (_packet.stream_index == _video_stream) {
309 avcodec_get_frame_defaults (_frame);
312 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
313 if (r >= 0 && finished) {
314 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
317 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
318 AVPacket copy_packet = _packet;
319 while (copy_packet.size > 0) {
322 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
323 if (r >= 0 && finished) {
324 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
327 copy_packet.data += r;
328 copy_packet.size -= r;
332 av_free_packet (&_packet);
339 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
341 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/** Predicate for minimal_run during the final stage of an accurate seek:
 *  presumably finished when `done' packets out of the `n' requested have
 *  been processed (body not visible in this extract — confirm).
 */
FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the format context to a given content time and flush all of our
 *  codec contexts so that no stale frames survive the seek.
 *  @param t Time to seek to, in our adjusted (offset) timeline.
 */
FFmpegDecoder::seek_and_flush (ContentTime t)
	/* Remove our PTS offset to get back to the stream's own timestamps */
	ContentTime const u = t - _pts_offset;
	/* Target expressed in the video stream's timebase */
	int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);

	if (_ffmpeg_content->audio_stream ()) {
		/* The same target expressed in the audio stream's timebase */
		s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))

	/* Ridiculous empirical hack */

	av_seek_frame (_format_context, _video_stream, s, 0);

	/* Discard anything buffered in the decoders */
	avcodec_flush_buffers (video_codec_context());
	if (audio_codec_context ()) {
		avcodec_flush_buffers (audio_codec_context ());
	if (_subtitle_codec_context) {
		avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to a given time.
 *  @param time Time to seek to.
 *  @param accurate true to do an accurate seek (pre-roll, then step through
 *  packet-by-packet to the exact spot); false to accept wherever the demuxer
 *  lands us.
 */
FFmpegDecoder::seek (ContentTime time, bool accurate)
	Decoder::seek (time, accurate);
	AudioDecoder::seek (time, accurate);

	/* If we are doing an accurate seek, our initial shot will be 200ms (200 being
	   a number plucked from the air) earlier than we want to end up.  The loop below
	   will hopefully then step through to where we want to be.
	*/

	ContentTime pre_roll = accurate ? ContentTime::from_seconds (0.2) : ContentTime (0);
	ContentTime initial_seek = time - pre_roll;
	if (initial_seek < ContentTime (0)) {
		/* Clamp: we cannot seek before the start of the content */
		initial_seek = ContentTime (0);

	/* Initial seek time in the video stream's timebase */

	seek_and_flush (initial_seek);

	/* Run forward until we have passed the requested time, counting the
	   packets (N) that this takes.
	*/
	int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));

	/* Then seek back to the pre-roll point and run N - 1 packets forward,
	   leaving us just before the overrun point.
	*/
	seek_and_flush (initial_seek);
	minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
416 FFmpegDecoder::decode_audio_packet ()
418 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
422 AVPacket copy_packet = _packet;
424 while (copy_packet.size > 0) {
427 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
429 if (decode_result < 0) {
430 shared_ptr<const Film> film = _film.lock ();
432 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
436 if (frame_finished) {
437 ContentTime const ct = ContentTime::from_seconds (
438 av_frame_get_best_effort_timestamp (_frame) *
439 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
442 int const data_size = av_samples_get_buffer_size (
443 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
446 audio (deinterleave_audio (_frame->data, data_size), ct);
449 copy_packet.data += decode_result;
450 copy_packet.size -= decode_result;
/** Decode the video in the current _packet, push any complete frame through
 *  a filter graph and emit the resulting images via video().
 *  NOTE(review): some lines (declarations, closing braces, the else of the
 *  PTS check) appear to be missing from this extract.
 */
FFmpegDecoder::decode_video_packet ()
	if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {

	boost::mutex::scoped_lock lm (_filter_graphs_mutex);

	shared_ptr<FilterGraph> graph;

	/* Look for an existing graph that can handle this frame's size and pixel format */
	list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
	while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {

	if (i == _filter_graphs.end ()) {
		/* None found: build a new one for this size/format and keep it for reuse */
		shared_ptr<const Film> film = _film.lock ();

		graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
		_filter_graphs.push_back (graph);

		film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));

	/* Each processed image comes back paired with its (stream-timebase) PTS */
	list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);

	/* The post-process half (.second) of the content's filter specification */
	string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;

	for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {

		shared_ptr<Image> image = i->first;
		if (!post_process.empty ()) {
			image = image->post_process (post_process, true);

		if (i->second != AV_NOPTS_VALUE) {
			/* Emit with the PTS converted to ContentTime plus our offset */
			video (image, false, ContentTime::from_seconds (i->second * av_q2d (_format_context->streams[_video_stream]->time_base)) + _pts_offset);
			/* (else branch): no usable PTS, so log and drop the frame */
			shared_ptr<const Film> film = _film.lock ();
			film->log()->log ("Dropping frame without PTS");
/** Find and open a decoder for the content's subtitle stream, if there is
 *  one, storing the codec context in _subtitle_codec_context.
 *  @throws DecodeError if the stream or its decoder cannot be set up.
 */
FFmpegDecoder::setup_subtitle ()
	/* Serialise with other users of the codec contexts */
	boost::mutex::scoped_lock lm (_mutex);

	if (!_ffmpeg_content->subtitle_stream()) {

	/* Take the codec context from the stream (we do not allocate it ourselves) */
	_subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
	if (_subtitle_codec_context == 0) {
		throw DecodeError (N_("could not find subtitle stream"));

	_subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);

	if (_subtitle_codec == 0) {
		throw DecodeError (N_("could not find subtitle decoder"));

	if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
		throw DecodeError (N_("could not open subtitle decoder"));
/** Decode the subtitle in the current _packet, converting its palettised
 *  bitmap to RGBA and passing it on via image_subtitle.
 *  NOTE(review): some lines (variable declarations, closing braces, the
 *  image_subtitle call that the Rect below is an argument to) appear to be
 *  missing from this extract.
 */
FFmpegDecoder::decode_subtitle_packet ()
	if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {

	/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
	   indicate that the previous subtitle should stop.
	*/
	if (sub.num_rects <= 0) {
		/* Empty subtitle: emit an empty image to end the previous one */
		image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), ContentTime (), ContentTime ());
	} else if (sub.num_rects > 1) {
		throw DecodeError (_("multi-part subtitles not yet supported"));

	/* Subtitle PTS (within the source, not taking into account any of the
	   source that we may have chopped off for the DCP)
	*/
	ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;

	/* hence start time for this sub */
	ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
	/* display times are milliseconds relative to the packet time */
	ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);

	AVSubtitleRect const * rect = sub.rects[0];

	if (rect->type != SUBTITLE_BITMAP) {
		throw DecodeError (_("non-bitmap subtitles not yet supported"));

	/* Note RGBA is expressed little-endian, so the first byte in the word is R, second
	   G, third B, fourth A.
	*/
	shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));

	/* Start of the first line in the subtitle */
	uint8_t* sub_p = rect->pict.data[0];
	/* sub_p looks up into a BGRA palette which is here
	   (i.e. first byte B, second G, third R, fourth A)
	*/
	uint32_t const * palette = (uint32_t *) rect->pict.data[1];
	/* Start of the output data */
	uint32_t* out_p = (uint32_t *) image->data()[0];

	for (int y = 0; y < rect->h; ++y) {
		uint8_t* sub_line_p = sub_p;
		uint32_t* out_line_p = out_p;
		for (int x = 0; x < rect->w; ++x) {
			/* Look up the palette entry, then swap the B and R bytes
			   (BGRA -> RGBA), leaving G and A in place.
			*/
			uint32_t const p = palette[*sub_line_p++];
			*out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
		/* Step to the next source line (by its stride) and output line */
		sub_p += rect->pict.linesize[0];
		out_p += image->stride()[0] / sizeof (uint32_t);

	dcp::Size const vs = _ffmpeg_content->video_size ();

	/* Subtitle position and size as a proportion of the video frame */
	dcpomatic::Rect<double> (
		static_cast<double> (rect->x) / vs.width,
		static_cast<double> (rect->y) / vs.height,
		static_cast<double> (rect->w) / vs.width,
		static_cast<double> (rect->h) / vs.height

	avsubtitle_free (&sub);