2 Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
32 #include <libavcodec/avcodec.h>
33 #include <libavformat/avformat.h>
36 #include "exceptions.h"
40 #include "ffmpeg_decoder.h"
41 #include "ffmpeg_audio_stream.h"
42 #include "ffmpeg_subtitle_stream.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
46 #include "image_proxy.h"
/* NOTE(review): this listing appears to be an elided extraction — original line
   numbers are fused onto the start of each line and some lines/braces are
   missing.  Comments below describe only what the visible code shows. */
/* Convenience macros that write a message to the film's log at the given
   severity.  NOTE(review): LOG_WARNING passes __VA_ARGS__ through without
   String::compose, unlike the other two, so it accepts only a plain string
   (no %1-style placeholders) — confirm this asymmetry is intentional. */
51 #define LOG_GENERAL(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_GENERAL);
52 #define LOG_ERROR(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_ERROR);
53 #define LOG_WARNING(...) _video_content->film()->log()->log (__VA_ARGS__, Log::TYPE_WARNING);
58 using std::stringstream;
62 using boost::shared_ptr;
63 using boost::optional;
64 using boost::dynamic_pointer_cast;
/** Construct a decoder for some FFmpeg content.
 *  @param c    Content to decode.
 *  @param log  Log to write to.
 *
 *  Computes _pts_offset, the constant added to every PTS coming out of
 *  FFmpeg, so that playback starts at time 0 and the first video frame
 *  lands on a frame boundary (see comment below).
 */
67 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
73 , _subtitle_codec_context (0)
76 /* Audio and video frame PTS values may not start with 0. We want
77 to fiddle them so that:
79 1. One of them starts at time 0.
80 2. The first video PTS value ends up on a frame boundary.
82 Then we remove big initial gaps in PTS and we allow our
83 insertion of black frames to work.
85 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
/* first_video / first_audio are optionals; "have" means the stream exists
   and reported a first timestamp. */
88 bool const have_video = c->first_video();
89 bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
91 /* First, make one of them start at 0 */
93 if (have_audio && have_video) {
/* Shift by the earlier of the two first-timestamps so neither goes negative. */
94 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
95 } else if (have_video) {
96 _pts_offset = - c->first_video().get();
97 } else if (have_audio) {
98 _pts_offset = - c->audio_stream()->first_audio.get();
101 /* Now adjust both so that the video pts starts on a frame */
102 if (have_video && have_audio) {
103 ContentTime first_video = c->first_video().get() + _pts_offset;
104 ContentTime const old_first_video = first_video;
/* Add the delta between the shifted first video PTS and that PTS rounded
   up to the next frame boundary at the content's frame rate. */
105 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
/* Destructor: close the subtitle codec context, if one was opened, under
   _mutex.  Other codec contexts are presumably closed elsewhere (base class?)
   — not visible here. */
109 FFmpegDecoder::~FFmpegDecoder ()
111 boost::mutex::scoped_lock lm (_mutex);
113 if (_subtitle_codec_context) {
114 avcodec_close (_subtitle_codec_context);
/* Drain the decoders at end-of-stream: keep decoding video until no more
   frames come out, then flush any buffered audio. */
119 FFmpegDecoder::flush ()
121 /* Get any remaining frames */
126 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
128 while (decode_video_packet ()) {}
130 if (_ffmpeg_content->audio_stream()) {
131 decode_audio_packet ();
132 AudioDecoder::flush ();
/* Read one packet from the container and dispatch it to the matching
   video / audio / subtitle decode routine; frees the packet afterwards.
   On a read error that is not EOF, the error is logged and decoding is
   finished off rather than failing hard. */
137 FFmpegDecoder::pass ()
139 int r = av_read_frame (_format_context, &_packet);
142 if (r != AVERROR_EOF) {
143 /* Maybe we should fail here, but for now we'll just finish off instead */
/* buf is declared on a line elided from this listing. */
145 av_strerror (r, buf, sizeof(buf));
146 LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), buf, r);
153 int const si = _packet.stream_index;
155 if (si == _video_stream) {
156 decode_video_packet ();
157 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
158 decode_audio_packet ();
159 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
160 decode_subtitle_packet ();
/* av_free_packet is the deprecated pre-av_packet_unref API; fine for the
   FFmpeg version this file targets. */
163 av_free_packet (&_packet);
167 /** @param data pointer to array of pointers to buffers.
168 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
* @param size total size of the (non-planar) data in bytes.
* @return audio converted to de-interleaved float buffers, one per channel.
170 shared_ptr<AudioBuffers>
171 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
173 assert (_ffmpeg_content->audio_channels());
174 assert (bytes_per_audio_sample());
176 /* Deinterleave and convert to float */
/* size must be a whole number of frames (all channels). */
178 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
180 int const total_samples = size / bytes_per_audio_sample();
181 int const frames = total_samples / _ffmpeg_content->audio_channels();
182 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
/* Interleaved formats walk sample-by-sample, cycling `channel` and bumping
   `sample` on wrap (declarations of sample/channel are elided here); planar
   formats copy per-channel.  All output is float in [-1, 1). */
184 switch (audio_sample_format()) {
185 case AV_SAMPLE_FMT_U8:
187 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
190 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): this scale looks wrong for unsigned 8-bit audio — samples are
   0..255, so dividing by 2^23 gives near-silence and the 128 DC offset is not
   removed; expected something like (float(*p++) - 128) / 128.  Confirm. */
191 audio->data(channel)[sample] = float(*p++) / (1 << 23);
194 if (channel == _ffmpeg_content->audio_channels()) {
202 case AV_SAMPLE_FMT_S16:
204 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
207 for (int i = 0; i < total_samples; ++i) {
208 audio->data(channel)[sample] = float(*p++) / (1 << 15);
211 if (channel == _ffmpeg_content->audio_channels()) {
219 case AV_SAMPLE_FMT_S16P:
221 int16_t** p = reinterpret_cast<int16_t **> (data);
222 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
223 for (int j = 0; j < frames; ++j) {
224 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
230 case AV_SAMPLE_FMT_S32:
232 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
235 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): (1 << 31) overflows a signed int (undefined behavior);
   2147483648.0f would express the intended divisor safely. */
236 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
239 if (channel == _ffmpeg_content->audio_channels()) {
247 case AV_SAMPLE_FMT_FLT:
249 float* p = reinterpret_cast<float*> (data[0]);
252 for (int i = 0; i < total_samples; ++i) {
253 audio->data(channel)[sample] = *p++;
256 if (channel == _ffmpeg_content->audio_channels()) {
264 case AV_SAMPLE_FMT_FLTP:
/* Planar float: already the target representation, so just memcpy each plane. */
266 float** p = reinterpret_cast<float**> (data);
267 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
268 memcpy (audio->data(i), p[i], frames * sizeof(float));
274 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* Sample format of the content's audio stream, or AVSampleFormat 0
   (AV_SAMPLE_FMT_NONE is -1, so this is actually AV_SAMPLE_FMT_U8) when
   there is no audio stream — NOTE(review): confirm that sentinel is what
   callers expect. */
281 FFmpegDecoder::audio_sample_format () const
283 if (!_ffmpeg_content->audio_stream()) {
284 return (AVSampleFormat) 0;
287 return audio_codec_context()->sample_fmt;
/* Size in bytes of one audio sample in the stream's sample format. */
291 FFmpegDecoder::bytes_per_audio_sample () const
293 return av_get_bytes_per_sample (audio_sample_format ());
/** Decode packets with as little processing as possible until `finished`
 *  says to stop, tracking the last video/audio timestamps seen.
 *  @param finished predicate given (last video time, last audio time,
 *  frames read so far); decoding stops when it returns true.
 *  Used by seek() to step through to a target position.
 */
297 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
300 optional<ContentTime> last_video;
301 optional<ContentTime> last_audio;
303 while (!finished (last_video, last_audio, frames_read)) {
304 int r = av_read_frame (_format_context, &_packet);
306 /* We should flush our decoders here, possibly yielding a few more frames,
307 but the consequence of having to do that is too hideous to contemplate.
308 Instead we give up and say that you can't seek too close to the end
/* time_base converts this stream's PTS units to seconds. */
316 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
318 if (_packet.stream_index == _video_stream) {
320 av_frame_unref (_frame);
323 r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
324 if (r >= 0 && got_picture) {
325 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
328 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
/* A packet may hold several audio frames; walk through it. */
329 AVPacket copy_packet = _packet;
330 while (copy_packet.size > 0) {
/* NOTE(review): the loop advances copy_packet but decodes &_packet — this
   looks like it should be &copy_packet (compare decode_audio_packet());
   as written the same data would be decoded on every iteration.  Confirm. */
333 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
334 if (r >= 0 && got_frame) {
335 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
338 copy_packet.data += r;
339 copy_packet.size -= r;
343 av_free_packet (&_packet);
/* minimal_run predicate: true once either stream has decoded a frame at or
   past the seek target. */
350 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
352 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/* minimal_run predicate for the final stepping pass of seek(); body is
   elided in this listing — presumably compares done against n. */
356 FFmpegDecoder::seek_final_finished (int n, int done) const
/* Seek the format context to content time t (adjusted back by _pts_offset
   into FFmpeg's own timeline) and flush all codec buffers so no stale
   frames survive the seek. */
362 FFmpegDecoder::seek_and_flush (ContentTime t)
364 ContentTime const u = t - _pts_offset;
/* Convert to the video stream's timebase units for av_seek_frame. */
365 int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
367 if (_ffmpeg_content->audio_stream ()) {
/* With audio present, take the smaller of the video- and audio-timebase
   positions (surrounding min() call is elided in this listing). */
369 s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
373 /* Ridiculous empirical hack */
379 av_seek_frame (_format_context, _video_stream, s, 0);
381 avcodec_flush_buffers (video_codec_context());
382 if (audio_codec_context ()) {
383 avcodec_flush_buffers (audio_codec_context ());
385 if (_subtitle_codec_context) {
386 avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to `time`.
 *  @param accurate true to land exactly on `time`: seek 2s early, count the
 *  frames up to the target (minimal_run with seek_overrun_finished), then
 *  re-seek and step through N-1 frames (seek_final_finished) so the next
 *  decoded frame is the wanted one.  Inaccurate seeks just jump.
 */
391 FFmpegDecoder::seek (ContentTime time, bool accurate)
393 VideoDecoder::seek (time, accurate);
394 AudioDecoder::seek (time, accurate);
396 /* If we are doing an accurate seek, our initial shot will be 2s (2 being
397 a number plucked from the air) earlier than we want to end up. The loop below
398 will hopefully then step through to where we want to be.
401 ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
402 ContentTime initial_seek = time - pre_roll;
403 if (initial_seek < ContentTime (0)) {
404 initial_seek = ContentTime (0);
407 /* Initial seek time in the video stream's timebase */
409 seek_and_flush (initial_seek);
/* Count how many frames lie between the pre-roll point and the target... */
416 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then go back and consume all but the last of them. */
418 seek_and_flush (initial_seek);
420 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
/* Decode all audio frames contained in _packet, deinterleave each to float
   and emit it (via audio()) stamped with its best-effort PTS. */
425 FFmpegDecoder::decode_audio_packet ()
427 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
431 AVPacket copy_packet = _packet;
433 while (copy_packet.size > 0) {
/* NOTE(review): "©_packet" is mojibake — "&copy" was turned into the ©
   entity by some text processing; the argument should read &copy_packet. */
436 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
438 if (decode_result < 0) {
439 LOG_ERROR ("avcodec_decode_audio4 failed (%1)", decode_result);
443 if (frame_finished) {
/* Frame PTS in seconds via the audio stream's timebase (the trailing
   "+ _pts_offset" presumably follows on a line elided here). */
444 ContentTime const ct = ContentTime::from_seconds (
445 av_frame_get_best_effort_timestamp (_frame) *
446 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
449 int const data_size = av_samples_get_buffer_size (
450 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
453 audio (deinterleave_audio (_frame->data, data_size), ct);
/* Advance past the bytes this call consumed; loop until packet exhausted. */
456 copy_packet.data += decode_result;
457 copy_packet.size -= decode_result;
/* Decode _packet as video.  The decoded frame is pushed through a
   FilterGraph (one is cached per size/pixel-format combination, protected by
   _filter_graphs_mutex) and each resulting image is emitted at its PTS.
   Frames without a PTS are dropped with a warning. */
462 FFmpegDecoder::decode_video_packet ()
465 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
469 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
471 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame's geometry/format. */
473 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
474 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
478 if (i == _filter_graphs.end ()) {
479 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
480 _filter_graphs.push_back (graph);
481 LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
/* A single input frame may produce several (image, PTS) pairs. */
486 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
488 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
490 shared_ptr<Image> image = i->first;
492 if (i->second != AV_NOPTS_VALUE) {
/* Stream-timebase PTS -> seconds, plus our start-at-zero offset. */
493 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
495 shared_ptr<ImageProxy> (new RawImageProxy (image, _video_content->film()->log())),
/* Convert seconds to a frame index at the content's frame rate. */
496 rint (pts * _ffmpeg_content->video_frame_rate ())
499 LOG_WARNING ("Dropping frame without PTS");
/* Decode _packet as a bitmap subtitle and emit it as an RGBA image plus its
   position/size as fractions of the video frame.  An empty AVSubtitle ends
   the previous subtitle; multi-rect subtitles are rejected. */
507 FFmpegDecoder::decode_subtitle_packet ()
511 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
515 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
516 indicate that the previous subtitle should stop.
518 if (sub.num_rects <= 0) {
/* Empty image/rect signals "clear current subtitle" downstream. */
519 image_subtitle (ContentTimePeriod (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
521 } else if (sub.num_rects > 1) {
522 throw DecodeError (_("multi-part subtitles not yet supported"));
525 /* Subtitle PTS (within the source, not taking into account any of the
526 source that we may have chopped off for the DCP)
528 ContentTimePeriod period = subtitle_period (sub) + _pts_offset;
530 AVSubtitleRect const * rect = sub.rects[0];
532 if (rect->type != SUBTITLE_BITMAP) {
534 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
538 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
539 G, third B, fourth A.
541 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
543 /* Start of the first line in the subtitle */
544 uint8_t* sub_p = rect->pict.data[0];
545 /* sub_p looks up into a BGRA palette which is here
546 (i.e. first byte B, second G, third R, fourth A)
548 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
549 /* Start of the output data */
550 uint32_t* out_p = (uint32_t *) image->data()[0];
552 for (int y = 0; y < rect->h; ++y) {
553 uint8_t* sub_line_p = sub_p;
554 uint32_t* out_line_p = out_p;
555 for (int x = 0; x < rect->w; ++x) {
556 uint32_t const p = palette[*sub_line_p++];
/* Swap the low and third bytes (B <-> R), keeping G and A in place:
   converts the BGRA palette entry to the RGBA layout of the image. */
557 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Advance by each buffer's own line stride (they may differ). */
559 sub_p += rect->pict.linesize[0];
560 out_p += image->stride()[0] / sizeof (uint32_t);
563 dcp::Size const vs = _ffmpeg_content->video_size ();
/* Rect position/size normalised to [0,1] fractions of the video frame. */
568 dcpomatic::Rect<double> (
569 static_cast<double> (rect->x) / vs.width,
570 static_cast<double> (rect->y) / vs.height,
571 static_cast<double> (rect->w) / vs.width,
572 static_cast<double> (rect->h) / vs.height
576 avsubtitle_free (&sub);
/* True if the content has any subtitle during period p; simple delegation
   to FFmpegContent. */
580 FFmpegDecoder::has_subtitle_during (ContentTimePeriod p) const
582 return _ffmpeg_content->has_subtitle_during (p);