2 Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
32 #include <libavcodec/avcodec.h>
33 #include <libavformat/avformat.h>
36 #include "exceptions.h"
40 #include "ffmpeg_decoder.h"
41 #include "ffmpeg_audio_stream.h"
42 #include "ffmpeg_subtitle_stream.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
46 #include "image_proxy.h"
53 using std::stringstream;
57 using boost::shared_ptr;
58 using boost::optional;
59 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder for some FFmpeg content.
 *  @param c Content to decode.
 *  @param log Log to write to.
 *
 *  Computes _pts_offset, the constant later added to every PTS read from
 *  FFmpeg (pts_to_use = pts_from_ffmpeg + pts_offset), so that the content
 *  starts at time 0 and the first video PTS lands on a frame boundary.
 *
 *  NOTE(review): other member initialisers appear to be elided from this
 *  view of the file — confirm against the full source.
 */
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
/* No subtitle codec context until one is set up elsewhere */
68 , _subtitle_codec_context (0)
71 /* Audio and video frame PTS values may not start with 0. We want
72 to fiddle them so that:
74 1. One of them starts at time 0.
75 2. The first video PTS value ends up on a frame boundary.
77 Then we remove big initial gaps in PTS and we allow our
78 insertion of black frames to work.
80 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
/* have_video/have_audio: whether each stream exists and has reported a first PTS */
83 bool const have_video = c->first_video();
84 bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
86 /* First, make one of them start at 0 */
88 if (have_audio && have_video) {
/* Shift by the earlier of the two first PTSs so neither goes negative */
89 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
90 } else if (have_video) {
91 _pts_offset = - c->first_video().get();
92 } else if (have_audio) {
93 _pts_offset = - c->audio_stream()->first_audio.get();
96 /* Now adjust both so that the video pts starts on a frame */
97 if (have_video && have_audio) {
98 ContentTime first_video = c->first_video().get() + _pts_offset;
99 ContentTime const old_first_video = first_video;
/* Add the distance from the adjusted first video PTS up to the next frame boundary */
100 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
/** Destructor: close the subtitle codec context (if one was opened)
 *  under _mutex.  Video/audio contexts are presumably closed elsewhere
 *  (e.g. by the FFmpeg base class) — confirm against the full source.
 */
104 FFmpegDecoder::~FFmpegDecoder ()
106 boost::mutex::scoped_lock lm (_mutex);
108 if (_subtitle_codec_context) {
109 avcodec_close (_subtitle_codec_context);
/** Flush the decoders at end-of-stream: drain any video frames still
 *  buffered inside the codec, then drain audio and flush the
 *  AudioDecoder so that any partially-filled buffers are emitted.
 */
114 FFmpegDecoder::flush ()
116 /* Get any remaining frames */
121 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Keep decoding until the video codec has nothing more to give */
123 while (decode_video_packet ()) {}
125 if (_ffmpeg_content->audio_stream()) {
126 decode_audio_packet ();
127 AudioDecoder::flush ();
/** Read the next packet from the container and dispatch it to the
 *  video, audio or subtitle decode path depending on its stream index.
 *  On read error (other than EOF) the error is logged and decoding is
 *  finished off rather than aborted.
 */
132 FFmpegDecoder::pass ()
134 int r = av_read_frame (_format_context, &_packet);
137 if (r != AVERROR_EOF) {
138 /* Maybe we should fail here, but for now we'll just finish off instead */
140 av_strerror (r, buf, sizeof(buf));
141 _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
/* Work out which of our streams this packet belongs to */
148 int const si = _packet.stream_index;
150 if (si == _video_stream) {
151 decode_video_packet ();
152 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
153 decode_audio_packet ();
154 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
155 decode_subtitle_packet ();
/* We own the packet's payload after av_read_frame; release it */
158 av_free_packet (&_packet);
162 /** @param data pointer to array of pointers to buffers.
163  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/** Deinterleave FFmpeg sample data of size @p size bytes into a new
 *  AudioBuffers of floats in the range [-1, 1), one plane per channel.
 */
165 shared_ptr<AudioBuffers>
166 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
168 assert (_ffmpeg_content->audio_channels());
169 assert (bytes_per_audio_sample());
171 /* Deinterleave and convert to float */
/* size must be a whole number of interleaved sample-frames */
173 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
175 int const total_samples = size / bytes_per_audio_sample();
176 int const frames = total_samples / _ffmpeg_content->audio_channels();
177 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
179 switch (audio_sample_format()) {
180 case AV_SAMPLE_FMT_U8:
182 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
185 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): dividing unsigned 8-bit samples by (1 << 23) yields values
   near zero and keeps the +128 DC offset; expected something like
   (float(*p++) - 128) / 128 — confirm intended scaling. */
186 audio->data(channel)[sample] = float(*p++) / (1 << 23);
189 if (channel == _ffmpeg_content->audio_channels()) {
197 case AV_SAMPLE_FMT_S16:
/* Interleaved signed 16-bit: scale by 2^15 */
199 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
202 for (int i = 0; i < total_samples; ++i) {
203 audio->data(channel)[sample] = float(*p++) / (1 << 15);
206 if (channel == _ffmpeg_content->audio_channels()) {
214 case AV_SAMPLE_FMT_S16P:
/* Planar signed 16-bit: one source plane per channel, no deinterleave needed */
216 int16_t** p = reinterpret_cast<int16_t **> (data);
217 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
218 for (int j = 0; j < frames; ++j) {
219 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
225 case AV_SAMPLE_FMT_S32:
227 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
230 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): (1 << 31) is signed-int overflow (UB) in C++; should be
   2147483648.0f (or 1U << 31 converted) — confirm and fix upstream. */
231 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
234 if (channel == _ffmpeg_content->audio_channels()) {
242 case AV_SAMPLE_FMT_FLT:
/* Interleaved float: already in range, just deinterleave */
244 float* p = reinterpret_cast<float*> (data[0]);
247 for (int i = 0; i < total_samples; ++i) {
248 audio->data(channel)[sample] = *p++;
251 if (channel == _ffmpeg_content->audio_channels()) {
259 case AV_SAMPLE_FMT_FLTP:
/* Planar float: straight per-channel memcpy */
261 float** p = reinterpret_cast<float**> (data);
262 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
263 memcpy (audio->data(i), p[i], frames * sizeof(float));
/* Any other sample format is unsupported */
269 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return Sample format of the content's audio stream, or a zero
 *  sentinel if there is no audio stream.
 *
 *  NOTE(review): (AVSampleFormat) 0 is AV_SAMPLE_FMT_U8, not
 *  AV_SAMPLE_FMT_NONE (-1) — confirm callers treat 0 as "no audio".
 */
276 FFmpegDecoder::audio_sample_format () const
278 if (!_ffmpeg_content->audio_stream()) {
279 return (AVSampleFormat) 0;
282 return audio_codec_context()->sample_fmt;
/** @return Number of bytes per sample for the current audio sample
 *  format (0 if the format is unknown to libavutil).
 */
286 FFmpegDecoder::bytes_per_audio_sample () const
288 return av_get_bytes_per_sample (audio_sample_format ());
/** Read and decode packets, doing the minimum of work, until @p finished
 *  returns true.  @p finished is called with the PTS of the last video
 *  and audio frames seen (if any) and the number of frames read so far.
 *  Used by seek() to step through to a target time.
 */
292 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
295 optional<ContentTime> last_video;
296 optional<ContentTime> last_audio;
298 while (!finished (last_video, last_audio, frames_read)) {
299 int r = av_read_frame (_format_context, &_packet);
301 /* We should flush our decoders here, possibly yielding a few more frames,
302 but the consequence of having to do that is too hideous to contemplate.
303 Instead we give up and say that you can't seek too close to the end
/* Convert this stream's PTS units to seconds */
311 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
313 if (_packet.stream_index == _video_stream) {
315 avcodec_get_frame_defaults (_frame);
318 r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
319 if (r >= 0 && got_picture) {
320 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
323 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
324 AVPacket copy_packet = _packet;
325 while (copy_packet.size > 0) {
/* NOTE(review): this decodes &_packet while the loop advances copy_packet;
   it looks like it should pass &copy_packet (cf. decode_audio_packet) —
   as written the same packet start is re-decoded each iteration. Confirm. */
328 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
329 if (r >= 0 && got_frame) {
330 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
/* Step past the bytes the decoder consumed */
333 copy_packet.data += r;
334 copy_packet.size -= r;
338 av_free_packet (&_packet);
/** Predicate for minimal_run: true once either stream has produced a
 *  frame at or beyond the seek target time.
 */
345 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
347 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/** Predicate for minimal_run used on the second seek pass; presumably
 *  returns true once @p done frames of the required @p n have been read
 *  (body not visible here — confirm against the full source).
 */
351 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the demuxer to content time @p t (expressed back into stream
 *  timestamps via _pts_offset) and flush all open codec contexts so no
 *  stale frames survive the seek.
 */
357 FFmpegDecoder::seek_and_flush (ContentTime t)
/* Undo our PTS fiddle to get back to the file's own timeline */
359 ContentTime const u = t - _pts_offset;
360 int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
362 if (_ffmpeg_content->audio_stream ()) {
/* Use the earlier of the video and audio stream positions */
364 s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
368 /* Ridiculous empirical hack */
374 av_seek_frame (_format_context, _video_stream, s, 0);
376 avcodec_flush_buffers (video_codec_context());
377 if (audio_codec_context ()) {
378 avcodec_flush_buffers (audio_codec_context ());
380 if (_subtitle_codec_context) {
381 avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to @p time.  If @p accurate, seek 2s early and then use
 *  minimal_run twice to land exactly on the requested frame; otherwise
 *  just do a plain container seek.
 *  @param time Target content time.
 *  @param accurate True to step precisely to the target.
 */
386 FFmpegDecoder::seek (ContentTime time, bool accurate)
/* Let the base decoders reset their state first */
388 VideoDecoder::seek (time, accurate);
389 AudioDecoder::seek (time, accurate);
391 /* If we are doing an accurate seek, our initial shot will be 2s (2 being
392 a number plucked from the air) earlier than we want to end up. The loop below
393 will hopefully then step through to where we want to be.
396 ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
397 ContentTime initial_seek = time - pre_roll;
398 if (initial_seek < ContentTime (0)) {
399 initial_seek = ContentTime (0);
402 /* Initial seek time in the video stream's timebase */
404 seek_and_flush (initial_seek);
/* First pass: count frames until we overrun the target... */
411 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
413 seek_and_flush (initial_seek);
/* ...second pass: replay exactly N-1 frames so the next one is the target */
415 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
420 FFmpegDecoder::decode_audio_packet ()
422 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
426 AVPacket copy_packet = _packet;
428 while (copy_packet.size > 0) {
431 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
433 if (decode_result < 0) {
434 _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
438 if (frame_finished) {
439 ContentTime const ct = ContentTime::from_seconds (
440 av_frame_get_best_effort_timestamp (_frame) *
441 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
444 int const data_size = av_samples_get_buffer_size (
445 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
448 audio (deinterleave_audio (_frame->data, data_size), ct);
451 copy_packet.data += decode_result;
452 copy_packet.size -= decode_result;
/** Decode the video in the current value of _packet, pushing any
 *  resulting images through a (cached) FilterGraph and on to
 *  VideoDecoder::video with their frame indices.
 *  @return presumably whether a picture was produced (return statements
 *  are not visible in this view — confirm against the full source).
 */
457 FFmpegDecoder::decode_video_packet ()
460 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* _filter_graphs is shared, so guard the lookup/insert */
464 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
466 shared_ptr<FilterGraph> graph;
/* Find an existing graph matching this frame's size and pixel format */
468 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
469 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
473 if (i == _filter_graphs.end ()) {
/* None cached: build a new graph for this size/format and remember it */
474 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
475 _filter_graphs.push_back (graph);
476 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* The graph may emit zero or more (image, PTS) pairs per input frame */
481 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
483 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
485 shared_ptr<Image> image = i->first;
487 if (i->second != AV_NOPTS_VALUE) {
/* Convert PTS to seconds, apply our offset, then round to a frame index */
488 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
489 video (shared_ptr<ImageProxy> (new RawImageProxy (image)), rint (pts * _ffmpeg_content->video_frame_rate ()));
/* No PTS: we can't place this frame, so drop it (logged) */
491 _log->log ("Dropping frame without PTS");
/** Decode the subtitle in the current value of _packet.  Bitmap
 *  subtitles are converted from their BGRA palette form to an RGBA
 *  Image and emitted via image_subtitle with a rectangle expressed as a
 *  proportion of the video size.  Empty AVSubtitles (used by some
 *  codecs to end the previous subtitle) emit a null image.
 */
499 FFmpegDecoder::decode_subtitle_packet ()
503 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
507 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
508 indicate that the previous subtitle should stop.
510 if (sub.num_rects <= 0) {
/* Null image => "stop showing the previous subtitle" */
511 image_subtitle (ContentTimePeriod (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
513 } else if (sub.num_rects > 1) {
514 throw DecodeError (_("multi-part subtitles not yet supported"));
517 /* Subtitle PTS (within the source, not taking into account any of the
518 source that we may have chopped off for the DCP)
520 ContentTimePeriod period = subtitle_period (sub) + _pts_offset;
/* Only a single rect is supported (checked above) */
522 AVSubtitleRect const * rect = sub.rects[0];
524 if (rect->type != SUBTITLE_BITMAP) {
/* NOTE(review): non-bitmap subtitles are silently ignored here; the throw
   below is commented out deliberately, it seems.
*/
526 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
530 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
531 G, third B, fourth A.
533 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
535 /* Start of the first line in the subtitle */
536 uint8_t* sub_p = rect->pict.data[0];
537 /* sub_p looks up into a BGRA palette which is here
538 (i.e. first byte B, second G, third R, fourth A)
540 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
541 /* Start of the output data */
542 uint32_t* out_p = (uint32_t *) image->data()[0];
/* Row by row: look each palette index up and swap BGRA -> RGBA */
544 for (int y = 0; y < rect->h; ++y) {
545 uint8_t* sub_line_p = sub_p;
546 uint32_t* out_line_p = out_p;
547 for (int x = 0; x < rect->w; ++x) {
548 uint32_t const p = palette[*sub_line_p++];
/* Swap the B and R bytes; G and A stay in place */
549 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Advance by each buffer's own stride (they may differ) */
551 sub_p += rect->pict.linesize[0];
552 out_p += image->stride()[0] / sizeof (uint32_t);
/* Express the subtitle rectangle as a proportion of the video frame */
555 dcp::Size const vs = _ffmpeg_content->video_size ();
560 dcpomatic::Rect<double> (
561 static_cast<double> (rect->x) / vs.width,
562 static_cast<double> (rect->y) / vs.height,
563 static_cast<double> (rect->w) / vs.width,
564 static_cast<double> (rect->h) / vs.height
/* AVSubtitle contents are ours to free after decode */
568 avsubtitle_free (&sub);