2 Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
32 #include <libavcodec/avcodec.h>
33 #include <libavformat/avformat.h>
36 #include "exceptions.h"
40 #include "ffmpeg_decoder.h"
41 #include "filter_graph.h"
42 #include "audio_buffers.h"
43 #include "ffmpeg_content.h"
50 using std::stringstream;
54 using boost::shared_ptr;
55 using boost::optional;
56 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder.
 *  @param c FFmpeg content to decode.
 *  @param log Log to write to.
 *
 *  Computes _pts_offset, which is added to every PTS coming out of FFmpeg so
 *  that (a) the earlier of first-video / first-audio lands at time 0 and
 *  (b) the first video PTS lands exactly on a video frame boundary.
 */
59 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
64 , _subtitle_codec_context (0)
69 /* Audio and video frame PTS values may not start with 0. We want
70 to fiddle them so that:
72 1. One of them starts at time 0.
73 2. The first video PTS value ends up on a frame boundary.
75 Then we remove big initial gaps in PTS and we allow our
76 insertion of black frames to work.
78 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
81 bool const have_video = c->first_video();
82 bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
84 /* First, make one of them start at 0 */
86 if (have_audio && have_video) {
/* Both streams present: shift so that whichever starts earlier lands at 0 */
87 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
88 } else if (have_video) {
89 _pts_offset = - c->first_video().get();
90 } else if (have_audio) {
91 _pts_offset = - c->audio_stream()->first_audio.get();
94 /* Now adjust both so that the video pts starts on a frame */
95 if (have_video && have_audio) {
96 ContentTime first_video = c->first_video().get() + _pts_offset;
97 ContentTime const old_first_video = first_video;
/* Round the first video time up to the next frame boundary and fold the
   difference into the offset so both streams shift by the same amount. */
98 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
/** Destructor: close the subtitle codec context (if one was opened by
 *  setup_subtitle()) while holding the decoder mutex.
 */
102 FFmpegDecoder::~FFmpegDecoder ()
104 boost::mutex::scoped_lock lm (_mutex);
106 if (_subtitle_codec_context) {
107 avcodec_close (_subtitle_codec_context);
/** Flush any remaining frames out of the decoders at end-of-stream. */
112 FFmpegDecoder::flush ()
114 /* Get any remaining frames */
119 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Drain the video decoder until it produces no more frames */
121 while (decode_video_packet ()) {}
123 if (_ffmpeg_content->audio_stream()) {
/* Drain audio, then let the base class emit anything it has buffered */
124 decode_audio_packet ();
125 AudioDecoder::flush ();
/** Read the next packet from the file and dispatch it to the video, audio
 *  or subtitle decode routine depending on its stream index.
 */
130 FFmpegDecoder::pass ()
132 int r = av_read_frame (_format_context, &_packet);
/* Negative return: either a real error or end-of-file */
135 if (r != AVERROR_EOF) {
136 /* Maybe we should fail here, but for now we'll just finish off instead */
138 av_strerror (r, buf, sizeof(buf));
139 _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
146 int const si = _packet.stream_index;
148 if (si == _video_stream) {
149 decode_video_packet ();
150 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
151 decode_audio_packet ();
152 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
153 decode_subtitle_packet ();
/* Packets returned by av_read_frame must be freed by the caller */
156 av_free_packet (&_packet);
160 /** @param data pointer to array of pointers to buffers.
161 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 * @param size size of the decoded data, in bytes.
 * @return Deinterleaved audio, converted to float, one buffer per channel.
 */
163 shared_ptr<AudioBuffers>
164 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
166 assert (_ffmpeg_content->audio_channels());
167 assert (bytes_per_audio_sample());
169 /* Deinterleave and convert to float */
/* size must be a whole number of interleaved sample frames */
171 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
173 int const total_samples = size / bytes_per_audio_sample();
174 int const frames = total_samples / _ffmpeg_content->audio_channels();
175 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
177 switch (audio_sample_format()) {
178 case AV_SAMPLE_FMT_U8:
180 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
183 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): dividing unsigned 8-bit samples (0..255) by (1 << 23) yields
   values vanishingly close to zero; U8 is normally converted with a 128 bias
   and a 1 << 7 scale — confirm against upstream before relying on U8 input. */
184 audio->data(channel)[sample] = float(*p++) / (1 << 23);
187 if (channel == _ffmpeg_content->audio_channels()) {
195 case AV_SAMPLE_FMT_S16:
197 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
200 for (int i = 0; i < total_samples; ++i) {
201 audio->data(channel)[sample] = float(*p++) / (1 << 15);
204 if (channel == _ffmpeg_content->audio_channels()) {
212 case AV_SAMPLE_FMT_S16P:
/* Planar 16-bit: one input buffer per channel, so no deinterleave needed */
214 int16_t** p = reinterpret_cast<int16_t **> (data);
215 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
216 for (int j = 0; j < frames; ++j) {
217 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
223 case AV_SAMPLE_FMT_S32:
225 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
228 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): (1 << 31) overflows a signed int, which is undefined
   behaviour in C++; 2147483648.0f would be the safe spelling — confirm. */
229 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
232 if (channel == _ffmpeg_content->audio_channels()) {
240 case AV_SAMPLE_FMT_FLT:
/* Already float: just deinterleave */
242 float* p = reinterpret_cast<float*> (data[0]);
245 for (int i = 0; i < total_samples; ++i) {
246 audio->data(channel)[sample] = *p++;
249 if (channel == _ffmpeg_content->audio_channels()) {
257 case AV_SAMPLE_FMT_FLTP:
/* Planar float: straight per-channel copy */
259 float** p = reinterpret_cast<float**> (data);
260 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
261 memcpy (audio->data(i), p[i], frames * sizeof(float));
267 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return Sample format of the content's audio stream, or a zero-valued
 *  AVSampleFormat if the content has no audio stream.
 */
274 FFmpegDecoder::audio_sample_format () const
276 if (!_ffmpeg_content->audio_stream()) {
277 return (AVSampleFormat) 0;
280 return audio_codec_context()->sample_fmt;
/** @return Number of bytes per sample (for a single channel) of the content's audio. */
284 FFmpegDecoder::bytes_per_audio_sample () const
286 return av_get_bytes_per_sample (audio_sample_format ());
290 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
293 optional<ContentTime> last_video;
294 optional<ContentTime> last_audio;
296 while (!finished (last_video, last_audio, frames_read)) {
297 int r = av_read_frame (_format_context, &_packet);
299 /* We should flush our decoders here, possibly yielding a few more frames,
300 but the consequence of having to do that is too hideous to contemplate.
301 Instead we give up and say that you can't seek too close to the end
309 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
311 if (_packet.stream_index == _video_stream) {
313 avcodec_get_frame_defaults (_frame);
316 r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
317 if (r >= 0 && got_picture) {
318 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
321 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
322 AVPacket copy_packet = _packet;
323 while (copy_packet.size > 0) {
326 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
327 if (r >= 0 && got_frame) {
328 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
331 copy_packet.data += r;
332 copy_packet.size -= r;
336 av_free_packet (&_packet);
/** @return true once either stream has produced a timestamp at or beyond the seek target. */
343 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
345 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
349 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the format context to time t and flush all open codec buffers.
 *  @param t Time on our adjusted (offset) timeline.
 */
355 FFmpegDecoder::seek_and_flush (ContentTime t)
/* Undo _pts_offset to get back to the source's own timeline */
357 ContentTime const u = t - _pts_offset;
358 int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
360 if (_ffmpeg_content->audio_stream ()) {
362 s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
366 /* Ridiculous empirical hack */
372 av_seek_frame (_format_context, _video_stream, s, 0);
/* Discard any data buffered inside each open codec */
374 avcodec_flush_buffers (video_codec_context());
375 if (audio_codec_context ()) {
376 avcodec_flush_buffers (audio_codec_context ());
378 if (_subtitle_codec_context) {
379 avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to a given time.
 *  @param time Time to seek to.
 *  @param accurate true to pre-roll ~2s early and step forward so that we
 *  land exactly where requested; false for a plain (fast) seek.
 */
384 FFmpegDecoder::seek (ContentTime time, bool accurate)
386 VideoDecoder::seek (time, accurate);
387 AudioDecoder::seek (time, accurate);
389 /* If we are doing an accurate seek, our initial shot will be 2s (2 being
390 a number plucked from the air) earlier than we want to end up. The loop below
391 will hopefully then step through to where we want to be.
394 ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
395 ContentTime initial_seek = time - pre_roll;
/* Clamp to the start of the content */
396 if (initial_seek < ContentTime (0)) {
397 initial_seek = ContentTime (0);
400 /* Initial seek time in the video stream's timebase */
402 seek_and_flush (initial_seek);
/* Count how many packets it takes to overrun the target... */
409 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then seek again and read one fewer, leaving us just before it */
411 seek_and_flush (initial_seek);
413 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
418 FFmpegDecoder::decode_audio_packet ()
420 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
424 AVPacket copy_packet = _packet;
426 while (copy_packet.size > 0) {
429 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
431 if (decode_result < 0) {
432 _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
436 if (frame_finished) {
437 ContentTime const ct = ContentTime::from_seconds (
438 av_frame_get_best_effort_timestamp (_frame) *
439 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
442 int const data_size = av_samples_get_buffer_size (
443 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
446 audio (deinterleave_audio (_frame->data, data_size), ct);
449 copy_packet.data += decode_result;
450 copy_packet.size -= decode_result;
/** Decode the video packet currently in _packet, pushing any complete frame
 *  through a (possibly newly-created) filter graph and emitting the resulting
 *  images with PTS-derived frame indices.
 */
455 FFmpegDecoder::decode_video_packet ()
458 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
462 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
464 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that matches this frame's size and pixel format */
466 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
467 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
471 if (i == _filter_graphs.end ()) {
/* None found: build a new graph for this combination and cache it */
472 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
473 _filter_graphs.push_back (graph);
474 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
479 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
481 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
483 shared_ptr<Image> image = i->first;
485 if (i->second != AV_NOPTS_VALUE) {
/* Convert the PTS to seconds, apply our offset, then round to a frame index */
486 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
487 video (image, rint (pts * _ffmpeg_content->video_frame_rate ()));
489 _log->log ("Dropping frame without PTS");
/** Find and open a decoder for the content's subtitle stream, if there is
 *  one; a no-op when the content has no subtitle stream.
 *  Holds the decoder mutex; throws DecodeError on any failure.
 */
498 FFmpegDecoder::setup_subtitle ()
500 boost::mutex::scoped_lock lm (_mutex);
502 if (!_ffmpeg_content->subtitle_stream()) {
506 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
507 if (_subtitle_codec_context == 0) {
508 throw DecodeError (N_("could not find subtitle stream"));
511 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
513 if (_subtitle_codec == 0) {
514 throw DecodeError (N_("could not find subtitle decoder"));
517 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
518 throw DecodeError (N_("could not open subtitle decoder"));
/** Decode the subtitle packet currently in _packet, converting a single
 *  bitmap subtitle rectangle from its paletted form to an RGBA image and
 *  emitting it with its display period and position (as fractions of the
 *  video size).
 */
523 FFmpegDecoder::decode_subtitle_packet ()
527 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
531 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
532 indicate that the previous subtitle should stop.
534 if (sub.num_rects <= 0) {
/* Emit an empty image to clear the previous subtitle */
535 image_subtitle (ContentTime (), ContentTime (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
537 } else if (sub.num_rects > 1) {
538 throw DecodeError (_("multi-part subtitles not yet supported"));
541 /* Subtitle PTS (within the source, not taking into account any of the
542 source that we may have chopped off for the DCP)
544 ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
546 /* hence start time for this sub */
/* display times are in milliseconds relative to the packet time */
547 ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
548 ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
550 AVSubtitleRect const * rect = sub.rects[0];
552 if (rect->type != SUBTITLE_BITMAP) {
554 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
558 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
559 G, third B, fourth A.
561 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
563 /* Start of the first line in the subtitle */
564 uint8_t* sub_p = rect->pict.data[0];
565 /* sub_p looks up into a BGRA palette which is here
566 (i.e. first byte B, second G, third R, fourth A)
568 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
569 /* Start of the output data */
570 uint32_t* out_p = (uint32_t *) image->data()[0];
572 for (int y = 0; y < rect->h; ++y) {
573 uint8_t* sub_line_p = sub_p;
574 uint32_t* out_line_p = out_p;
575 for (int x = 0; x < rect->w; ++x) {
576 uint32_t const p = palette[*sub_line_p++];
/* BGRA palette entry -> RGBA: swap the low and third bytes, keep G and A */
577 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Advance by each buffer's own line stride (they may differ) */
579 sub_p += rect->pict.linesize[0];
580 out_p += image->stride()[0] / sizeof (uint32_t);
583 dcp::Size const vs = _ffmpeg_content->video_size ();
/* Position and size of the subtitle as fractions of the video frame */
589 dcpomatic::Rect<double> (
590 static_cast<double> (rect->x) / vs.width,
591 static_cast<double> (rect->y) / vs.height,
592 static_cast<double> (rect->w) / vs.width,
593 static_cast<double> (rect->h) / vs.height
/* avcodec_decode_subtitle2 allocates sub's rects; we must free them */
597 avsubtitle_free (&sub);