2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
37 #include "exceptions.h"
41 #include "ffmpeg_decoder.h"
42 #include "filter_graph.h"
43 #include "audio_buffers.h"
44 #include "ffmpeg_content.h"
51 using std::stringstream;
55 using boost::shared_ptr;
56 using boost::optional;
57 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder.
 *  @param c Content to decode.
 *  @param log Log to write to.
 *  @param video true to decode the content's video stream.
 *  @param audio true to decode the content's audio stream.
 *  @param subtitles true to decode the content's subtitle stream.
 *
 *  NOTE(review): several initialiser-list entries and closing lines appear
 *  to be missing from this excerpt of the constructor.
 */
60 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log, bool video, bool audio, bool subtitles)
65 , _subtitle_codec_context (0)
67 , _decode_video (video)
68 , _decode_audio (audio)
69 , _decode_subtitles (subtitles)
74 /* Audio and video frame PTS values may not start with 0. We want
75 to fiddle them so that:
77 1. One of them starts at time 0.
78 2. The first video PTS value ends up on a frame boundary.
80 Then we remove big initial gaps in PTS and we allow our
81 insertion of black frames to work.
83 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
86 bool const have_video = video && c->first_video();
87 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
89 /* First, make one of them start at 0 */
91 if (have_audio && have_video) {
/* Shift by the earlier of the two first PTS values so neither goes negative */
92 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
93 } else if (have_video) {
94 _pts_offset = - c->first_video().get();
95 } else if (have_audio) {
96 _pts_offset = - c->audio_stream()->first_audio.get();
99 /* Now adjust both so that the video pts starts on a frame */
100 if (have_video && have_audio) {
101 ContentTime first_video = c->first_video().get() + _pts_offset;
102 ContentTime const old_first_video = first_video;
/* Add the difference between the rounded-up first video time and the raw one,
   so the first video frame lands exactly on a frame boundary */
103 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
/** Destructor; closes the subtitle codec context opened by setup_subtitle(),
 *  under _mutex to guard against concurrent use of the codec context.
 */
107 FFmpegDecoder::~FFmpegDecoder ()
109 boost::mutex::scoped_lock lm (_mutex);
111 if (_subtitle_codec_context) {
112 avcodec_close (_subtitle_codec_context);
/** Flush the decoders at end-of-stream: drain any video frames still
 *  buffered inside libavcodec, then drain and flush the audio decoder.
 */
117 FFmpegDecoder::flush ()
119 /* Get any remaining frames */
124 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Keep decoding until the video codec has no more buffered frames to emit */
127 while (decode_video_packet ()) {}
130 if (_ffmpeg_content->audio_stream() && _decode_audio) {
131 decode_audio_packet ();
132 AudioDecoder::flush ();
/** Read one packet from the input and dispatch it to the video, audio or
 *  subtitle decoder, according to its stream index and our decode flags.
 *  On read error (other than EOF) we log and carry on rather than failing.
 */
137 FFmpegDecoder::pass ()
139 int r = av_read_frame (_format_context, &_packet);
/* r < 0 here means either EOF or a genuine read error */
142 if (r != AVERROR_EOF) {
143 /* Maybe we should fail here, but for now we'll just finish off instead */
145 av_strerror (r, buf, sizeof(buf));
146 _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
153 int const si = _packet.stream_index;
/* Route the packet to whichever decoder handles this stream, if enabled */
155 if (si == _video_stream && _decode_video) {
156 decode_video_packet ();
157 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
158 decode_audio_packet ();
159 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && _decode_subtitles) {
160 decode_subtitle_packet ();
/* Release the packet's buffer now that it has been consumed */
163 av_free_packet (&_packet);
167 /** @param data pointer to array of pointers to buffers.
168  * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 *  @param size size of the data, in bytes.
 *  @return the samples deinterleaved into per-channel buffers and converted to float in [-1, 1).
 *  @throws DecodeError if the sample format is not one we handle.
 */
170 shared_ptr<AudioBuffers>
171 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
173 assert (_ffmpeg_content->audio_channels());
174 assert (bytes_per_audio_sample());
176 /* Deinterleave and convert to float */
/* size must be a whole number of frames (all channels of all samples) */
178 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
180 int const total_samples = size / bytes_per_audio_sample();
181 int const frames = total_samples / _ffmpeg_content->audio_channels();
182 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
/* NOTE(review): in the interleaved cases below the channel/sample counter
   increments appear to be missing from this excerpt; presumably `channel`
   advances per sample and wraps to the next frame — confirm against the
   full source. */
184 switch (audio_sample_format()) {
185 case AV_SAMPLE_FMT_S16:
/* Interleaved signed 16-bit: scale by 2^15 to get float in [-1, 1) */
187 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
190 for (int i = 0; i < total_samples; ++i) {
191 audio->data(channel)[sample] = float(*p++) / (1 << 15);
194 if (channel == _ffmpeg_content->audio_channels()) {
202 case AV_SAMPLE_FMT_S16P:
/* Planar signed 16-bit: one buffer per channel, no deinterleave needed */
204 int16_t** p = reinterpret_cast<int16_t **> (data);
205 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
206 for (int j = 0; j < frames; ++j) {
207 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
213 case AV_SAMPLE_FMT_S32:
/* Interleaved signed 32-bit: scale by 2^31 */
215 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
218 for (int i = 0; i < total_samples; ++i) {
219 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
222 if (channel == _ffmpeg_content->audio_channels()) {
230 case AV_SAMPLE_FMT_FLT:
/* Interleaved float: already in the right scale; just deinterleave */
232 float* p = reinterpret_cast<float*> (data[0]);
235 for (int i = 0; i < total_samples; ++i) {
236 audio->data(channel)[sample] = *p++;
239 if (channel == _ffmpeg_content->audio_channels()) {
247 case AV_SAMPLE_FMT_FLTP:
/* Planar float: straight memcpy of each channel's plane */
249 float** p = reinterpret_cast<float**> (data);
250 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
251 memcpy (audio->data(i), p[i], frames * sizeof(float));
257 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return the AVSampleFormat of the content's audio stream, or a
 *  zero-valued AVSampleFormat if there is no audio stream.
 */
264 FFmpegDecoder::audio_sample_format () const
266 if (!_ffmpeg_content->audio_stream()) {
267 return (AVSampleFormat) 0;
270 return audio_codec_context()->sample_fmt;
/** @return the size in bytes of one sample in the current audio sample format */
274 FFmpegDecoder::bytes_per_audio_sample () const
276 return av_get_bytes_per_sample (audio_sample_format ());
280 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
283 optional<ContentTime> last_video;
284 optional<ContentTime> last_audio;
286 while (!finished (last_video, last_audio, frames_read)) {
287 int r = av_read_frame (_format_context, &_packet);
289 /* We should flush our decoders here, possibly yielding a few more frames,
290 but the consequence of having to do that is too hideous to contemplate.
291 Instead we give up and say that you can't seek too close to the end
299 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
301 if (_packet.stream_index == _video_stream) {
303 avcodec_get_frame_defaults (_frame);
306 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
307 if (r >= 0 && finished) {
308 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
311 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
312 AVPacket copy_packet = _packet;
313 while (copy_packet.size > 0) {
316 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
317 if (r >= 0 && finished) {
318 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
321 copy_packet.data += r;
322 copy_packet.size -= r;
326 av_free_packet (&_packet);
/** Predicate for minimal_run(): true once either stream's last decoded
 *  time has reached or passed the requested seek time.
 */
333 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
335 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/** Predicate for minimal_run(): stops after a fixed number of packets.
 *  NOTE(review): the body is missing from this excerpt — presumably
 *  `return n == done;` or similar; confirm against the full source.
 */
339 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the demuxer to (roughly) time t and flush all codec buffers so no
 *  stale frames survive the seek.  The target is expressed in the video
 *  stream's timebase, offset by _pts_offset to undo our PTS fiddling.
 */
345 FFmpegDecoder::seek_and_flush (ContentTime t)
347 ContentTime const u = t - _pts_offset;
/* Convert seconds to video-stream timebase units */
348 int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
350 if (_ffmpeg_content->audio_stream ()) {
/* Take the minimum with the audio-stream target so we land before both — TODO confirm the surrounding (missing) expression */
352 s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
356 /* Ridiculous empirical hack */
362 av_seek_frame (_format_context, _video_stream, s, 0);
/* Drop any decoded-but-undelivered frames in each open codec */
364 avcodec_flush_buffers (video_codec_context());
365 if (audio_codec_context ()) {
366 avcodec_flush_buffers (audio_codec_context ());
368 if (_subtitle_codec_context) {
369 avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to a time in the content.
 *  @param time Time to seek to.
 *  @param accurate true to seek accurately: we pre-roll 200ms before the
 *  target and then step decoded frames forward until we overrun it, so the
 *  next frame delivered is exactly the one requested.
 */
374 FFmpegDecoder::seek (ContentTime time, bool accurate)
376 Decoder::seek (time, accurate);
378 AudioDecoder::seek (time, accurate);
381 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
382 a number plucked from the air) earlier than we want to end up. The loop below
383 will hopefully then step through to where we want to be.
386 ContentTime pre_roll = accurate ? ContentTime::from_seconds (0.2) : ContentTime (0);
387 ContentTime initial_seek = time - pre_roll;
/* Clamp: can't seek before the start of the content */
388 if (initial_seek < ContentTime (0)) {
389 initial_seek = ContentTime (0);
392 /* Initial seek time in the video stream's timebase */
394 seek_and_flush (initial_seek);
/* Count how many packets it takes to overrun the target... */
401 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then re-seek and replay one packet fewer, leaving us just before it */
403 seek_and_flush (initial_seek);
405 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
410 FFmpegDecoder::decode_audio_packet ()
412 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
416 AVPacket copy_packet = _packet;
418 while (copy_packet.size > 0) {
421 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
423 if (decode_result < 0) {
424 _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
428 if (frame_finished) {
429 ContentTime const ct = ContentTime::from_seconds (
430 av_frame_get_best_effort_timestamp (_frame) *
431 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
434 int const data_size = av_samples_get_buffer_size (
435 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
438 audio (deinterleave_audio (_frame->data, data_size), ct);
441 copy_packet.data += decode_result;
442 copy_packet.size -= decode_result;
/** Decode the current _packet as video, push the frame through a (cached)
 *  filter graph, optionally post-process, and emit the resulting images.
 *  @return false when the decoder produced no frame (caller stops flushing).
 */
447 FFmpegDecoder::decode_video_packet ()
450 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* Filter graphs are shared and cached per (size, pixel format); guard the cache */
454 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
456 shared_ptr<FilterGraph> graph;
458 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
459 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
/* No cached graph can handle this frame: build and cache a new one */
463 if (i == _filter_graphs.end ()) {
464 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
465 _filter_graphs.push_back (graph);
466 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* One input frame may yield several (image, PTS) pairs from the graph */
471 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
473 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
475 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
477 shared_ptr<Image> image = i->first;
478 if (!post_process.empty ()) {
479 image = image->post_process (post_process, true);
/* Frames with no PTS cannot be placed on the timeline, so drop them */
482 if (i->second != AV_NOPTS_VALUE) {
483 video (image, false, ContentTime::from_seconds (i->second * av_q2d (_format_context->streams[_video_stream]->time_base)) + _pts_offset);
485 _log->log ("Dropping frame without PTS");
/** Find and open a decoder for the content's subtitle stream, if any.
 *  Sets _subtitle_codec_context (closed later by the destructor).
 *  @throws DecodeError if the stream, decoder or open fails.
 */
494 FFmpegDecoder::setup_subtitle ()
496 boost::mutex::scoped_lock lm (_mutex);
/* Nothing to do if the content has no subtitle stream */
498 if (!_ffmpeg_content->subtitle_stream()) {
502 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
503 if (_subtitle_codec_context == 0) {
504 throw DecodeError (N_("could not find subtitle stream"));
507 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
509 if (_subtitle_codec == 0) {
510 throw DecodeError (N_("could not find subtitle decoder"));
513 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
514 throw DecodeError (N_("could not open subtitle decoder"));
/** Decode the current _packet as a bitmap subtitle and emit it, converting
 *  the palettised bitmap to RGBA and its position to a proportional rect.
 *  @throws DecodeError for multi-part or non-bitmap subtitles.
 */
519 FFmpegDecoder::decode_subtitle_packet ()
523 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
527 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
528 indicate that the previous subtitle should stop.
/* Empty subtitle: emit a null image to end the previous one */
530 if (sub.num_rects <= 0) {
531 image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), ContentTime (), ContentTime ());
533 } else if (sub.num_rects > 1) {
534 throw DecodeError (_("multi-part subtitles not yet supported"));
537 /* Subtitle PTS (within the source, not taking into account any of the
538 source that we may have chopped off for the DCP)
/* sub.pts is in AV_TIME_BASE units; convert to seconds then apply our offset */
540 ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
542 /* hence start time for this sub */
/* display times are in milliseconds relative to packet_time */
543 ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
544 ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
546 AVSubtitleRect const * rect = sub.rects[0];
548 if (rect->type != SUBTITLE_BITMAP) {
549 throw DecodeError (_("non-bitmap subtitles not yet supported"));
552 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
553 G, third B, fourth A.
555 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
557 /* Start of the first line in the subtitle */
558 uint8_t* sub_p = rect->pict.data[0];
559 /* sub_p looks up into a BGRA palette which is here
560 (i.e. first byte B, second G, third R, fourth A)
562 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
563 /* Start of the output data */
564 uint32_t* out_p = (uint32_t *) image->data()[0];
/* Convert each palette index to a pixel, swapping the B and R bytes
   (BGRA palette -> RGBA output), row by row */
566 for (int y = 0; y < rect->h; ++y) {
567 uint8_t* sub_line_p = sub_p;
568 uint32_t* out_line_p = out_p;
569 for (int x = 0; x < rect->w; ++x) {
570 uint32_t const p = palette[*sub_line_p++];
571 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Advance to the next row: input by its linesize, output by the image stride */
573 sub_p += rect->pict.linesize[0];
574 out_p += image->stride()[0] / sizeof (uint32_t);
/* Express the rect's position and size as proportions of the video frame */
577 dcp::Size const vs = _ffmpeg_content->video_size ();
581 dcpomatic::Rect<double> (
582 static_cast<double> (rect->x) / vs.width,
583 static_cast<double> (rect->y) / vs.height,
584 static_cast<double> (rect->w) / vs.width,
585 static_cast<double> (rect->h) / vs.height
/* Free the AVSubtitle's rects and bitmap data */
592 avsubtitle_free (&sub);