2 Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
32 #include <libavcodec/avcodec.h>
33 #include <libavformat/avformat.h>
36 #include "exceptions.h"
40 #include "ffmpeg_decoder.h"
41 #include "filter_graph.h"
42 #include "audio_buffers.h"
43 #include "ffmpeg_content.h"
44 #include "image_proxy.h"
51 using std::stringstream;
55 using boost::shared_ptr;
56 using boost::optional;
57 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder for the given content.
 *  @param c FFmpeg content to decode.
 *  @param log Log to write messages to.
 *
 *  The constructor computes _pts_offset, which is added to every PTS that
 *  FFmpeg reports, so that content timestamps start at 0 and the first
 *  video PTS lands on a frame boundary (rationale in the comment below).
 *  NOTE(review): this fragment is missing lines (other member initialisers,
 *  braces) — comments below describe only the visible code.
 */
60 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
65 , _subtitle_codec_context (0)
70 /* Audio and video frame PTS values may not start with 0. We want
71 to fiddle them so that:
73 1. One of them starts at time 0.
74 2. The first video PTS value ends up on a frame boundary.
76 Then we remove big initial gaps in PTS and we allow our
77 insertion of black frames to work.
79 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
82 bool const have_video = c->first_video();
83 bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
85 /* First, make one of them start at 0 */
87 if (have_audio && have_video) {
88 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
89 } else if (have_video) {
90 _pts_offset = - c->first_video().get();
91 } else if (have_audio) {
92 _pts_offset = - c->audio_stream()->first_audio.get();
95 /* Now adjust both so that the video pts starts on a frame */
96 if (have_video && have_audio) {
97 ContentTime first_video = c->first_video().get() + _pts_offset;
98 ContentTime const old_first_video = first_video;
/* Shift both streams by the amount needed to round the first video PTS
   up to the next whole frame at the content's video frame rate. */
99 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
/** Destructor: closes the subtitle codec context, if setup_subtitle()
 *  opened one, while holding _mutex.
 */
103 FFmpegDecoder::~FFmpegDecoder ()
105 boost::mutex::scoped_lock lm (_mutex);
107 if (_subtitle_codec_context) {
108 avcodec_close (_subtitle_codec_context);
/** Drain any frames still buffered inside the video and audio decoders
 *  at end-of-stream, then flush the AudioDecoder itself.
 */
113 FFmpegDecoder::flush ()
115 /* Get any remaining frames */
120 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
122 while (decode_video_packet ()) {}
124 if (_ffmpeg_content->audio_stream()) {
125 decode_audio_packet ();
126 AudioDecoder::flush ();
/** Read the next packet from the format context and dispatch it to the
 *  video / audio / subtitle decode routine according to its stream index.
 *  Read errors other than EOF are logged rather than treated as fatal.
 */
131 FFmpegDecoder::pass ()
133 int r = av_read_frame (_format_context, &_packet);
136 if (r != AVERROR_EOF) {
137 /* Maybe we should fail here, but for now we'll just finish off instead */
139 av_strerror (r, buf, sizeof(buf));
140 _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
147 int const si = _packet.stream_index;
/* Dispatch on stream index; packets for streams we do not use are skipped. */
149 if (si == _video_stream) {
150 decode_video_packet ();
151 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
152 decode_audio_packet ();
153 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
154 decode_subtitle_packet ();
/* Release the packet buffer that av_read_frame allocated. */
157 av_free_packet (&_packet);
161 /** @param data pointer to array of pointers to buffers.
162 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 *  @param size size of the buffer(s) in bytes.
 *  @return Planar float AudioBuffers containing the converted samples.
 *  @throws DecodeError if the sample format is not one we handle.
164 shared_ptr<AudioBuffers>
165 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
167 assert (_ffmpeg_content->audio_channels());
168 assert (bytes_per_audio_sample());
170 /* Deinterleave and convert to float */
172 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
174 int const total_samples = size / bytes_per_audio_sample();
175 int const frames = total_samples / _ffmpeg_content->audio_channels();
176 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
178 switch (audio_sample_format()) {
179 case AV_SAMPLE_FMT_U8:
181 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
184 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): dividing an unsigned 8-bit sample by (1 << 23) yields
   near-silence; U8 is offset-binary, so something like
   (float(*p) - 128) / (1 << 7) looks intended — confirm before changing. */
185 audio->data(channel)[sample] = float(*p++) / (1 << 23);
188 if (channel == _ffmpeg_content->audio_channels()) {
196 case AV_SAMPLE_FMT_S16:
198 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
201 for (int i = 0; i < total_samples; ++i) {
202 audio->data(channel)[sample] = float(*p++) / (1 << 15);
205 if (channel == _ffmpeg_content->audio_channels()) {
213 case AV_SAMPLE_FMT_S16P:
215 int16_t** p = reinterpret_cast<int16_t **> (data);
/* Planar 16-bit: one source buffer per channel, no deinterleave needed. */
216 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
217 for (int j = 0; j < frames; ++j) {
218 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
224 case AV_SAMPLE_FMT_S32:
226 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
229 for (int i = 0; i < total_samples; ++i) {
/* (1 << 31) overflows a signed 32-bit int (UB before C++20; INT_MIN in
   practice, which would invert the polarity of every sample).  Divide by
   the exact float value of 2^31 instead. */
230 audio->data(channel)[sample] = static_cast<float>(*p++) / 2147483648.0f;
233 if (channel == _ffmpeg_content->audio_channels()) {
241 case AV_SAMPLE_FMT_FLT:
243 float* p = reinterpret_cast<float*> (data[0]);
246 for (int i = 0; i < total_samples; ++i) {
247 audio->data(channel)[sample] = *p++;
250 if (channel == _ffmpeg_content->audio_channels()) {
258 case AV_SAMPLE_FMT_FLTP:
260 float** p = reinterpret_cast<float**> (data);
/* Planar float: already in our output format, so copy each channel wholesale. */
261 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
262 memcpy (audio->data(i), p[i], frames * sizeof(float));
268 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return Sample format of the audio codec, or a zero AVSampleFormat
 *  if this content has no audio stream.
 */
275 FFmpegDecoder::audio_sample_format () const
277 if (!_ffmpeg_content->audio_stream()) {
278 return (AVSampleFormat) 0;
281 return audio_codec_context()->sample_fmt;
/** @return Number of bytes per sample for our audio sample format. */
285 FFmpegDecoder::bytes_per_audio_sample () const
287 return av_get_bytes_per_sample (audio_sample_format ());
/** Run the decoder forward from its current position, discarding output,
 *  until the `finished' predicate returns true.  Used after a rough seek to
 *  step accurately to the wanted position.
 *  @param finished Called with (last video time seen, last audio time seen,
 *  number of packets read so far); returns true to stop.
 */
291 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
294 optional<ContentTime> last_video;
295 optional<ContentTime> last_audio;
297 while (!finished (last_video, last_audio, frames_read)) {
298 int r = av_read_frame (_format_context, &_packet);
300 /* We should flush our decoders here, possibly yielding a few more frames,
301 but the consequence of having to do that is too hideous to contemplate.
302 Instead we give up and say that you can't seek too close to the end
310 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
312 if (_packet.stream_index == _video_stream) {
314 avcodec_get_frame_defaults (_frame);
317 r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
318 if (r >= 0 && got_picture) {
319 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
322 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
323 AVPacket copy_packet = _packet;
324 while (copy_packet.size > 0) {
/* Decode from copy_packet, not _packet: copy_packet's data/size are advanced
   below, so decoding _packet here would re-decode the same bytes on every
   iteration (matches the equivalent loop in decode_audio_packet()). */
327 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &copy_packet);
328 if (r >= 0 && got_frame) {
329 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
332 copy_packet.data += r;
333 copy_packet.size -= r;
337 av_free_packet (&_packet);
/** Predicate for minimal_run(): @return true once either the last video or
 *  the last audio timestamp seen has reached (or passed) the seek target.
 */
344 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
346 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/* Predicate for minimal_run() used in the final stage of an accurate seek.
   NOTE(review): the body is not visible in this fragment; presumably it
   compares `done' against `n' — confirm against the full source. */
350 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the format context to content time t (converted to the video
 *  stream's timebase after removing _pts_offset) and flush every codec's
 *  buffered state so decoding restarts cleanly.
 */
356 FFmpegDecoder::seek_and_flush (ContentTime t)
358 ContentTime const u = t - _pts_offset;
359 int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
361 if (_ffmpeg_content->audio_stream ()) {
/* The audio seek point in the audio stream's own timebase; presumably
   combined with `s' on a line missing from this fragment — confirm. */
363 s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
367 /* Ridiculous empirical hack */
373 av_seek_frame (_format_context, _video_stream, s, 0);
/* Discard any frames buffered inside the codecs from before the seek. */
375 avcodec_flush_buffers (video_codec_context());
376 if (audio_codec_context ()) {
377 avcodec_flush_buffers (audio_codec_context ());
379 if (_subtitle_codec_context) {
380 avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to `time'.  If `accurate' is true we deliberately under-seek by 2s
 *  and then run the decoder forward (minimal_run) so we land exactly on the
 *  requested time; otherwise a single rough seek is enough.
 */
385 FFmpegDecoder::seek (ContentTime time, bool accurate)
387 VideoDecoder::seek (time, accurate);
388 AudioDecoder::seek (time, accurate);
390 /* If we are doing an accurate seek, our initial shot will be 2s (2 being
391 a number plucked from the air) earlier than we want to end up. The loop below
392 will hopefully then step through to where we want to be.
395 ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
396 ContentTime initial_seek = time - pre_roll;
397 if (initial_seek < ContentTime (0)) {
398 initial_seek = ContentTime (0);
401 /* Initial seek time in the video stream's timebase */
403 seek_and_flush (initial_seek);
/* First pass: count how many packets it takes to overrun the target... */
410 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then seek again and replay all but the last of them. */
412 seek_and_flush (initial_seek);
414 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
/** Decode the current _packet as audio and emit the resulting samples via
 *  audio ().  A single packet may contain several frames, so we loop,
 *  advancing a copy of the packet by however many bytes each decode consumed.
 */
419 FFmpegDecoder::decode_audio_packet ()
421 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
425 AVPacket copy_packet = _packet;
427 while (copy_packet.size > 0) {
/* `&copy_packet' restored here: the original text had the mojibake
   `©_packet' (an HTML-entity corruption of `&copy_packet'). */
430 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
432 if (decode_result < 0) {
433 _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
437 if (frame_finished) {
/* Timestamp of this frame in content time, via the audio stream's timebase. */
438 ContentTime const ct = ContentTime::from_seconds (
439 av_frame_get_best_effort_timestamp (_frame) *
440 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
443 int const data_size = av_samples_get_buffer_size (
444 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
447 audio (deinterleave_audio (_frame->data, data_size), ct);
/* Move past the bytes this call consumed and try for another frame. */
450 copy_packet.data += decode_result;
451 copy_packet.size -= decode_result;
/** Decode the current _packet as video, run the resulting frame through a
 *  (cached) FilterGraph matching its size/pixel-format, and emit each output
 *  image via video ().  Frames without a PTS are dropped with a log message.
 */
456 FFmpegDecoder::decode_video_packet ()
459 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
463 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
465 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame's size and format. */
467 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
468 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
472 if (i == _filter_graphs.end ()) {
473 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
474 _filter_graphs.push_back (graph);
475 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
480 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
482 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
484 shared_ptr<Image> image = i->first;
486 if (i->second != AV_NOPTS_VALUE) {
/* Convert the stream PTS to seconds, apply our offset, then to a frame index. */
487 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
488 video (shared_ptr<ImageProxy> (new RawImageProxy (image)), rint (pts * _ffmpeg_content->video_frame_rate ()));
490 _log->log ("Dropping frame without PTS");
/** Find and open a decoder for our subtitle stream, if the content has one.
 *  Sets _subtitle_codec_context and _subtitle_codec under _mutex.
 *  @throws DecodeError if the stream's codec context, its decoder, or the
 *  open call is unavailable/fails.
 */
499 FFmpegDecoder::setup_subtitle ()
501 boost::mutex::scoped_lock lm (_mutex);
503 if (!_ffmpeg_content->subtitle_stream()) {
507 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
508 if (_subtitle_codec_context == 0) {
509 throw DecodeError (N_("could not find subtitle stream"));
512 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
514 if (_subtitle_codec == 0) {
515 throw DecodeError (N_("could not find subtitle decoder"));
518 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
519 throw DecodeError (N_("could not open subtitle decoder"));
/** Decode the current _packet as a subtitle.  Only single-rect bitmap
 *  subtitles are handled: the rect's paletted image is converted from its
 *  BGRA palette to an RGBA Image and emitted with its display period and
 *  position (as a proportion of the video size).
 */
524 FFmpegDecoder::decode_subtitle_packet ()
528 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
532 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
533 indicate that the previous subtitle should stop.
535 if (sub.num_rects <= 0) {
/* Empty subtitle: emit a null image to terminate the previous one. */
536 image_subtitle (ContentTime (), ContentTime (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
538 } else if (sub.num_rects > 1) {
539 throw DecodeError (_("multi-part subtitles not yet supported"));
542 /* Subtitle PTS (within the source, not taking into account any of the
543 source that we may have chopped off for the DCP)
545 ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
547 /* hence start time for this sub */
548 ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
/* start/end_display_time are offsets in milliseconds from packet_time. */
549 ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
551 AVSubtitleRect const * rect = sub.rects[0];
553 if (rect->type != SUBTITLE_BITMAP) {
555 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
559 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
560 G, third B, fourth A.
562 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
564 /* Start of the first line in the subtitle */
565 uint8_t* sub_p = rect->pict.data[0];
566 /* sub_p looks up into a BGRA palette which is here
567 (i.e. first byte B, second G, third R, fourth A)
569 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
570 /* Start of the output data */
571 uint32_t* out_p = (uint32_t *) image->data()[0];
573 for (int y = 0; y < rect->h; ++y) {
574 uint8_t* sub_line_p = sub_p;
575 uint32_t* out_line_p = out_p;
576 for (int x = 0; x < rect->w; ++x) {
577 uint32_t const p = palette[*sub_line_p++];
/* Swap the B and R bytes (BGRA palette entry -> RGBA output word). */
578 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
580 sub_p += rect->pict.linesize[0];
581 out_p += image->stride()[0] / sizeof (uint32_t);
584 dcp::Size const vs = _ffmpeg_content->video_size ();
/* Position/size of the subtitle as a proportion of the video frame. */
590 dcpomatic::Rect<double> (
591 static_cast<double> (rect->x) / vs.width,
592 static_cast<double> (rect->y) / vs.height,
593 static_cast<double> (rect->w) / vs.width,
594 static_cast<double> (rect->h) / vs.height
/* Free the AVSubtitle's rects/palette allocated by the decoder. */
598 avsubtitle_free (&sub);