2 Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
32 #include <libavcodec/avcodec.h>
33 #include <libavformat/avformat.h>
36 #include "exceptions.h"
40 #include "ffmpeg_decoder.h"
41 #include "ffmpeg_audio_stream.h"
42 #include "ffmpeg_subtitle_stream.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
46 #include "image_proxy.h"
/* Logging convenience macros: all route to the film's log via _video_content.
   GENERAL and ERROR run their arguments through String::compose; WARNING
   passes them straight through — NOTE(review): presumably WARNING call sites
   pass an already-composed string; confirm at the call sites. */
51 #define LOG_GENERAL(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_GENERAL);
52 #define LOG_ERROR(...) _video_content->film()->log()->log (String::compose (__VA_ARGS__), Log::TYPE_ERROR);
53 #define LOG_WARNING(...) _video_content->film()->log()->log (__VA_ARGS__, Log::TYPE_WARNING);
58 using std::stringstream;
62 using boost::shared_ptr;
63 using boost::optional;
64 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder for content `c', logging to `log'.
 *  NOTE(review): this extract is missing lines (the embedded numbering
 *  jumps), so only the _pts_offset computation of the constructor body is
 *  visible and documented here.
 */
67 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
74 /* Audio and video frame PTS values may not start with 0. We want
75 to fiddle them so that:
77 1. One of them starts at time 0.
78 2. The first video PTS value ends up on a frame boundary.
80 Then we remove big initial gaps in PTS and we allow our
81 insertion of black frames to work.
83 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
/* Does each stream report a first timestamp at all? */
86 bool const have_video = c->first_video();
87 bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
89 /* First, make one of them start at 0 */
91 if (have_audio && have_video) {
/* Shift both streams back by the earlier of the two first timestamps */
92 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
93 } else if (have_video) {
94 _pts_offset = - c->first_video().get();
95 } else if (have_audio) {
96 _pts_offset = - c->audio_stream()->first_audio.get();
99 /* Now adjust both so that the video pts starts on a frame */
100 if (have_video && have_audio) {
101 ContentTime first_video = c->first_video().get() + _pts_offset;
102 ContentTime const old_first_video = first_video;
/* Nudge the offset by the distance from the shifted first video PTS to
   the next frame boundary at the content's frame rate */
103 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
/** Flush buffered data out of the decoders at end-of-stream: drain any
 *  remaining video frames, then (if the content has an audio stream)
 *  drain audio and flush the AudioDecoder base class.
 */
108 FFmpegDecoder::flush ()
110 /* Get any remaining frames */
115 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Keep decoding until the video decoder stops producing frames */
117 while (decode_video_packet ()) {}
119 if (_ffmpeg_content->audio_stream()) {
120 decode_audio_packet ();
121 AudioDecoder::flush ();
/** Read one packet from the format context and dispatch it to the
 *  video / audio / subtitle decoder according to its stream index.
 */
126 FFmpegDecoder::pass ()
128 int r = av_read_frame (_format_context, &_packet);
/* r < 0 is EOF or a read error; anything other than EOF gets logged
   (and then, per the comment below, we finish off rather than fail) */
131 if (r != AVERROR_EOF) {
132 /* Maybe we should fail here, but for now we'll just finish off instead */
134 av_strerror (r, buf, sizeof(buf));
135 LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), buf, r);
/* Dispatch on the packet's stream index */
142 int const si = _packet.stream_index;
144 if (si == _video_stream) {
145 decode_video_packet ();
146 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
147 decode_audio_packet ();
148 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
149 decode_subtitle_packet ();
/* Release the packet buffer that av_read_frame gave us */
152 av_free_packet (&_packet);
156 /** @param data pointer to array of pointers to buffers.
157 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
159 shared_ptr<AudioBuffers>
160 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
162 assert (_ffmpeg_content->audio_channels());
163 assert (bytes_per_audio_sample());
165 /* Deinterleave and convert to float */
167 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
169 int const total_samples = size / bytes_per_audio_sample();
170 int const frames = total_samples / _ffmpeg_content->audio_channels();
171 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
173 switch (audio_sample_format()) {
174 case AV_SAMPLE_FMT_U8:
176 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
179 for (int i = 0; i < total_samples; ++i) {
180 audio->data(channel)[sample] = float(*p++) / (1 << 23);
183 if (channel == _ffmpeg_content->audio_channels()) {
191 case AV_SAMPLE_FMT_S16:
193 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
196 for (int i = 0; i < total_samples; ++i) {
197 audio->data(channel)[sample] = float(*p++) / (1 << 15);
200 if (channel == _ffmpeg_content->audio_channels()) {
208 case AV_SAMPLE_FMT_S16P:
210 int16_t** p = reinterpret_cast<int16_t **> (data);
211 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
212 for (int j = 0; j < frames; ++j) {
213 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
219 case AV_SAMPLE_FMT_S32:
221 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
224 for (int i = 0; i < total_samples; ++i) {
225 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
228 if (channel == _ffmpeg_content->audio_channels()) {
236 case AV_SAMPLE_FMT_FLT:
238 float* p = reinterpret_cast<float*> (data[0]);
241 for (int i = 0; i < total_samples; ++i) {
242 audio->data(channel)[sample] = *p++;
245 if (channel == _ffmpeg_content->audio_channels()) {
253 case AV_SAMPLE_FMT_FLTP:
255 float** p = reinterpret_cast<float**> (data);
256 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
257 memcpy (audio->data(i), p[i], frames * sizeof(float));
263 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return the AVSampleFormat of the content's audio stream, or a zero
 *  (dummy) value if the content has no audio stream.
 */
270 FFmpegDecoder::audio_sample_format () const
272 if (!_ffmpeg_content->audio_stream()) {
273 return (AVSampleFormat) 0;
276 return audio_codec_context()->sample_fmt;
/** @return the size, in bytes, of one sample in the audio stream's format. */
280 FFmpegDecoder::bytes_per_audio_sample () const
282 return av_get_bytes_per_sample (audio_sample_format ());
286 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
289 optional<ContentTime> last_video;
290 optional<ContentTime> last_audio;
292 while (!finished (last_video, last_audio, frames_read)) {
293 int r = av_read_frame (_format_context, &_packet);
295 /* We should flush our decoders here, possibly yielding a few more frames,
296 but the consequence of having to do that is too hideous to contemplate.
297 Instead we give up and say that you can't seek too close to the end
305 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
307 if (_packet.stream_index == _video_stream) {
309 av_frame_unref (_frame);
312 r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
313 if (r >= 0 && got_picture) {
314 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
317 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
318 AVPacket copy_packet = _packet;
319 while (copy_packet.size > 0) {
322 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
323 if (r >= 0 && got_frame) {
324 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
327 copy_packet.data += r;
328 copy_packet.size -= r;
332 av_free_packet (&_packet);
/** Predicate for minimal_run: true once either decoded stream has reached
 *  or passed the target seek time.
 */
339 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
341 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/** Predicate for the final minimal_run pass of an accurate seek; presumably
 *  returns true once `done' of the required `n' packets have been read
 *  (body not visible in this extract — confirm against the full file).
 */
345 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the format context to ContentTime `t' and flush all codec buffers.
 *  NOTE(review): lines are missing from this extract around the audio-stream
 *  branch; it appears to combine the video-timebase target with the
 *  audio-timebase target — confirm against the full file.
 */
351 FFmpegDecoder::seek_and_flush (ContentTime t)
/* Convert the target back to raw FFmpeg time: undo the _pts_offset we add
   to decoded timestamps, then scale by the video stream's timebase */
353 ContentTime const u = t - _pts_offset;
354 int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
356 if (_ffmpeg_content->audio_stream ()) {
358 s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
362 /* Ridiculous empirical hack */
368 av_seek_frame (_format_context, _video_stream, s, 0);
/* Drop any buffered frames in every codec so stale pre-seek data cannot
   leak out after the seek */
370 avcodec_flush_buffers (video_codec_context());
371 if (audio_codec_context ()) {
372 avcodec_flush_buffers (audio_codec_context ());
374 if (subtitle_codec_context ()) {
375 avcodec_flush_buffers (subtitle_codec_context ());
/** Seek to `time'.  For an accurate seek we deliberately undershoot by a
 *  2-second pre-roll and then step forward packet-by-packet (via
 *  minimal_run) until we land exactly where we want to be.
 */
380 FFmpegDecoder::seek (ContentTime time, bool accurate)
/* Let the base classes reset their own state first */
382 VideoDecoder::seek (time, accurate);
383 AudioDecoder::seek (time, accurate);
385 /* If we are doing an accurate seek, our initial shot will be 2s (2 being
386 a number plucked from the air) earlier than we want to end up. The loop below
387 will hopefully then step through to where we want to be.
390 ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
391 ContentTime initial_seek = time - pre_roll;
/* Clamp so the pre-roll cannot take us before the start of the content */
392 if (initial_seek < ContentTime (0)) {
393 initial_seek = ContentTime (0);
396 /* Initial seek time in the video stream's timebase */
398 seek_and_flush (initial_seek);
/* First pass: count how many packets (N) it takes to decode something at
   or after `time' */
405 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* Second pass: go back and read N - 1 packets, leaving the next pass()
   to produce the frame we actually want */
407 seek_and_flush (initial_seek);
409 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
414 FFmpegDecoder::decode_audio_packet ()
416 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
420 AVPacket copy_packet = _packet;
422 while (copy_packet.size > 0) {
425 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
427 if (decode_result < 0) {
428 LOG_ERROR ("avcodec_decode_audio4 failed (%1)", decode_result);
432 if (frame_finished) {
433 ContentTime const ct = ContentTime::from_seconds (
434 av_frame_get_best_effort_timestamp (_frame) *
435 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
438 int const data_size = av_samples_get_buffer_size (
439 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
442 audio (deinterleave_audio (_frame->data, data_size), ct);
445 copy_packet.data += decode_result;
446 copy_packet.size -= decode_result;
/** Decode the current _packet as video, push the decoded frame through a
 *  (lazily-created, cached) filter graph and emit each resulting image with
 *  a frame index derived from its PTS.  NOTE(review): the return statements
 *  are not visible in this extract; flush() loops on this function, so it
 *  presumably returns true while frames keep coming — confirm.
 */
451 FFmpegDecoder::decode_video_packet ()
454 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* Filter graphs are keyed on (frame size, pixel format) and shared;
   protect the list against concurrent access */
458 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
460 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can process this frame */
462 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
463 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
/* None found: build one for this size/format and cache it */
467 if (i == _filter_graphs.end ()) {
468 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
469 _filter_graphs.push_back (graph);
470 LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
/* A single input frame can yield several filtered images */
475 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
477 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
479 shared_ptr<Image> image = i->first;
/* Convert the PTS (stream timebase) to seconds, apply our offset, and
   round to the nearest frame index at the content's frame rate */
481 if (i->second != AV_NOPTS_VALUE) {
482 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
484 shared_ptr<ImageProxy> (new RawImageProxy (image, _video_content->film()->log())),
485 rint (pts * _ffmpeg_content->video_frame_rate ())
/* Without a PTS we cannot place the frame in time, so drop it */
488 LOG_WARNING ("Dropping frame without PTS");
/** Decode the current _packet as a subtitle and emit it (via
 *  image_subtitle) as an RGBA image with a position rectangle expressed as
 *  fractions of the video frame size.  Only single-rect bitmap subtitles
 *  are supported.
 */
496 FFmpegDecoder::decode_subtitle_packet ()
500 if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
504 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
505 indicate that the previous subtitle should stop.
507 if (sub.num_rects <= 0) {
/* Emit an empty subtitle to clear the previous one */
508 image_subtitle (ContentTimePeriod (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
510 } else if (sub.num_rects > 1) {
511 throw DecodeError (_("multi-part subtitles not yet supported"));
514 /* Subtitle PTS (within the source, not taking into account any of the
515 source that we may have chopped off for the DCP)
517 ContentTimePeriod period = subtitle_period (sub) + _pts_offset;
519 AVSubtitleRect const * rect = sub.rects[0];
521 if (rect->type != SUBTITLE_BITMAP) {
523 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
527 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
528 G, third B, fourth A.
530 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
532 /* Start of the first line in the subtitle */
533 uint8_t* sub_p = rect->pict.data[0];
534 /* sub_p looks up into a BGRA palette which is here
535 (i.e. first byte B, second G, third R, fourth A)
537 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
538 /* Start of the output data */
539 uint32_t* out_p = (uint32_t *) image->data()[0];
541 for (int y = 0; y < rect->h; ++y) {
542 uint8_t* sub_line_p = sub_p;
543 uint32_t* out_line_p = out_p;
544 for (int x = 0; x < rect->w; ++x) {
545 uint32_t const p = palette[*sub_line_p++];
/* Swap the B and R bytes of the palette entry (BGRA -> RGBA), leaving
   G and A in place */
546 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Advance by each buffer's own line stride (they may differ) */
548 sub_p += rect->pict.linesize[0];
549 out_p += image->stride()[0] / sizeof (uint32_t);
552 dcp::Size const vs = _ffmpeg_content->video_size ();
/* Express the subtitle's position and size as fractions of the video
   frame, so it scales with any resizing downstream */
557 dcpomatic::Rect<double> (
558 static_cast<double> (rect->x) / vs.width,
559 static_cast<double> (rect->y) / vs.height,
560 static_cast<double> (rect->w) / vs.width,
561 static_cast<double> (rect->h) / vs.height
/* Release the AVSubtitle's internal buffers */
565 avsubtitle_free (&sub);
/** @return true if the content has a subtitle during period `p'
 *  (delegates to FFmpegContent).
 */
569 FFmpegDecoder::has_subtitle_during (ContentTimePeriod p) const
571 return _ffmpeg_content->has_subtitle_during (p);