2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/* Construct an FFmpegDecoder.
 * @param f Film that this content is part of (weakly held for logging).
 * @param c FFmpeg content to decode.
 * @param video true to decode the video stream.
 * @param audio true to decode the audio stream.
 *
 * The body computes _pts_offset, which is later added to every PTS that
 * FFmpeg gives us so that playback starts at time 0 and the first video
 * frame falls exactly on a frame boundary.
 *
 * NOTE(review): this listing is missing lines (parts of the initialiser
 * list and some closing braces); comments describe only the visible code.
 */
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
75 /* Audio and video frame PTS values may not start with 0. We want
76 to fiddle them so that:
78 1. One of them starts at time 0.
79 2. The first video PTS value ends up on a frame boundary.
81 Then we remove big initial gaps in PTS and we allow our
82 insertion of black frames to work.
84 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
/* A stream only "counts" if we were asked to decode it AND the content
   reports a first PTS for it. */
87 bool const have_video = video && c->first_video();
88 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
90 /* First, make one of them start at 0 */
92 if (have_audio && have_video) {
/* Shift by the earlier of the two first-PTS values so neither goes negative */
93 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
94 } else if (have_video) {
95 _pts_offset = - c->first_video().get();
96 } else if (have_audio) {
97 _pts_offset = - c->audio_stream()->first_audio.get();
100 /* Now adjust both so that the video pts starts on a frame */
101 if (have_video && have_audio) {
102 double first_video = c->first_video().get() + _pts_offset;
103 double const old_first_video = first_video;
105 /* Round the first video up to a frame boundary */
/* 1e-6 tolerance: only snap if we are not already (numerically) on a boundary */
106 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
107 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Apply the extra shift needed by the rounding to both streams */
110 _pts_offset += first_video - old_first_video;
/* Destructor: close the subtitle codec context, if one was opened by
 * setup_subtitle().  Takes _mutex to serialise against other users of
 * the codec contexts.
 */
114 FFmpegDecoder::~FFmpegDecoder ()
116 boost::mutex::scoped_lock lm (_mutex);
/* _subtitle_codec_context is 0 unless setup_subtitle() succeeded */
118 if (_subtitle_codec_context) {
119 avcodec_close (_subtitle_codec_context);
/* Drain any frames still buffered inside the video/audio decoders at
 * end-of-stream, then flush the base AudioDecoder.
 */
124 FFmpegDecoder::flush ()
126 /* Get any remaining frames */
131 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Keep decoding until the video decoder has no more buffered frames */
134 while (decode_video_packet ()) {}
/* Only drain audio if we have an audio stream and were asked to decode it */
137 if (_ffmpeg_content->audio_stream() && _decode_audio) {
138 decode_audio_packet ();
141 AudioDecoder::flush ();
/* Read the next packet from the container and dispatch it to the
 * appropriate decode_*_packet() method based on its stream index.
 * On read failure (including EOF) the error is logged (if not EOF)
 * and the decoder finishes off via flush -- see comment below.
 */
145 FFmpegDecoder::pass ()
147 int r = av_read_frame (_format_context, &_packet);
/* r < 0 handling: EOF is expected; anything else gets logged */
150 if (r != AVERROR_EOF) {
151 /* Maybe we should fail here, but for now we'll just finish off instead */
153 av_strerror (r, buf, sizeof(buf));
154 shared_ptr<const Film> film = _film.lock ();
156 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
/* Reset _frame before handing it to a decoder */
163 avcodec_get_frame_defaults (_frame);
165 shared_ptr<const Film> film = _film.lock ();
168 int const si = _packet.stream_index;
/* Route the packet: video, audio or subtitle, each gated on whether we
   are meant to be decoding that kind of content. */
170 if (si == _video_stream && _decode_video) {
171 decode_video_packet ();
172 } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
173 decode_audio_packet ();
174 } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
175 decode_subtitle_packet ();
/* We own the packet's payload after av_read_frame; free it */
178 av_free_packet (&_packet);
182 /** @param data pointer to array of pointers to buffers.
183 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 * @param size total size of the audio data, in bytes, across all channels.
 * @return AudioBuffers containing the same samples converted to float in
 * the range [-1, 1), one plane per channel.
 * @throws DecodeError if the sample format is not one we handle.
185 shared_ptr<AudioBuffers>
186 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
188 assert (_ffmpeg_content->audio_channels());
189 assert (bytes_per_audio_sample());
191 /* Deinterleave and convert to float */
/* size must be a whole number of frames (all channels, whole samples) */
193 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
195 int const total_samples = size / bytes_per_audio_sample();
196 int const frames = total_samples / _ffmpeg_content->audio_channels();
197 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
199 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: walk the single buffer, splitting samples
   out into per-channel planes; divide by 2^15 to normalise. */
200 case AV_SAMPLE_FMT_S16:
202 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
205 for (int i = 0; i < total_samples; ++i) {
206 audio->data(channel)[sample] = float(*p++) / (1 << 15);
209 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar signed 16-bit: one input buffer per channel already */
217 case AV_SAMPLE_FMT_S16P:
219 int16_t** p = reinterpret_cast<int16_t **> (data);
220 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
221 for (int j = 0; j < frames; ++j) {
222 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit; normalise by 2^31 */
228 case AV_SAMPLE_FMT_S32:
230 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
233 for (int i = 0; i < total_samples; ++i) {
234 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
237 if (channel == _ffmpeg_content->audio_channels()) {
/* Interleaved float: already normalised, just de-interleave */
245 case AV_SAMPLE_FMT_FLT:
247 float* p = reinterpret_cast<float*> (data[0]);
250 for (int i = 0; i < total_samples; ++i) {
251 audio->data(channel)[sample] = *p++;
254 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: a straight per-channel memcpy suffices */
262 case AV_SAMPLE_FMT_FLTP:
264 float** p = reinterpret_cast<float**> (data);
265 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
266 memcpy (audio->data(i), p[i], frames * sizeof(float));
272 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* @return the sample format of our audio stream's codec context, or
 * AVSampleFormat 0 if the content has no audio stream.
 */
279 FFmpegDecoder::audio_sample_format () const
281 if (!_ffmpeg_content->audio_stream()) {
/* No audio: return a harmless default rather than dereferencing nothing */
282 return (AVSampleFormat) 0;
285 return audio_codec_context()->sample_fmt;
/* @return the size in bytes of one audio sample in our stream's format
 * (per channel, e.g. 2 for S16).
 */
289 FFmpegDecoder::bytes_per_audio_sample () const
291 return av_get_bytes_per_sample (audio_sample_format ());
/* Decode packets, discarding the results, until `finished' returns true.
 * Used during seeking to step through content without emitting it.
 * @param finished predicate given (last video time, last audio time,
 * number of frames read so far); decoding stops when it returns true.
 * The times passed are content times adjusted by _pts_offset.
 */
295 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
298 optional<ContentTime> last_video;
299 optional<ContentTime> last_audio;
301 while (!finished (last_video, last_audio, frames_read)) {
302 int r = av_read_frame (_format_context, &_packet);
304 /* We should flush our decoders here, possibly yielding a few more frames,
305 but the consequence of having to do that is too hideous to contemplate.
306 Instead we give up and say that you can't seek too close to the end
/* Convert this stream's timebase to seconds for the PTS arithmetic below */
314 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
316 if (_packet.stream_index == _video_stream) {
318 avcodec_get_frame_defaults (_frame);
321 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
322 if (r >= 0 && finished) {
/* Record the time of the frame we just decoded, scaled to TIME_HZ */
324 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
328 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
/* Audio packets may contain several frames, so loop over the packet */
329 AVPacket copy_packet = _packet;
330 while (copy_packet.size > 0) {
/* NOTE(review): this passes &_packet but advances copy_packet below --
   looks like it should pass &copy_packet; confirm against upstream. */
333 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
334 if (r >= 0 && finished) {
336 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
340 copy_packet.data += r;
341 copy_packet.size -= r;
345 av_free_packet (&_packet);
/* Predicate for minimal_run(): true once either stream has decoded a
 * frame at or beyond the seek target `seek'.
 */
352 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
354 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/* Predicate for minimal_run() during the final stage of an accurate seek;
 * presumably true once `done' frames of the required `n' have been read
 * -- body not visible in this listing, confirm against upstream.
 */
358 FFmpegDecoder::seek_final_finished (int n, int done) const
/* Seek the format context to content time `t' and flush all codec
 * buffers so that no stale frames are emitted after the seek.
 * The target is converted from TIME_HZ units to the video stream's
 * timebase, removing our _pts_offset fiddle first.
 */
364 FFmpegDecoder::seek_and_flush (ContentTime t)
366 int64_t s = ((double (t) / TIME_HZ) - _pts_offset) /
367 av_q2d (_format_context->streams[_video_stream]->time_base);
/* Also compute the target in the audio stream's timebase, if we have audio */
369 if (_ffmpeg_content->audio_stream ()) {
372 ((double (t) / TIME_HZ) - _pts_offset) /
373 av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base)
378 /* Ridiculous empirical hack */
/* BACKWARD flag: land at or before the target so we can decode forwards to it */
381 av_seek_frame (_format_context, _video_stream, s, AVSEEK_FLAG_BACKWARD);
383 avcodec_flush_buffers (video_codec_context());
384 if (audio_codec_context ()) {
385 avcodec_flush_buffers (audio_codec_context ());
387 if (_subtitle_codec_context) {
388 avcodec_flush_buffers (_subtitle_codec_context);
/* Seek to content time `time'.
 * @param time target time.
 * @param accurate if true, seek a little early and then decode forwards
 * (discarding) until exactly the right frame; if false, a single coarse
 * av_seek_frame is acceptable.
 */
393 FFmpegDecoder::seek (ContentTime time, bool accurate)
/* Let the base classes reset their own state first */
395 Decoder::seek (time, accurate);
396 AudioDecoder::seek (time, accurate);
398 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
399 a number plucked from the air) earlier than we want to end up. The loop below
400 will hopefully then step through to where we want to be.
403 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
404 ContentTime initial_seek = time - pre_roll;
/* Clamp: cannot seek before the start of the content */
405 if (initial_seek < 0) {
409 /* Initial seek time in the video stream's timebase */
411 seek_and_flush (initial_seek);
/* Count how many frames it takes to overrun the target... */
418 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then seek again and run up to (but not past) it */
420 seek_and_flush (initial_seek);
422 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
/* Decode the audio data in _packet and emit it via audio().
 * A single packet can hold several audio frames, so we iterate over a
 * copy of the packet, advancing by however many bytes each
 * avcodec_decode_audio4 call consumed.
 */
427 FFmpegDecoder::decode_audio_packet ()
429 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
/* Copy so that the original _packet (freed by the caller) is untouched */
433 AVPacket copy_packet = _packet;
435 while (copy_packet.size > 0) {
/* NOTE(review): "©_packet" is mojibake -- this should read
   "&copy_packet" (an HTML-entity mangling of the address-of); fix
   before this will compile. */
438 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
440 if (decode_result < 0) {
441 shared_ptr<const Film> film = _film.lock ();
443 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
447 if (frame_finished) {
/* Timestamp of this frame in content time, via the stream's timebase */
448 ContentTime const ct = (
449 av_frame_get_best_effort_timestamp (_frame) *
450 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base)
/* Byte size of the decoded samples, so deinterleave_audio knows the frame count */
454 int const data_size = av_samples_get_buffer_size (
455 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
458 audio (deinterleave_audio (_frame->data, data_size), ct);
/* Advance past the bytes this call consumed */
461 copy_packet.data += decode_result;
462 copy_packet.size -= decode_result;
/* Decode the video data in _packet, push the frame through a FilterGraph
 * (creating and caching one per size/pixel-format combination), apply any
 * post-process filters and emit the resulting images via video().
 * @return presumably false when no complete frame was produced (see the
 * early-out below) -- return statements are not visible in this listing.
 */
467 FFmpegDecoder::decode_video_packet ()
/* Bail out early if the decoder produced no complete frame */
470 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* _filter_graphs is shared state; hold the mutex while searching/modifying */
474 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
476 shared_ptr<FilterGraph> graph;
/* Look for an existing graph matching this frame's size and pixel format */
478 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
479 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
/* None found: build a new graph and cache it for subsequent frames */
483 if (i == _filter_graphs.end ()) {
484 shared_ptr<const Film> film = _film.lock ();
487 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
488 _filter_graphs.push_back (graph);
490 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* One input frame can yield several output images from the filter graph */
495 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
497 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
499 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
501 shared_ptr<Image> image = i->first;
502 if (!post_process.empty ()) {
503 image = image->post_process (post_process, true);
/* Only emit frames that have a usable PTS; otherwise log and drop */
506 if (i->second != AV_NOPTS_VALUE) {
507 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset;
/* Convert the PTS (seconds) to a frame index at the content's frame rate */
508 VideoFrame const f = rint (pts * _ffmpeg_content->video_frame_rate ());
509 video (image, false, f);
511 shared_ptr<const Film> film = _film.lock ();
513 film->log()->log ("Dropping frame without PTS");
/* Find and open a decoder for the content's subtitle stream, storing the
 * codec context in _subtitle_codec_context (closed in the destructor).
 * Does nothing if there is no subtitle stream or its index is invalid.
 * @throws DecodeError if no decoder can be found or opened.
 */
522 FFmpegDecoder::setup_subtitle ()
524 boost::mutex::scoped_lock lm (_mutex);
/* Guard against a stale stream index that is out of range for this container */
526 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
530 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
531 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
533 if (_subtitle_codec == 0) {
534 throw DecodeError (_("could not find subtitle decoder"));
537 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
538 throw DecodeError (N_("could not open subtitle decoder"));
/* Decode the subtitle data in _packet and emit it via subtitle().
 * Only single-rect bitmap subtitles are supported; the bitmap's BGRA
 * palette-indexed pixels are converted to an RGBA Image and its position
 * expressed as a proportion of the video size.
 * @throws DecodeError on multi-part or non-bitmap subtitles.
 */
543 FFmpegDecoder::decode_subtitle_packet ()
/* Nothing to do if no complete subtitle came out of this packet */
547 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
551 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
552 indicate that the previous subtitle should stop.
554 if (sub.num_rects <= 0) {
/* Emit a null image to clear the previous subtitle */
555 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
557 } else if (sub.num_rects > 1) {
558 throw DecodeError (_("multi-part subtitles not yet supported"));
561 /* Subtitle PTS in seconds (within the source, not taking into account any of the
562 source that we may have chopped off for the DCP)
564 double const packet_time = (static_cast<double> (sub.pts ) / AV_TIME_BASE) + _pts_offset;
566 /* hence start time for this sub */
/* display times are in milliseconds relative to packet_time */
567 ContentTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
568 ContentTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
570 AVSubtitleRect const * rect = sub.rects[0];
572 if (rect->type != SUBTITLE_BITMAP) {
573 throw DecodeError (_("non-bitmap subtitles not yet supported"));
576 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
577 G, third B, fourth A.
579 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
581 /* Start of the first line in the subtitle */
582 uint8_t* sub_p = rect->pict.data[0];
583 /* sub_p looks up into a BGRA palette which is here
584 (i.e. first byte B, second G, third R, fourth A)
586 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
587 /* Start of the output data */
588 uint32_t* out_p = (uint32_t *) image->data()[0];
/* For each pixel, look up its palette entry and swap B and R to turn
   BGRA into RGBA, keeping G and A in place. */
590 for (int y = 0; y < rect->h; ++y) {
591 uint8_t* sub_line_p = sub_p;
592 uint32_t* out_line_p = out_p;
593 for (int x = 0; x < rect->w; ++x) {
594 uint32_t const p = palette[*sub_line_p++];
595 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Advance to the next row; output stride is in bytes, so scale to words */
597 sub_p += rect->pict.linesize[0];
598 out_p += image->stride()[0] / sizeof (uint32_t);
/* Express the rect's position and size as fractions of the video frame */
601 libdcp::Size const vs = _ffmpeg_content->video_size ();
605 dcpomatic::Rect<double> (
606 static_cast<double> (rect->x) / vs.width,
607 static_cast<double> (rect->y) / vs.height,
608 static_cast<double> (rect->w) / vs.width,
609 static_cast<double> (rect->h) / vs.height
/* Free the AVSubtitle's rects/bitmaps now we have copied what we need */
616 avsubtitle_free (&sub);