2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder.
 *  @param f Film that the content is part of.
 *  @param c FFmpeg content to decode.
 *  @param video true to decode the video stream.
 *  @param audio true to decode the audio stream.
 */
FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
	, _subtitle_codec_context (0)
	, _decode_video (video)
	, _decode_audio (audio)
	, _video_pts_offset (0)
	, _audio_pts_offset (0)
	/* Audio and video frame PTS values may not start with 0.  We want
	   to fiddle them so that:

	   1. One of them starts at time 0.
	   2. The first video PTS value ends up on a frame boundary.

	   Then we remove big initial gaps in PTS and we allow our
	   insertion of black frames to work.

	   The offsets are applied later as:
	     audio_pts_to_use = audio_pts_from_ffmpeg + audio_pts_offset;
	     video_pts_to_use = video_pts_from_ffmpeg + video_pts_offset;
	*/

	bool const have_video = video && c->first_video();
	bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;

	/* First, make one of them start at 0 */

	if (have_audio && have_video) {
		/* Shift both by the same amount so that the earlier of the two starts at 0 */
		_video_pts_offset = _audio_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
	} else if (have_video) {
		_video_pts_offset = - c->first_video().get();
	} else if (have_audio) {
		_audio_pts_offset = - c->audio_stream()->first_audio.get();

	/* Now adjust both so that the video pts starts on a frame */
	if (have_video && have_audio) {
		double first_video = c->first_video().get() + _video_pts_offset;
		double const old_first_video = first_video;

		/* Round the first video up to a frame boundary */
		if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
			first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();

		/* Apply the same extra shift to audio so the two streams keep their relative sync */
		_video_pts_offset += first_video - old_first_video;
		_audio_pts_offset += first_video - old_first_video;
/** Destroy the decoder, closing the subtitle codec context if one was
 *  opened (see setup_subtitle()).  Takes _mutex to serialise against
 *  other users of the codec contexts.
 */
FFmpegDecoder::~FFmpegDecoder ()
	boost::mutex::scoped_lock lm (_mutex);

	if (_subtitle_codec_context) {
		/* Only close it if it was actually set up */
		avcodec_close (_subtitle_codec_context);
/** Flush any frames still buffered inside the video/audio decoders,
 *  passing them on, then flush the base AudioDecoder.
 */
FFmpegDecoder::flush ()
	/* Get any remaining frames */

	/* XXX: should we reset _packet.data and size after each *_decode_* call? */

	/* Keep draining the video decoder until it has nothing more to give */
	while (decode_video_packet ()) {}

	if (_ffmpeg_content->audio_stream() && _decode_audio) {
		decode_audio_packet ();

	AudioDecoder::flush ();
/** Read one packet from the content and dispatch it to the appropriate
 *  decode method (video, audio or subtitle) depending on its stream index
 *  and on which streams we have been asked to decode.
 */
FFmpegDecoder::pass ()
	int r = av_read_frame (_format_context, &_packet);

	if (r != AVERROR_EOF) {
		/* Maybe we should fail here, but for now we'll just finish off instead */
		av_strerror (r, buf, sizeof(buf));
		shared_ptr<const Film> film = _film.lock ();
		film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));

	avcodec_get_frame_defaults (_frame);

	shared_ptr<const Film> film = _film.lock ();

	int const si = _packet.stream_index;

	if (si == _video_stream && _decode_video) {
		decode_video_packet ();
	} else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
		decode_audio_packet ();
	} else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
		/* Subtitles are only decoded if the film wants them */
		decode_subtitle_packet ();

	av_free_packet (&_packet);
186 /** @param data pointer to array of pointers to buffers.
187 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
189 shared_ptr<AudioBuffers>
190 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
192 assert (_ffmpeg_content->audio_channels());
193 assert (bytes_per_audio_sample());
195 /* Deinterleave and convert to float */
197 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
199 int const total_samples = size / bytes_per_audio_sample();
200 int const frames = total_samples / _ffmpeg_content->audio_channels();
201 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
203 switch (audio_sample_format()) {
204 case AV_SAMPLE_FMT_S16:
206 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
209 for (int i = 0; i < total_samples; ++i) {
210 audio->data(channel)[sample] = float(*p++) / (1 << 15);
213 if (channel == _ffmpeg_content->audio_channels()) {
221 case AV_SAMPLE_FMT_S16P:
223 int16_t** p = reinterpret_cast<int16_t **> (data);
224 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
225 for (int j = 0; j < frames; ++j) {
226 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
232 case AV_SAMPLE_FMT_S32:
234 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
237 for (int i = 0; i < total_samples; ++i) {
238 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
241 if (channel == _ffmpeg_content->audio_channels()) {
249 case AV_SAMPLE_FMT_FLT:
251 float* p = reinterpret_cast<float*> (data[0]);
254 for (int i = 0; i < total_samples; ++i) {
255 audio->data(channel)[sample] = *p++;
258 if (channel == _ffmpeg_content->audio_channels()) {
266 case AV_SAMPLE_FMT_FLTP:
268 float** p = reinterpret_cast<float**> (data);
269 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
270 memcpy (audio->data(i), p[i], frames * sizeof(float));
276 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return The sample format of the content's audio stream, or a
 *  zero-valued AVSampleFormat if there is no audio stream.
 */
FFmpegDecoder::audio_sample_format () const
	if (!_ffmpeg_content->audio_stream()) {
		return (AVSampleFormat) 0;

	return audio_codec_context()->sample_fmt;
/** @return Number of bytes per sample for the audio stream's sample format */
FFmpegDecoder::bytes_per_audio_sample () const
	return av_get_bytes_per_sample (audio_sample_format ());
299 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
302 optional<ContentTime> last_video;
303 optional<ContentTime> last_audio;
305 while (!finished (last_video, last_audio, frames_read)) {
306 int r = av_read_frame (_format_context, &_packet);
308 /* We should flush our decoders here, possibly yielding a few more frames,
309 but the consequence of having to do that is too hideous to contemplate.
310 Instead we give up and say that you can't seek too close to the end
318 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
320 if (_packet.stream_index == _video_stream) {
322 avcodec_get_frame_defaults (_frame);
325 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
326 if (r >= 0 && finished) {
328 (av_frame_get_best_effort_timestamp (_frame) * time_base + _video_pts_offset) * TIME_HZ
332 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
333 AVPacket copy_packet = _packet;
334 while (copy_packet.size > 0) {
337 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
338 if (r >= 0 && finished) {
340 (av_frame_get_best_effort_timestamp (_frame) * time_base + _audio_pts_offset) * TIME_HZ
344 copy_packet.data += r;
345 copy_packet.size -= r;
349 av_free_packet (&_packet);
/** Predicate for minimal_run(): @return true when either the last video
 *  or the last audio time seen has reached the seek target.
 */
FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
	return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
362 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the format context to a given time and flush all open codec
 *  contexts so no stale frames are emitted after the seek.
 *  @param t Time to seek to.
 */
FFmpegDecoder::seek_and_flush (ContentTime t)
	/* Target time in the video stream's timebase, undoing our PTS offset */
	int64_t s = ((double (t) / TIME_HZ) - _video_pts_offset) /
		av_q2d (_format_context->streams[_video_stream]->time_base);

	if (_ffmpeg_content->audio_stream ()) {
		/* The same target expressed in the audio stream's timebase */
		((double (t) / TIME_HZ) - _audio_pts_offset) /
			av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base)

	/* BACKWARD so that we land on a seek point at or before the target */
	av_seek_frame (_format_context, _video_stream, s, AVSEEK_FLAG_BACKWARD);

	avcodec_flush_buffers (video_codec_context());
	if (audio_codec_context ()) {
		avcodec_flush_buffers (audio_codec_context ());
	if (_subtitle_codec_context) {
		avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to a given time.
 *  @param time Time to seek to.
 *  @param accurate If true, seek to a point slightly before `time' and then
 *  step through packets so that we end up exactly where we were asked;
 *  otherwise accept the nearest preceding seek point.
 */
FFmpegDecoder::seek (ContentTime time, bool accurate)
	Decoder::seek (time, accurate);

	/* If we are doing an accurate seek, our initial shot will be 200ms (200 being
	   a number plucked from the air) earlier than we want to end up.  The loop below
	   will hopefully then step through to where we want to be.
	*/
	ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
	ContentTime initial_seek = time - pre_roll;
	if (initial_seek < 0) {

	/* Initial seek time in the video stream's timebase */

	seek_and_flush (initial_seek);

	/* Find how many packets must be read before we overrun the target time ... */
	int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));

	/* ... then go back and read one fewer than that */
	seek_and_flush (initial_seek);

	minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
427 FFmpegDecoder::decode_audio_packet ()
429 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
433 AVPacket copy_packet = _packet;
435 while (copy_packet.size > 0) {
438 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
440 if (decode_result < 0) {
441 shared_ptr<const Film> film = _film.lock ();
443 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
447 if (frame_finished) {
448 ContentTime const t = rint (
449 (av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
450 * av_frame_get_best_effort_timestamp(_frame) + _audio_pts_offset) * TIME_HZ
453 int const data_size = av_samples_get_buffer_size (
454 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
457 audio (deinterleave_audio (_frame->data, data_size), t);
460 copy_packet.data += decode_result;
461 copy_packet.size -= decode_result;
/** Decode the video packet currently in _packet, running any decoded frame
 *  through a FilterGraph and passing the resulting images on via video().
 *  The return value indicates whether a frame was produced; flush() loops
 *  on it to drain the decoder.
 */
FFmpegDecoder::decode_video_packet ()
	if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {

	boost::mutex::scoped_lock lm (_filter_graphs_mutex);

	shared_ptr<FilterGraph> graph;

	/* Look for an existing filter graph that can handle this frame's size and pixel format */
	list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
	while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {

	if (i == _filter_graphs.end ()) {
		/* None suitable: build a new graph and cache it */
		shared_ptr<const Film> film = _film.lock ();

		graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
		_filter_graphs.push_back (graph);

		film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));

	list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);

	string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;

	for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {

		shared_ptr<Image> image = i->first;
		if (!post_process.empty ()) {
			image = image->post_process (post_process, true);

		if (i->second != AV_NOPTS_VALUE) {
			/* Convert the image's PTS to our time units, applying the video PTS offset */
			ContentTime const t = rint ((i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _video_pts_offset) * TIME_HZ);
			video (image, false, t);
			/* Frames without a PTS cannot be placed in time, so they are dropped */
			shared_ptr<const Film> film = _film.lock ();
			film->log()->log ("Dropping frame without PTS");
/** Find and open a decoder for the content's subtitle stream, if there is
 *  one, storing the codec context in _subtitle_codec_context (closed later
 *  by the destructor).  Takes _mutex to serialise with the destructor.
 *  @throws DecodeError if no subtitle decoder can be found or opened.
 */
FFmpegDecoder::setup_subtitle ()
	boost::mutex::scoped_lock lm (_mutex);

	/* Nothing to do if there is no subtitle stream, or its index is out of range */
	if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {

	_subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
	_subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);

	if (_subtitle_codec == 0) {
		throw DecodeError (_("could not find subtitle decoder"));

	if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
		throw DecodeError (N_("could not open subtitle decoder"));
/** Decode the subtitle packet currently in _packet, converting any bitmap
 *  subtitle from its palette form to RGBA and passing it on via subtitle()
 *  together with its position (as proportions of the video frame).
 *  @throws DecodeError for multi-part or non-bitmap subtitles.
 */
FFmpegDecoder::decode_subtitle_packet ()
	if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {

	/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
	   indicate that the previous subtitle should stop.
	*/
	if (sub.num_rects <= 0) {
		/* Signal "no image" so that the previous subtitle is stopped */
		subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
	} else if (sub.num_rects > 1) {
		throw DecodeError (_("multi-part subtitles not yet supported"));

	/* Subtitle PTS in seconds (within the source, not taking into account any of the
	   source that we may have chopped off for the DCP)
	*/
	double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;

	/* hence start time for this sub (display times are in milliseconds) */
	ContentTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
	ContentTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;

	AVSubtitleRect const * rect = sub.rects[0];

	if (rect->type != SUBTITLE_BITMAP) {
		throw DecodeError (_("non-bitmap subtitles not yet supported"));

	shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));

	/* Start of the first line in the subtitle */
	uint8_t* sub_p = rect->pict.data[0];
	/* sub_p looks up into a RGB palette which is here */
	uint32_t const * palette = (uint32_t *) rect->pict.data[1];
	/* Start of the output data */
	uint32_t* out_p = (uint32_t *) image->data()[0];

	/* Expand the paletted subtitle into RGBA, one line at a time */
	for (int y = 0; y < rect->h; ++y) {
		uint8_t* sub_line_p = sub_p;
		uint32_t* out_line_p = out_p;
		for (int x = 0; x < rect->w; ++x) {
			*out_line_p++ = palette[*sub_line_p++];
		/* Advance by each buffer's own line stride, which may exceed its width */
		sub_p += rect->pict.linesize[0];
		out_p += image->stride()[0] / sizeof (uint32_t);

	libdcp::Size const vs = _ffmpeg_content->video_size ();

	/* Position and size of the subtitle as proportions of the video frame */
	dcpomatic::Rect<double> (
		static_cast<double> (rect->x) / vs.width,
		static_cast<double> (rect->y) / vs.height,
		static_cast<double> (rect->w) / vs.width,
		static_cast<double> (rect->h) / vs.height

	avsubtitle_free (&sub);