2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/* Construct an FFmpegDecoder for content `c' of film `f'.
 *
 * @param f Film context (stored by a base class — not visible in this extract).
 * @param c FFmpeg content to decode.
 * @param video true to decode the video stream.
 * @param audio true to decode the audio stream.
 *
 * NOTE(review): this extract is missing some initialiser-list entries and the
 * base-class initialisers; confirm against the full file.
 */
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
71 , _video_pts_offset (0)
72 , _audio_pts_offset (0)
76 /* Audio and video frame PTS values may not start with 0. We want
77 to fiddle them so that:
79 1. One of them starts at time 0.
80 2. The first video PTS value ends up on a frame boundary.
82 Then we remove big initial gaps in PTS and we allow our
83 insertion of black frames to work.
86 audio_pts_to_use = audio_pts_from_ffmpeg + audio_pts_offset;
87 video_pts_to_use = video_pts_from_ffmpeg + video_pts_offset;
/* have_* are true only when the stream exists AND we were asked to decode it
   AND its first PTS is known. */
90 bool const have_video = video && c->first_video();
91 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
93 /* First, make one of them start at 0 */
95 if (have_audio && have_video) {
/* Shift both streams by the same amount so the earlier of the two starts at 0 */
96 _video_pts_offset = _audio_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
97 } else if (have_video) {
98 _video_pts_offset = - c->first_video().get();
99 } else if (have_audio) {
100 _audio_pts_offset = - c->audio_stream()->first_audio.get();
103 /* Now adjust both so that the video pts starts on a frame */
104 if (have_video && have_audio) {
105 double first_video = c->first_video().get() + _video_pts_offset;
106 double const old_first_video = first_video;
108 /* Round the first video up to a frame boundary */
/* The 1e-6 tolerance avoids rounding up when we are already (numerically)
   on a frame boundary. */
109 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
110 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Apply the same extra shift to both offsets so A/V sync is preserved */
113 _video_pts_offset += first_video - old_first_video;
114 _audio_pts_offset += first_video - old_first_video;
/* Destructor: close the subtitle codec context (if one was opened by
 * setup_subtitle()) under _mutex.  Other codec contexts are presumably
 * owned/closed elsewhere — confirm against the full file.
 */
118 FFmpegDecoder::~FFmpegDecoder ()
120 boost::mutex::scoped_lock lm (_mutex);
122 if (_subtitle_codec_context) {
123 avcodec_close (_subtitle_codec_context);
/* Flush the decoders at end-of-stream: drain any frames still buffered inside
 * the video decoder, then (if we are decoding audio) drain the audio decoder,
 * and finally flush the AudioDecoder base class.
 */
128 FFmpegDecoder::flush ()
130 /* Get any remaining frames */
135 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Repeatedly decode with the (empty) packet until the video decoder yields no more frames */
138 while (decode_video_packet ()) {}
141 if (_ffmpeg_content->audio_stream() && _decode_audio) {
142 decode_audio_packet ();
145 AudioDecoder::flush ();
/* Read one packet from the format context and dispatch it to the appropriate
 * decoder (video / audio / subtitle) based on its stream index.
 * On read failure that is not EOF the error is logged; EOF presumably leads to
 * flush() — the intervening lines are not visible in this extract.
 */
149 FFmpegDecoder::pass ()
151 int r = av_read_frame (_format_context, &_packet);
154 if (r != AVERROR_EOF) {
155 /* Maybe we should fail here, but for now we'll just finish off instead */
157 av_strerror (r, buf, sizeof(buf));
158 shared_ptr<const Film> film = _film.lock ();
160 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
/* Reset _frame before handing it to a decoder */
167 avcodec_get_frame_defaults (_frame);
169 shared_ptr<const Film> film = _film.lock ();
172 int const si = _packet.stream_index;
174 if (si == _video_stream && _decode_video) {
175 decode_video_packet ();
176 } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
177 decode_audio_packet ();
/* Subtitles are only decoded if the film asked for them */
178 } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
179 decode_subtitle_packet ();
/* av_read_frame allocates the packet's data; release it */
182 av_free_packet (&_packet);
186 /** @param data pointer to array of pointers to buffers.
187 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* Deinterleave `size' bytes of decoded audio into per-channel float buffers
 * scaled to roughly [-1, 1).  Interleaved formats (S16/S32/FLT) are walked
 * sample-by-sample; planar formats (S16P/FLTP) are converted/copied per plane.
 * @return AudioBuffers with one plane per content audio channel.
 * @throws DecodeError for an unrecognised sample format.
 * NOTE(review): the channel/sample stepping statements of the interleaved
 * cases are not visible in this extract.
 */
189 shared_ptr<AudioBuffers>
190 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
192 assert (_ffmpeg_content->audio_channels());
193 assert (bytes_per_audio_sample());
195 /* Deinterleave and convert to float */
/* size must be a whole number of (sample * channel) groups */
197 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
199 int const total_samples = size / bytes_per_audio_sample();
200 int const frames = total_samples / _ffmpeg_content->audio_channels();
201 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
203 switch (audio_sample_format()) {
204 case AV_SAMPLE_FMT_S16:
206 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
209 for (int i = 0; i < total_samples; ++i) {
/* Scale signed 16-bit to float by 2^15 */
210 audio->data(channel)[sample] = float(*p++) / (1 << 15);
213 if (channel == _ffmpeg_content->audio_channels()) {
221 case AV_SAMPLE_FMT_S16P:
/* Planar 16-bit: one input plane per channel */
223 int16_t** p = reinterpret_cast<int16_t **> (data);
224 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
225 for (int j = 0; j < frames; ++j) {
226 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
232 case AV_SAMPLE_FMT_S32:
234 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
237 for (int i = 0; i < total_samples; ++i) {
/* Scale signed 32-bit to float by 2^31 */
238 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
241 if (channel == _ffmpeg_content->audio_channels()) {
249 case AV_SAMPLE_FMT_FLT:
/* Interleaved float: copy sample-by-sample, no scaling needed */
251 float* p = reinterpret_cast<float*> (data[0]);
254 for (int i = 0; i < total_samples; ++i) {
255 audio->data(channel)[sample] = *p++;
258 if (channel == _ffmpeg_content->audio_channels()) {
266 case AV_SAMPLE_FMT_FLTP:
/* Planar float: a straight memcpy per channel */
268 float** p = reinterpret_cast<float**> (data);
269 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
270 memcpy (audio->data(i), p[i], frames * sizeof(float));
276 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* @return The sample format of the audio codec, or (AVSampleFormat) 0
 * when the content has no audio stream.
 */
283 FFmpegDecoder::audio_sample_format () const
285 if (!_ffmpeg_content->audio_stream()) {
286 return (AVSampleFormat) 0;
289 return audio_codec_context()->sample_fmt;
/* @return Bytes per (single-channel) audio sample for the current sample format. */
293 FFmpegDecoder::bytes_per_audio_sample () const
295 return av_get_bytes_per_sample (audio_sample_format ());
299 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
302 optional<ContentTime> last_video;
303 optional<ContentTime> last_audio;
305 while (!finished (last_video, last_audio, frames_read)) {
306 int r = av_read_frame (_format_context, &_packet);
308 /* We should flush our decoders here, possibly yielding a few more frames,
309 but the consequence of having to do that is too hideous to contemplate.
310 Instead we give up and say that you can't seek too close to the end
318 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
320 if (_packet.stream_index == _video_stream) {
322 avcodec_get_frame_defaults (_frame);
325 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
326 if (r >= 0 && finished) {
328 (av_frame_get_best_effort_timestamp (_frame) * time_base + _video_pts_offset) * TIME_HZ
332 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
333 AVPacket copy_packet = _packet;
334 while (copy_packet.size > 0) {
337 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
338 if (r >= 0 && finished) {
340 (av_frame_get_best_effort_timestamp (_frame) * time_base + _audio_pts_offset) * TIME_HZ
344 copy_packet.data += r;
345 copy_packet.size -= r;
349 av_free_packet (&_packet);
/* Predicate for minimal_run(): true once either stream has produced a frame
 * at or beyond the seek target `seek'.
 */
356 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
358 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
362 FFmpegDecoder::seek_final_finished (int n, int done) const
/* Seek the format context to (approximately) content time `t' and flush all
 * codec buffers so stale frames are not emitted after the seek.
 * The target is converted into both the video and audio streams' timebases,
 * removing our PTS offsets, and we seek to the earlier of the two.
 * NOTE(review): initial_a is computed unconditionally — this appears to assume
 * an audio stream exists; confirm against the full file.
 */
368 FFmpegDecoder::seek_and_flush (ContentTime t)
370 int64_t const initial_v = ((double (t) / TIME_HZ) - _video_pts_offset) /
371 av_q2d (_format_context->streams[_video_stream]->time_base);
373 int64_t const initial_a = ((double (t) / TIME_HZ) - _audio_pts_offset) /
374 av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base);
/* BACKWARD flag: land on the keyframe at or before the target */
376 av_seek_frame (_format_context, _video_stream, min (initial_v, initial_a), AVSEEK_FLAG_BACKWARD);
378 avcodec_flush_buffers (video_codec_context());
379 if (audio_codec_context ()) {
380 avcodec_flush_buffers (audio_codec_context ());
382 if (_subtitle_codec_context) {
383 avcodec_flush_buffers (_subtitle_codec_context);
/* Seek to content time `time'.  When `accurate', seek 200ms early and then use
 * minimal_run() to step forward to the exact frame; otherwise a plain
 * keyframe seek is good enough.
 */
388 FFmpegDecoder::seek (ContentTime time, bool accurate)
390 Decoder::seek (time, accurate);
392 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
393 a number plucked from the air) earlier than we want to end up. The loop below
394 will hopefully then step through to where we want to be.
397 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
398 ContentTime initial_seek = time - pre_roll;
399 if (initial_seek < 0) {
403 /* Initial seek time in the video stream's timebase */
405 seek_and_flush (initial_seek);
/* First pass: count how many packets it takes to overrun the target… */
412 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* …then seek again and stop one packet short of the overrun */
414 seek_and_flush (initial_seek);
416 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
421 FFmpegDecoder::decode_audio_packet ()
423 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
427 AVPacket copy_packet = _packet;
429 while (copy_packet.size > 0) {
432 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
434 if (decode_result < 0) {
435 shared_ptr<const Film> film = _film.lock ();
437 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
441 if (frame_finished) {
442 ContentTime const t = rint (
443 (av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
444 * av_frame_get_best_effort_timestamp(_frame) + _audio_pts_offset) * TIME_HZ
447 int const data_size = av_samples_get_buffer_size (
448 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
451 audio (deinterleave_audio (_frame->data, data_size), t);
454 copy_packet.data += decode_result;
455 copy_packet.size -= decode_result;
/* Decode the current _packet as video.  Decoded frames are pushed through a
 * FilterGraph (one is created lazily per frame size/pixel format and cached in
 * _filter_graphs), optionally post-processed, and emitted via video() with a
 * timestamp derived from the frame PTS.  Frames without a PTS are dropped.
 * @return presumably true while the decoder is producing frames (used by
 * flush()'s drain loop) — the return statements are not visible in this extract.
 */
460 FFmpegDecoder::decode_video_packet ()
463 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* _filter_graphs is shared; guard the lookup/insert */
467 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
469 shared_ptr<FilterGraph> graph;
/* Find an existing graph that can handle this frame's size and pixel format */
471 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
472 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
476 if (i == _filter_graphs.end ()) {
477 shared_ptr<const Film> film = _film.lock ();
/* No suitable graph: build and cache a new one */
480 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
481 _filter_graphs.push_back (graph);
483 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* The graph may emit zero or more (image, PTS) pairs per input frame */
488 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
490 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
492 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
494 shared_ptr<Image> image = i->first;
495 if (!post_process.empty ()) {
496 image = image->post_process (post_process, true);
499 if (i->second != AV_NOPTS_VALUE) {
/* Map the frame PTS into our time domain */
500 ContentTime const t = rint ((i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _video_pts_offset) * TIME_HZ);
501 video (image, false, t);
503 shared_ptr<const Film> film = _film.lock ();
505 film->log()->log ("Dropping frame without PTS");
/* Open the subtitle decoder for the content's subtitle stream, if any.
 * Sets _subtitle_codec_context / _subtitle_codec under _mutex.
 * @throws DecodeError if no decoder is found or it cannot be opened.
 */
514 FFmpegDecoder::setup_subtitle ()
516 boost::mutex::scoped_lock lm (_mutex);
/* Bail out if there is no subtitle stream or its index is out of range */
518 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
522 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
523 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
525 if (_subtitle_codec == 0) {
526 throw DecodeError (_("could not find subtitle decoder"));
529 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
530 throw DecodeError (N_("could not open subtitle decoder"));
/* Decode the current _packet as a subtitle.  Only single-rect bitmap subtitles
 * are supported: the paletted bitmap is expanded to an RGBA Image and emitted
 * via subtitle() with its on-screen rectangle expressed as fractions of the
 * video size, plus from/to display times.
 * @throws DecodeError for multi-part or non-bitmap subtitles.
 */
535 FFmpegDecoder::decode_subtitle_packet ()
539 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
543 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
544 indicate that the previous subtitle should stop.
546 if (sub.num_rects <= 0) {
/* Empty image => "clear the current subtitle" */
547 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
549 } else if (sub.num_rects > 1) {
550 throw DecodeError (_("multi-part subtitles not yet supported"));
553 /* Subtitle PTS in seconds (within the source, not taking into account any of the
554 source that we may have chopped off for the DCP)
556 double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;
558 /* hence start time for this sub */
/* start_display_time / end_display_time are in milliseconds relative to the packet time */
559 DCPTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
560 DCPTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
562 AVSubtitleRect const * rect = sub.rects[0];
564 if (rect->type != SUBTITLE_BITMAP) {
565 throw DecodeError (_("non-bitmap subtitles not yet supported"));
568 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
570 /* Start of the first line in the subtitle */
571 uint8_t* sub_p = rect->pict.data[0];
572 /* sub_p looks up into a RGB palette which is here */
573 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
574 /* Start of the output data */
575 uint32_t* out_p = (uint32_t *) image->data()[0];
/* Expand the paletted bitmap: each source byte indexes a 32-bit palette entry */
577 for (int y = 0; y < rect->h; ++y) {
578 uint8_t* sub_line_p = sub_p;
579 uint32_t* out_line_p = out_p;
580 for (int x = 0; x < rect->w; ++x) {
581 *out_line_p++ = palette[*sub_line_p++];
/* Step to the next row in source and destination (strides may differ) */
583 sub_p += rect->pict.linesize[0];
584 out_p += image->stride()[0] / sizeof (uint32_t);
587 libdcp::Size const vs = _ffmpeg_content->video_size ();
/* Rectangle as fractions of the full video frame */
591 dcpomatic::Rect<double> (
592 static_cast<double> (rect->x) / vs.width,
593 static_cast<double> (rect->y) / vs.height,
594 static_cast<double> (rect->w) / vs.width,
595 static_cast<double> (rect->h) / vs.height
/* Free the AVSubtitle's internally-allocated rects */
602 avsubtitle_free (&sub);