2 Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>
4 This file is part of DCP-o-matic.
6 DCP-o-matic is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 DCP-o-matic is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
21 /** @file src/ffmpeg_decoder.cc
22 * @brief A decoder using FFmpeg to decode content.
26 #include "exceptions.h"
30 #include "dcpomatic_log.h"
31 #include "ffmpeg_decoder.h"
32 #include "text_decoder.h"
33 #include "ffmpeg_audio_stream.h"
34 #include "ffmpeg_subtitle_stream.h"
35 #include "video_filter_graph.h"
36 #include "audio_buffers.h"
37 #include "ffmpeg_content.h"
38 #include "raw_image_proxy.h"
39 #include "video_decoder.h"
41 #include "audio_decoder.h"
42 #include "compose.hpp"
43 #include "text_content.h"
44 #include "audio_content.h"
45 #include "frame_interval_checker.h"
46 #include <dcp/subtitle_string.h>
47 #include <sub/ssa_reader.h>
48 #include <sub/subtitle.h>
49 #include <sub/collect.h>
51 #include <libavcodec/avcodec.h>
52 #include <libavformat/avformat.h>
54 #include <boost/algorithm/string.hpp>
70 using std::shared_ptr;
71 using std::make_shared;
72 using boost::is_any_of;
74 using boost::optional;
75 using std::dynamic_pointer_cast;
77 using namespace dcpomatic;
/* Construct a decoder for some FFmpeg content.  Sub-decoders (video, audio,
   text) are created only for the parts of the content that are present and
   in use.  NOTE(review): this extract appears to be missing some lines
   (initialiser list, braces) from the original file.
*/
80 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
83 , _have_current_subtitle (false)
85 if (c->video && c->video->use()) {
86 video = make_shared<VideoDecoder>(this, c);
/* Offset applied to the file's PTS values so that emitted times are relative
   to the start of the content */
87 _pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
88 /* It doesn't matter what size or pixel format this is, it just needs to be black */
89 _black_image.reset (new Image (AV_PIX_FMT_RGB24, dcp::Size (128, 128), true));
90 _black_image->make_black ();
96 audio = make_shared<AudioDecoder>(this, c->audio, fast);
100 /* XXX: this time here should be the time of the first subtitle, not 0 */
101 text.push_back (make_shared<TextDecoder>(this, c->only_text(), ContentTime()));
/* One "next expected time" slot per stream in the file; used by
   process_audio_frame() to synthesise timestamps for frames without a PTS */
104 _next_time.resize (_format_context->nb_streams);
/* Drain any frames still buffered in the codecs, then bring all streams up to
   the same (rounded-up) length by emitting black video and silent audio. */
109 FFmpegDecoder::flush ()
111 /* Get any remaining frames */
/* A packet with null data asks the decoder to flush its buffered frames */
114 packet.data = nullptr;
116 while (video && decode_video_packet(&packet)) {}
119 packet.data = nullptr;
121 decode_audio_packet (&packet);
124 /* Make sure all streams are the same length and round up to the next video frame */
126 auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
127 ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
128 full_length = full_length.ceil (frc.source);
130 double const vfr = _ffmpeg_content->video_frame_rate().get();
131 auto const f = full_length.frames_round (vfr);
/* Index of the next video frame after the last one that was emitted */
132 auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
/* Pad out to the rounded-up length with black frames */
134 video->emit (film(), shared_ptr<const ImageProxy> (new RawImageProxy (_black_image)), v);
139 for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
140 auto a = audio->stream_position(film(), i);
141 /* Unfortunately if a is 0 that really means that we don't know the stream position since
142 there has been no data on it since the last seek. In this case we'll just do nothing
143 here. I'm not sure if that's the right idea.
145 if (a > ContentTime()) {
146 while (a < full_length) {
/* Emit silence in chunks of at most 0.1s until this stream reaches full_length */
147 auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
148 auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
149 silence->make_silent ();
150 audio->emit (film(), i, silence, a, true);
/* Read one packet from the format context and dispatch it to the video,
   subtitle or audio decode path as appropriate. */
163 FFmpegDecoder::pass ()
165 auto packet = av_packet_alloc();
166 DCPOMATIC_ASSERT (packet);
168 int r = av_read_frame (_format_context, packet);
170 /* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
171 has pretty-much succeeded (and hence generated data which should be processed).
172 Hence it makes sense to continue here in that case.
174 if (r < 0 && r != AVERROR_INVALIDDATA) {
175 if (r != AVERROR_EOF) {
176 /* Maybe we should fail here, but for now we'll just finish off instead */
/* Log FFmpeg's human-readable error string alongside the raw error code */
178 av_strerror (r, buf, sizeof(buf));
179 LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
182 av_packet_free (&packet);
187 int const si = packet->stream_index;
188 auto fc = _ffmpeg_content;
/* Dispatch on the packet's stream index: video first, then subtitles,
   otherwise treat it as audio */
190 if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
191 decode_video_packet (packet);
192 } else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
193 decode_subtitle_packet (packet);
195 decode_audio_packet (packet);
198 av_packet_free (&packet);
203 /** @param data pointer to array of pointers to buffers.
204 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* Convert the samples currently in _frame to a float AudioBuffers, one buffer
   per channel, deinterleaving where the source format is interleaved. */
206 shared_ptr<AudioBuffers>
207 FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
209 DCPOMATIC_ASSERT (bytes_per_audio_sample (stream));
211 DCPOMATIC_DISABLE_WARNINGS
212 int const size = av_samples_get_buffer_size (
213 0, stream->stream(_format_context)->codec->channels, _frame->nb_samples, audio_sample_format (stream), 1
215 DCPOMATIC_ENABLE_WARNINGS
217 /* XXX: can't we just use _frame->nb_samples directly here? */
218 /* XXX: can't we use swr_convert() to do the format conversion? */
220 /* Deinterleave and convert to float */
222 /* total_samples and frames will be rounded down here, so if there are stray samples at the end
223 of the block that do not form a complete sample or frame they will be dropped.
225 int const total_samples = size / bytes_per_audio_sample (stream);
226 int const channels = stream->channels();
227 int const frames = total_samples / channels;
228 auto audio = make_shared<AudioBuffers>(channels, frames);
229 auto data = audio->data();
/* One branch per supported FFmpeg sample format; interleaved formats are
   split out channel-by-channel, planar formats are converted/copied per plane */
231 switch (audio_sample_format (stream)) {
232 case AV_SAMPLE_FMT_U8:
234 uint8_t* p = reinterpret_cast<uint8_t *> (_frame->data[0]);
237 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): scaling unsigned 8-bit samples by 1/(1 << 23) gives output far
   below full scale, unlike the full-scale divisors used for the other integer
   formats — confirm this factor is intended */
238 data[channel][sample] = float(*p++) / (1 << 23);
241 if (channel == channels) {
249 case AV_SAMPLE_FMT_S16:
251 int16_t* p = reinterpret_cast<int16_t *> (_frame->data[0]);
254 for (int i = 0; i < total_samples; ++i) {
255 data[channel][sample] = float(*p++) / (1 << 15);
258 if (channel == channels) {
/* Planar 16-bit: one source buffer per channel, so no deinterleaving needed */
266 case AV_SAMPLE_FMT_S16P:
268 int16_t** p = reinterpret_cast<int16_t **> (_frame->data);
269 for (int i = 0; i < channels; ++i) {
270 for (int j = 0; j < frames; ++j) {
271 data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
277 case AV_SAMPLE_FMT_S32:
279 int32_t* p = reinterpret_cast<int32_t *> (_frame->data[0]);
282 for (int i = 0; i < total_samples; ++i) {
283 data[channel][sample] = static_cast<float>(*p++) / 2147483648;
286 if (channel == channels) {
294 case AV_SAMPLE_FMT_S32P:
296 int32_t** p = reinterpret_cast<int32_t **> (_frame->data);
297 for (int i = 0; i < channels; ++i) {
298 for (int j = 0; j < frames; ++j) {
299 data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
/* Interleaved float: already in the target sample type, just deinterleave */
305 case AV_SAMPLE_FMT_FLT:
307 float* p = reinterpret_cast<float*> (_frame->data[0]);
310 for (int i = 0; i < total_samples; ++i) {
311 data[channel][sample] = *p++;
314 if (channel == channels) {
/* Planar float: memcpy each plane; any channels the frame lacks are silenced */
322 case AV_SAMPLE_FMT_FLTP:
324 float** p = reinterpret_cast<float**> (_frame->data);
325 DCPOMATIC_ASSERT (_frame->channels <= channels);
326 /* Sometimes there aren't as many channels in the _frame as in the stream */
327 for (int i = 0; i < _frame->channels; ++i) {
328 memcpy (data[i], p[i], frames * sizeof(float));
330 for (int i = _frame->channels; i < channels; ++i) {
331 audio->make_silent (i);
337 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format (stream))));
345 FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
347 DCPOMATIC_DISABLE_WARNINGS
348 return stream->stream (_format_context)->codec->sample_fmt;
349 DCPOMATIC_ENABLE_WARNINGS
354 FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
356 return av_get_bytes_per_sample (audio_sample_format (stream));
/* Seek the file to (roughly) the given content time, then flush codec and
   filter-graph state so that no pre-seek frames can leak through. */
361 FFmpegDecoder::seek (ContentTime time, bool accurate)
363 Decoder::seek (time, accurate);
365 /* If we are doing an `accurate' seek, we need to use pre-roll, as
366 we don't really know what the seek will give us.
369 auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
372 /* XXX: it seems debatable whether PTS should be used here...
373 http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
/* Seek on the video stream if there is one, otherwise on the content's
   audio stream */
376 optional<int> stream;
379 stream = _video_stream;
381 DCPOMATIC_ASSERT (_ffmpeg_content->audio);
382 auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
384 stream = s->index (_format_context);
388 DCPOMATIC_ASSERT (stream);
/* Remove the PTS offset to get back to the file's own timeline, then convert
   to the chosen stream's time base */
390 auto u = time - _pts_offset;
391 if (u < ContentTime ()) {
397 u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),
402 /* Force re-creation of filter graphs to reset them and hence to make sure
403 they don't have any pre-seek frames knocking about.
405 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
406 _filter_graphs.clear ();
/* Flush every codec we may have been feeding: video, all audio streams,
   and subtitles */
409 if (video_codec_context ()) {
410 avcodec_flush_buffers (video_codec_context());
413 DCPOMATIC_DISABLE_WARNINGS
414 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
415 avcodec_flush_buffers (i->stream(_format_context)->codec);
417 DCPOMATIC_ENABLE_WARNINGS
419 if (subtitle_codec_context ()) {
420 avcodec_flush_buffers (subtitle_codec_context ());
/* Subtitle state and the synthesised per-stream audio timestamps are no
   longer valid after a seek */
423 _have_current_subtitle = false;
425 for (auto& i: _next_time) {
426 i = optional<ContentTime>();
431 shared_ptr<FFmpegAudioStream>
432 FFmpegDecoder::audio_stream_from_index (int index) const
434 /* XXX: inefficient */
435 auto streams = ffmpeg_content()->ffmpeg_audio_streams();
436 auto stream = streams.begin();
437 while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
441 if (stream == streams.end ()) {
/* Take the decoded audio frame currently in _frame, convert it to float,
   work out its ContentTime (falling back to _next_time when the frame has no
   PTS), trim any part that falls before time 0 and emit the remainder. */
450 FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream, int stream_index, int64_t packet_pts)
452 auto data = deinterleave_audio (stream);
455 if (_frame->pts == AV_NOPTS_VALUE) {
456 /* In some streams we see not every frame coming through with a timestamp; for those
457 that have AV_NOPTS_VALUE we need to work out the timestamp ourselves. This is
458 particularly noticeable with TrueHD streams (see #1111).
460 if (_next_time[stream_index]) {
461 ct = *_next_time[stream_index];
464 ct = ContentTime::from_seconds (
465 _frame->best_effort_timestamp *
466 av_q2d (stream->stream(_format_context)->time_base))
/* Remember where the next frame on this stream should start, in case it also
   arrives without a timestamp */
470 _next_time[stream_index] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());
472 if (ct < ContentTime()) {
473 /* Discard audio data that comes before time 0 */
474 auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
475 data->move (data->frames() - remove, remove, 0);
476 data->set_frames (data->frames() - remove);
477 ct += ContentTime::from_frames (remove, stream->frame_rate());
/* Still negative after trimming: something is wrong with the timestamps, so
   log the details rather than emitting bogus data */
480 if (ct < ContentTime()) {
482 "Crazy timestamp %1 for %2 samples in stream %3 packet pts %4 (ts=%5 tb=%6, off=%7)",
487 _frame->best_effort_timestamp,
488 av_q2d(stream->stream(_format_context)->time_base),
489 to_string(_pts_offset)
493 /* Give this data provided there is some, and its time is sane */
494 if (ct >= ContentTime() && data->frames() > 0) {
495 audio->emit (film(), stream, data, ct);
501 FFmpegDecoder::decode_audio_packet (AVPacket* packet)
503 int const stream_index = packet->stream_index;
504 auto stream = audio_stream_from_index (stream_index);
509 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
510 several times. Make a simple copy so we can alter data and size.
512 AVPacket copy_packet = *packet;
514 while (copy_packet.size > 0) {
516 DCPOMATIC_DISABLE_WARNINGS
517 int decode_result = avcodec_decode_audio4 (stream->stream(_format_context)->codec, _frame, &frame_finished, ©_packet);
518 DCPOMATIC_ENABLE_WARNINGS
519 if (decode_result < 0) {
520 /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
521 some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
522 if it overreads the auxiliary data. ffplay carries on if frame_finished is true,
523 even in the face of such an error, so I think we should too.
525 Returning from the method here caused mantis #352.
527 LOG_WARNING ("avcodec_decode_audio4 failed (%1)", decode_result);
529 /* Fudge decode_result so that we come out of the while loop when
530 we've processed this data.
532 decode_result = copy_packet.size;
535 if (frame_finished) {
536 process_audio_frame (stream, stream_index, copy_packet.pts);
539 copy_packet.data += decode_result;
540 copy_packet.size -= decode_result;
/* Decode one video packet; run any resulting frame through a filter graph
   matching its size and pixel format (building and caching a new graph if
   necessary) and emit the filtered images at times derived from their PTS. */
546 FFmpegDecoder::decode_video_packet (AVPacket* packet)
548 DCPOMATIC_ASSERT (_video_stream);
551 DCPOMATIC_DISABLE_WARNINGS
552 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, packet) < 0 || !frame_finished) {
555 DCPOMATIC_ENABLE_WARNINGS
557 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
559 shared_ptr<VideoFilterGraph> graph;
/* Look for a cached graph that can process frames of this size/format */
561 auto i = _filter_graphs.begin();
562 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
566 if (i == _filter_graphs.end ()) {
/* None found: build one for this geometry and cache it */
567 dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
568 graph = make_shared<VideoFilterGraph>(dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format, vfr);
569 graph->setup (_ffmpeg_content->filters ());
570 _filter_graphs.push_back (graph);
571 LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
576 auto images = graph->process (_frame);
578 for (auto const& i: images) {
580 auto image = i.first;
582 if (i.second != AV_NOPTS_VALUE) {
/* Convert the stream timestamp to seconds and add the content's PTS offset */
583 double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();
587 make_shared<RawImageProxy>(image),
588 llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
591 LOG_WARNING_NC ("Dropping frame without PTS");
/* Decode one subtitle packet: close off any subtitle that is currently being
   shown, then emit whatever the new packet contains (bitmap or ASS text). */
600 FFmpegDecoder::decode_subtitle_packet (AVPacket* packet)
604 if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
608 /* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
609 if (_have_current_subtitle) {
610 if (_current_subtitle_to) {
611 only_text()->emit_stop (min(*_current_subtitle_to, subtitle_period(sub).from + _pts_offset));
613 only_text()->emit_stop (subtitle_period(sub).from + _pts_offset);
615 _have_current_subtitle = false;
618 if (sub.num_rects <= 0) {
619 /* Nothing new in this subtitle */
623 /* Subtitle PTS (within the source, not taking into account any of the
624 source that we may have chopped off for the DCP).
626 auto sub_period = subtitle_period (sub);
628 from = sub_period.from + _pts_offset;
/* Remember when this subtitle should stop; it may have no declared end time */
630 _current_subtitle_to = *sub_period.to + _pts_offset;
632 _current_subtitle_to = optional<ContentTime>();
633 _have_current_subtitle = true;
636 for (unsigned int i = 0; i < sub.num_rects; ++i) {
637 auto const rect = sub.rects[i];
/* Each rect may be a bitmap, plain text or ASS subtitle */
639 switch (rect->type) {
642 case SUBTITLE_BITMAP:
643 decode_bitmap_subtitle (rect, from);
646 cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
649 decode_ass_subtitle (rect->ass, from);
654 if (_current_subtitle_to) {
655 only_text()->emit_stop (*_current_subtitle_to);
658 avsubtitle_free (&sub);
/* Convert one paletted bitmap subtitle rect to a BGRA Image (applying any
   user-chosen colour overrides from the subtitle stream) and emit it with a
   position/size expressed as a proportion of the target frame. */
663 FFmpegDecoder::decode_bitmap_subtitle (AVSubtitleRect const * rect, ContentTime from)
665 /* Note BGRA is expressed little-endian, so the first byte in the word is B, second
666 G, third R, fourth A.
668 auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), true);
670 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
671 /* Start of the first line in the subtitle */
672 auto sub_p = rect->pict.data[0];
673 /* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
674 (i.e. first byte B, second G, third R, fourth A)
676 auto const palette = rect->pict.data[1];
678 /* Start of the first line in the subtitle */
679 auto sub_p = rect->data[0];
680 /* sub_p looks up into a BGRA palette which is at rect->data[1].
681 (first byte B, second G, third R, fourth A)
683 auto const* palette = rect->data[1];
685 /* And the stream has a map of those palette colours to colours
686 chosen by the user; created a `mapped' palette from those settings.
688 auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
689 vector<RGBA> mapped_palette (rect->nb_colors);
690 for (int i = 0; i < rect->nb_colors; ++i) {
/* NOTE(review): `palette' is not advanced anywhere in the visible lines, so
   every entry would read the same bytes — presumably the original advances it
   by 4 each iteration; confirm against the full source */
691 RGBA c (palette[2], palette[1], palette[0], palette[3]);
692 auto j = colour_map.find (c);
693 if (j != colour_map.end ()) {
694 mapped_palette[i] = j->second;
696 /* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
697 it is from a project that was created before this stuff was added. Just use the
698 colour straight from the original palette.
700 mapped_palette[i] = c;
705 /* Start of the output data */
706 auto out_p = image->data()[0];
/* Walk the source bitmap row by row, looking each palette index up in the
   mapped palette to produce BGRA output */
708 for (int y = 0; y < rect->h; ++y) {
709 auto sub_line_p = sub_p;
710 auto out_line_p = out_p;
711 for (int x = 0; x < rect->w; ++x) {
712 auto const p = mapped_palette[*sub_line_p++];
718 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
719 sub_p += rect->pict.linesize[0];
721 sub_p += rect->linesize[0];
723 out_p += image->stride()[0];
726 int target_width = subtitle_codec_context()->width;
727 if (target_width == 0 && video_codec_context()) {
728 /* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
729 know if it's supposed to mean something from FFmpeg's point of view.
731 target_width = video_codec_context()->width;
733 int target_height = subtitle_codec_context()->height;
734 if (target_height == 0 && video_codec_context()) {
735 target_height = video_codec_context()->height;
737 DCPOMATIC_ASSERT (target_width);
738 DCPOMATIC_ASSERT (target_height);
/* Subtitle position and size as a proportion of the target frame */
739 dcpomatic::Rect<double> const scaled_rect (
740 static_cast<double>(rect->x) / target_width,
741 static_cast<double>(rect->y) / target_height,
742 static_cast<double>(rect->w) / target_width,
743 static_cast<double>(rect->h) / target_height
746 only_text()->emit_bitmap_start (from, image, scaled_rect);
/* Parse one ASS `Dialogue:' line produced by FFmpeg: skip the first nine
   comma-separated header fields, hand the remaining text to libsub's
   SSAReader, and emit each resulting subtitle. */
751 FFmpegDecoder::decode_ass_subtitle (string ass, ContentTime from)
753 /* We have no styles and no Format: line, so I'm assuming that FFmpeg
754 produces a single format of Dialogue: lines...
/* Count commas until we have passed 9 of them; everything after that is the
   subtitle text itself */
759 for (size_t i = 0; i < ass.length(); ++i) {
760 if (commas < 9 && ass[i] == ',') {
762 } else if (commas == 9) {
771 sub::RawSubtitle base;
/* Parse with the content's video size so that positions come out right */
772 auto raw = sub::SSAReader::parse_line (
775 _ffmpeg_content->video->size().width,
776 _ffmpeg_content->video->size().height
779 for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
780 only_text()->emit_plain_start (from, i);