Support SSA subtitles embedded within FFmpeg files.
[dcpomatic.git] / src / lib / ffmpeg_examiner.cc
1 /*
2     Copyright (C) 2013-2015 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 extern "C" {
21 #include <libavcodec/avcodec.h>
22 #include <libavformat/avformat.h>
23 #include <libavutil/pixfmt.h>
24 #include <libavutil/pixdesc.h>
25 }
26 #include "ffmpeg_examiner.h"
27 #include "ffmpeg_content.h"
28 #include "job.h"
29 #include "ffmpeg_audio_stream.h"
30 #include "ffmpeg_subtitle_stream.h"
31 #include "util.h"
32 #include "safe_stringstream.h"
33 #include <boost/foreach.hpp>
34 #include <iostream>
35
36 #include "i18n.h"
37
38 using std::string;
39 using std::cout;
40 using std::max;
41 using boost::shared_ptr;
42 using boost::optional;
43
44 /** @param job job that the examiner is operating in, or 0 */
45 FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c, shared_ptr<Job> job)
46         : FFmpeg (c)
47         , _video_length (0)
48         , _need_video_length (false)
49 {
50         /* Find audio and subtitle streams */
51
52         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
53                 AVStream* s = _format_context->streams[i];
54                 if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
55
56                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
57                            so bodge it here.  No idea why we should have to do this.
58                         */
59
60                         if (s->codec->channel_layout == 0) {
61                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
62                         }
63
64                         _audio_streams.push_back (
65                                 shared_ptr<FFmpegAudioStream> (
66                                         new FFmpegAudioStream (audio_stream_name (s), s->id, s->codec->sample_rate, s->codec->channels)
67                                         )
68                                 );
69
70                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
71                         _subtitle_streams.push_back (shared_ptr<FFmpegSubtitleStream> (new FFmpegSubtitleStream (subtitle_stream_name (s), s->id)));
72                 }
73         }
74
75         /* See if the header has duration information in it */
76         _need_video_length = _format_context->duration == AV_NOPTS_VALUE;
77         if (!_need_video_length) {
78                 _video_length = (double (_format_context->duration) / AV_TIME_BASE) * video_frame_rate().get ();
79         }
80
81         if (job) {
82                 if (_need_video_length) {
83                         job->sub (_("Finding length and subtitles"));
84                 } else {
85                         job->sub (_("Finding subtitles"));
86                 }
87         }
88
89         /* Run through until we find:
90          *   - the first video.
91          *   - the first audio for each stream.
92          *   - the subtitle periods for each stream.
93          *
94          * We have to note subtitle periods as otherwise we have no way of knowing
95          * where we should look for subtitles (video and audio are always present,
96          * so they are ok).
97          */
98
99         int64_t const len = _file_group.length ();
100         while (true) {
101                 int r = av_read_frame (_format_context, &_packet);
102                 if (r < 0) {
103                         break;
104                 }
105
106                 if (job) {
107                         if (len > 0) {
108                                 job->set_progress (float (_format_context->pb->pos) / len);
109                         } else {
110                                 job->set_progress_unknown ();
111                         }
112                 }
113
114                 AVCodecContext* context = _format_context->streams[_packet.stream_index]->codec;
115
116                 if (_packet.stream_index == _video_stream) {
117                         video_packet (context);
118                 }
119
120                 bool got_all_audio = true;
121
122                 for (size_t i = 0; i < _audio_streams.size(); ++i) {
123                         if (_audio_streams[i]->uses_index (_format_context, _packet.stream_index)) {
124                                 audio_packet (context, _audio_streams[i]);
125                         }
126                         if (!_audio_streams[i]->first_audio) {
127                                 got_all_audio = false;
128                         }
129                 }
130
131                 for (size_t i = 0; i < _subtitle_streams.size(); ++i) {
132                         if (_subtitle_streams[i]->uses_index (_format_context, _packet.stream_index)) {
133                                 subtitle_packet (context, _subtitle_streams[i]);
134                         }
135                 }
136
137                 av_free_packet (&_packet);
138
139                 if (_first_video && got_all_audio && _subtitle_streams.empty ()) {
140                         /* All done */
141                         break;
142                 }
143         }
144
145         /* Finish off any hanging subtitles at the end */
146         for (LastSubtitleMap::const_iterator i = _last_subtitle_start.begin(); i != _last_subtitle_start.end(); ++i) {
147                 if (i->second) {
148                         if (i->second->image) {
149                                 i->first->add_image_subtitle (
150                                         i->second->id,
151                                         ContentTimePeriod (
152                                                 i->second->time,
153                                                 ContentTime::from_frames (video_length(), video_frame_rate().get_value_or (24))
154                                                 )
155                                         );
156                         } else {
157                                 i->first->add_text_subtitle (
158                                         i->second->id,
159                                         ContentTimePeriod (
160                                                 i->second->time,
161                                                 ContentTime::from_frames (video_length(), video_frame_rate().get_value_or (24))
162                                                 )
163                                         );
164                         }
165                 }
166         }
167
168         /* We just added subtitles to our streams without taking the PTS offset into account;
169            this is because we might not know the PTS offset when the first subtitle is seen.
170            Now we know the PTS offset so we can apply it to those subtitles.
171         */
172         if (video_frame_rate()) {
173                 BOOST_FOREACH (shared_ptr<FFmpegSubtitleStream> i, _subtitle_streams) {
174                         i->add_offset (pts_offset (_audio_streams, _first_video, video_frame_rate().get()));
175                 }
176         }
177 }
178
179 void
180 FFmpegExaminer::video_packet (AVCodecContext* context)
181 {
182         if (_first_video && !_need_video_length) {
183                 return;
184         }
185
186         int frame_finished;
187         if (avcodec_decode_video2 (context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
188                 if (!_first_video) {
189                         _first_video = frame_time (_format_context->streams[_video_stream]);
190                 }
191                 if (_need_video_length) {
192                         _video_length = frame_time (
193                                 _format_context->streams[_video_stream]
194                                 ).get_value_or (ContentTime ()).frames_round (video_frame_rate().get ());
195                 }
196         }
197 }
198
199 void
200 FFmpegExaminer::audio_packet (AVCodecContext* context, shared_ptr<FFmpegAudioStream> stream)
201 {
202         if (stream->first_audio) {
203                 return;
204         }
205
206         int frame_finished;
207         if (avcodec_decode_audio4 (context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
208                 stream->first_audio = frame_time (stream->stream (_format_context));
209         }
210 }
211
212 void
213 FFmpegExaminer::subtitle_packet (AVCodecContext* context, shared_ptr<FFmpegSubtitleStream> stream)
214 {
215         int frame_finished;
216         AVSubtitle sub;
217         if (avcodec_decode_subtitle2 (context, &sub, &frame_finished, &_packet) >= 0 && frame_finished) {
218                 string id = subtitle_id (sub);
219                 FFmpegSubtitlePeriod const period = subtitle_period (sub);
220                 bool const image = subtitle_is_image (sub);
221
222                 LastSubtitleMap::iterator last = _last_subtitle_start.find (stream);
223                 if (last != _last_subtitle_start.end() && last->second) {
224                         /* We have seen the start of a subtitle but not yet the end.  Whatever this is
225                            finishes the previous subtitle, so add it */
226                         if (image) {
227                                 stream->add_image_subtitle (last->second->id, ContentTimePeriod (last->second->time, period.from));
228                         } else {
229                                 stream->add_text_subtitle (last->second->id, ContentTimePeriod (last->second->time, period.from));
230                         }
231                         if (sub.num_rects == 0) {
232                                 /* This is a `proper' end-of-subtitle */
233                                 _last_subtitle_start[stream] = optional<SubtitleStart> ();
234                         } else {
235                                 /* This is just another subtitle, so we start again */
236                                 _last_subtitle_start[stream] = SubtitleStart (id, image, period.from);
237                         }
238                 } else if (sub.num_rects == 1) {
239                         if (period.to) {
240                                 if (image) {
241                                         stream->add_image_subtitle (id, ContentTimePeriod (period.from, period.to.get ()));
242                                 } else {
243                                         stream->add_text_subtitle (id, ContentTimePeriod (period.from, period.to.get ()));
244                                 }
245                         } else {
246                                 _last_subtitle_start[stream] = SubtitleStart (id, image, period.from);
247                         }
248                 }
249                 avsubtitle_free (&sub);
250         }
251 }
252
253 optional<ContentTime>
254 FFmpegExaminer::frame_time (AVStream* s) const
255 {
256         optional<ContentTime> t;
257
258         int64_t const bet = av_frame_get_best_effort_timestamp (_frame);
259         if (bet != AV_NOPTS_VALUE) {
260                 t = ContentTime::from_seconds (bet * av_q2d (s->time_base));
261         }
262
263         return t;
264 }
265
266 optional<double>
267 FFmpegExaminer::video_frame_rate () const
268 {
269         /* This use of r_frame_rate is debateable; there's a few different
270          * frame rates in the format context, but this one seems to be the most
271          * reliable.
272          */
273         return av_q2d (av_stream_get_r_frame_rate (_format_context->streams[_video_stream]));
274 }
275
276 dcp::Size
277 FFmpegExaminer::video_size () const
278 {
279         return dcp::Size (video_codec_context()->width, video_codec_context()->height);
280 }
281
282 /** @return Length according to our content's header */
283 Frame
284 FFmpegExaminer::video_length () const
285 {
286         return max (Frame (1), _video_length);
287 }
288
289 optional<double>
290 FFmpegExaminer::sample_aspect_ratio () const
291 {
292         AVRational sar = av_guess_sample_aspect_ratio (_format_context, _format_context->streams[_video_stream], 0);
293         if (sar.num == 0) {
294                 /* I assume this means that we don't know */
295                 return optional<double> ();
296         }
297         return double (sar.num) / sar.den;
298 }
299
300 string
301 FFmpegExaminer::audio_stream_name (AVStream* s) const
302 {
303         SafeStringStream n;
304
305         n << stream_name (s);
306
307         if (!n.str().empty()) {
308                 n << "; ";
309         }
310
311         n << s->codec->channels << " channels";
312
313         return n.str ();
314 }
315
316 string
317 FFmpegExaminer::subtitle_stream_name (AVStream* s) const
318 {
319         SafeStringStream n;
320
321         n << stream_name (s);
322
323         if (n.str().empty()) {
324                 n << _("unknown");
325         }
326
327         return n.str ();
328 }
329
330 string
331 FFmpegExaminer::stream_name (AVStream* s) const
332 {
333         SafeStringStream n;
334
335         if (s->metadata) {
336                 AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
337                 if (lang) {
338                         n << lang->value;
339                 }
340
341                 AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
342                 if (title) {
343                         if (!n.str().empty()) {
344                                 n << " ";
345                         }
346                         n << title->value;
347                 }
348         }
349
350         return n.str ();
351 }
352
353 int
354 FFmpegExaminer::bits_per_pixel () const
355 {
356         return av_get_bits_per_pixel (av_pix_fmt_desc_get (video_codec_context()->pix_fmt));
357 }
358
359 bool
360 FFmpegExaminer::yuv () const
361 {
362         switch (video_codec_context()->pix_fmt) {
363         case AV_PIX_FMT_YUV420P:
364         case AV_PIX_FMT_YUYV422:
365         case AV_PIX_FMT_YUV422P:
366         case AV_PIX_FMT_YUV444P:
367         case AV_PIX_FMT_YUV410P:
368         case AV_PIX_FMT_YUV411P:
369         case AV_PIX_FMT_YUVJ420P:
370         case AV_PIX_FMT_YUVJ422P:
371         case AV_PIX_FMT_YUVJ444P:
372         case AV_PIX_FMT_UYVY422:
373         case AV_PIX_FMT_UYYVYY411:
374         case AV_PIX_FMT_NV12:
375         case AV_PIX_FMT_NV21:
376         case AV_PIX_FMT_YUV440P:
377         case AV_PIX_FMT_YUVJ440P:
378         case AV_PIX_FMT_YUVA420P:
379         case AV_PIX_FMT_YUV420P16LE:
380         case AV_PIX_FMT_YUV420P16BE:
381         case AV_PIX_FMT_YUV422P16LE:
382         case AV_PIX_FMT_YUV422P16BE:
383         case AV_PIX_FMT_YUV444P16LE:
384         case AV_PIX_FMT_YUV444P16BE:
385         case AV_PIX_FMT_YUV420P9BE:
386         case AV_PIX_FMT_YUV420P9LE:
387         case AV_PIX_FMT_YUV420P10BE:
388         case AV_PIX_FMT_YUV420P10LE:
389         case AV_PIX_FMT_YUV422P10BE:
390         case AV_PIX_FMT_YUV422P10LE:
391         case AV_PIX_FMT_YUV444P9BE:
392         case AV_PIX_FMT_YUV444P9LE:
393         case AV_PIX_FMT_YUV444P10BE:
394         case AV_PIX_FMT_YUV444P10LE:
395         case AV_PIX_FMT_YUV422P9BE:
396         case AV_PIX_FMT_YUV422P9LE:
397         case AV_PIX_FMT_YUVA422P_LIBAV:
398         case AV_PIX_FMT_YUVA444P_LIBAV:
399         case AV_PIX_FMT_YUVA420P9BE:
400         case AV_PIX_FMT_YUVA420P9LE:
401         case AV_PIX_FMT_YUVA422P9BE:
402         case AV_PIX_FMT_YUVA422P9LE:
403         case AV_PIX_FMT_YUVA444P9BE:
404         case AV_PIX_FMT_YUVA444P9LE:
405         case AV_PIX_FMT_YUVA420P10BE:
406         case AV_PIX_FMT_YUVA420P10LE:
407         case AV_PIX_FMT_YUVA422P10BE:
408         case AV_PIX_FMT_YUVA422P10LE:
409         case AV_PIX_FMT_YUVA444P10BE:
410         case AV_PIX_FMT_YUVA444P10LE:
411         case AV_PIX_FMT_YUVA420P16BE:
412         case AV_PIX_FMT_YUVA420P16LE:
413         case AV_PIX_FMT_YUVA422P16BE:
414         case AV_PIX_FMT_YUVA422P16LE:
415         case AV_PIX_FMT_YUVA444P16BE:
416         case AV_PIX_FMT_YUVA444P16LE:
417         case AV_PIX_FMT_NV16:
418         case AV_PIX_FMT_NV20LE:
419         case AV_PIX_FMT_NV20BE:
420         case AV_PIX_FMT_YVYU422:
421         case AV_PIX_FMT_YUVA444P:
422         case AV_PIX_FMT_YUVA422P:
423         case AV_PIX_FMT_YUV420P12BE:
424         case AV_PIX_FMT_YUV420P12LE:
425         case AV_PIX_FMT_YUV420P14BE:
426         case AV_PIX_FMT_YUV420P14LE:
427         case AV_PIX_FMT_YUV422P12BE:
428         case AV_PIX_FMT_YUV422P12LE:
429         case AV_PIX_FMT_YUV422P14BE:
430         case AV_PIX_FMT_YUV422P14LE:
431         case AV_PIX_FMT_YUV444P12BE:
432         case AV_PIX_FMT_YUV444P12LE:
433         case AV_PIX_FMT_YUV444P14BE:
434         case AV_PIX_FMT_YUV444P14LE:
435         case AV_PIX_FMT_YUVJ411P:
436                 return true;
437         default:
438                 return false;
439         }
440 }