Add include for Centos.
[libsub.git] / src / ssa_reader.cc
1 /*
2     Copyright (C) 2016 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 #include "ssa_reader.h"
21 #include "util.h"
22 #include "sub_assert.h"
23 #include "raw_convert.h"
24 #include "subtitle.h"
25 #include "compose.hpp"
26 #include <locked_sstream.h>
27 #include <boost/algorithm/string.hpp>
28 #include <boost/bind.hpp>
29 #include <boost/foreach.hpp>
30 #include <iostream>
31 #include <vector>
32 #include <cstdio>
33
34 using std::string;
35 using std::vector;
36 using std::map;
37 using std::cout;
38 using std::list;
39 using boost::optional;
40 using boost::function;
41 using namespace boost::algorithm;
42 using namespace sub;
43
44 /** @param s Subtitle string encoded in UTF-8 */
45 SSAReader::SSAReader (string const & s)
46 {
47         locked_stringstream str (s);
48         this->read (boost::bind (&get_line_stringstream, &str));
49 }
50
51 /** @param f Subtitle file encoded in UTF-8 */
52 SSAReader::SSAReader (FILE* f)
53 {
54         this->read (boost::bind (&get_line_file, f));
55 }
56
57 class Style
58 {
59 public:
60         Style ()
61                 : font_size (72)
62                 , primary_colour (255, 255, 255)
63                 , bold (false)
64                 , italic (false)
65                 , underline (false)
66                 , horizontal_reference (HORIZONTAL_CENTRE_OF_SCREEN)
67                 , vertical_reference (BOTTOM_OF_SCREEN)
68                 , vertical_margin (0)
69         {}
70
71         Style (string format_line, string style_line)
72                 : font_size (72)
73                 , primary_colour (255, 255, 255)
74                 , bold (false)
75                 , italic (false)
76                 , underline (false)
77                 , horizontal_reference (HORIZONTAL_CENTRE_OF_SCREEN)
78                 , vertical_reference (BOTTOM_OF_SCREEN)
79                 , vertical_margin (0)
80         {
81                 vector<string> keys;
82                 split (keys, format_line, boost::is_any_of (","));
83                 vector<string> style;
84                 split (style, style_line, boost::is_any_of (","));
85
86                 SUB_ASSERT (!keys.empty());
87                 SUB_ASSERT (!style.empty());
88                 SUB_ASSERT (keys.size() == style.size());
89
90                 for (size_t i = 0; i < style.size(); ++i) {
91                         trim (keys[i]);
92                         trim (style[i]);
93                         if (keys[i] == "Name") {
94                                 name = style[i];
95                         } else if (keys[i] == "Fontname") {
96                                 font_name = style[i];
97                         } else if (keys[i] == "Fontsize") {
98                                 font_size = raw_convert<int> (style[i]);
99                         } else if (keys[i] == "PrimaryColour") {
100                                 primary_colour = colour (raw_convert<int> (style[i]));
101                         } else if (keys[i] == "BackColour") {
102                                 back_colour = colour (raw_convert<int> (style[i]));
103                         } else if (keys[i] == "Bold") {
104                                 bold = style[i] == "-1";
105                         } else if (keys[i] == "Italic") {
106                                 italic = style[i] == "-1";
107                         } else if (keys[i] == "Underline") {
108                                 underline = style[i] == "-1";
109                         } else if (keys[i] == "BorderStyle") {
110                                 if (style[i] == "1") {
111                                         effect = SHADOW;
112                                 }
113                         } else if (keys[i] == "Alignment") {
114                                 /* These values from libass' source code */
115                                 switch ((raw_convert<int> (style[i]) - 1) % 3) {
116                                 case 0:
117                                         horizontal_reference = LEFT_OF_SCREEN;
118                                         break;
119                                 case 1:
120                                         horizontal_reference = HORIZONTAL_CENTRE_OF_SCREEN;
121                                         break;
122                                 case 2:
123                                         horizontal_reference = RIGHT_OF_SCREEN;
124                                         break;
125                                 }
126                                 switch (raw_convert<int> (style[i]) & 12) {
127                                 case 4:
128                                         vertical_reference = TOP_OF_SCREEN;
129                                         break;
130                                 case 8:
131                                         vertical_reference = VERTICAL_CENTRE_OF_SCREEN;
132                                         break;
133                                 case 0:
134                                         vertical_reference = BOTTOM_OF_SCREEN;
135                                         break;
136                                 }
137                         } else if (keys[i] == "MarginV") {
138                                 vertical_margin = raw_convert<int> (style[i]);
139                         }
140                 }
141         }
142
143         string name;
144         optional<string> font_name;
145         int font_size;
146         Colour primary_colour;
147         /** outline colour */
148         optional<Colour> back_colour;
149         bool bold;
150         bool italic;
151         bool underline;
152         optional<Effect> effect;
153         HorizontalReference horizontal_reference;
154         VerticalReference vertical_reference;
155         int vertical_margin;
156
157 private:
158         Colour colour (int c) const
159         {
160                 return Colour (
161                         ((c & 0x0000ff) >>  0) / 255.0,
162                         ((c & 0x00ff00) >>  8) / 255.0,
163                         ((c & 0xff0000) >> 16) / 255.0
164                         );
165         }
166 };
167
168 Time
169 SSAReader::parse_time (string t) const
170 {
171         vector<string> bits;
172         split (bits, t, is_any_of (":."));
173         SUB_ASSERT (bits.size() == 4);
174         return Time::from_hms (
175                 raw_convert<int> (bits[0]),
176                 raw_convert<int> (bits[1]),
177                 raw_convert<int> (bits[2]),
178                 raw_convert<int> (bits[3]) * 10
179                 );
180 }
181
182 /** @param base RawSubtitle filled in with any required common values.
183  *  @param line SSA line string (i.e. just the subtitle, possibly with embedded stuff)
184  *  @return List of RawSubtitles to represent line with vertical reference TOP_OF_SUBTITLE.
185  */
186 list<RawSubtitle>
187 SSAReader::parse_line (RawSubtitle base, string line, int play_res_x, int play_res_y)
188 {
189         enum {
190                 TEXT,
191                 STYLE,
192                 BACKSLASH
193         } state = TEXT;
194
195         list<RawSubtitle> subs;
196         RawSubtitle current = base;
197         string style;
198
199         if (!current.vertical_position.reference) {
200                 current.vertical_position.reference = BOTTOM_OF_SCREEN;
201         }
202
203         if (!current.vertical_position.proportional) {
204                 current.vertical_position.proportional = 0;
205         }
206
207         /* We must have a font size, as there could be a margin specified
208            in pixels and in that case we must know how big the subtitle
209            lines are to work out the position on screen.
210         */
211         if (!current.font_size.points()) {
212                 current.font_size.set_points (72);
213         }
214
215         /* Count the number of line breaks */
216         int line_breaks = 0;
217         if (line.length() > 0) {
218                 for (size_t i = 0; i < line.length() - 1; ++i) {
219                         if (line[i] == '\\' && (line[i+1] == 'n' || line[i+1] == 'N')) {
220                                 ++line_breaks;
221                         }
222                 }
223         }
224
225         /* Imagine that the screen is 792 points (i.e. 11 inches) high (as with DCP) */
226         double const line_size = current.font_size.proportional(792) * 1.2;
227
228         /* Tweak vertical_position accordingly */
229         switch (current.vertical_position.reference.get()) {
230         case TOP_OF_SCREEN:
231         case TOP_OF_SUBTITLE:
232                 /* Nothing to do */
233                 break;
234         case VERTICAL_CENTRE_OF_SCREEN:
235                 current.vertical_position.proportional = current.vertical_position.proportional.get() - ((line_breaks + 1) * line_size) / 2;
236                 break;
237         case BOTTOM_OF_SCREEN:
238                 current.vertical_position.proportional = current.vertical_position.proportional.get() + line_breaks * line_size;
239                 break;
240         }
241
242         for (size_t i = 0; i < line.length(); ++i) {
243                 char const c = line[i];
244                 switch (state) {
245                 case TEXT:
246                         if (c == '{') {
247                                 state = STYLE;
248                         } else if (c == '\\') {
249                                 state = BACKSLASH;
250                         } else if (c != '\r' && c != '\n') {
251                                 current.text += c;
252                         }
253                         break;
254                 case STYLE:
255                         if (c == '}' || c == '\\') {
256                                 if (!current.text.empty ()) {
257                                         subs.push_back (current);
258                                         current.text = "";
259                                 }
260                                 if (style == "\\i1") {
261                                         current.italic = true;
262                                 } else if (style == "\\i0" || style == "\\i") {
263                                         current.italic = false;
264                                 } else if (style == "\\b1") {
265                                         current.bold = true;
266                                 } else if (style == "\\b0") {
267                                         current.bold = false;
268                                 } else if (style == "\\u1") {
269                                         current.underline = true;
270                                 } else if (style == "\\u0") {
271                                         current.underline = false;
272                                 } else if (style == "\\an1" || style == "\\an2" || style == "\\an3") {
273                                         current.vertical_position.reference = sub::BOTTOM_OF_SCREEN;
274                                 } else if (style == "\\an4" || style == "\\an5" || style == "\\an6") {
275                                         current.vertical_position.reference = sub::VERTICAL_CENTRE_OF_SCREEN;
276                                 } else if (style == "\\an7" || style == "\\an8" || style == "\\an9") {
277                                         current.vertical_position.reference = sub::TOP_OF_SCREEN;
278                                 } else if (boost::starts_with(style, "\\pos")) {
279                                         vector<string> bits;
280                                         boost::algorithm::split (bits, style, boost::is_any_of("(,"));
281                                         SUB_ASSERT (bits.size() == 3);
282                                         current.horizontal_position.reference = sub::LEFT_OF_SCREEN;
283                                         current.horizontal_position.proportional = raw_convert<float>(bits[1]) / play_res_x;
284                                         current.vertical_position.reference = sub::TOP_OF_SCREEN;
285                                         current.vertical_position.proportional = raw_convert<float>(bits[2]) / play_res_y;
286                                 } else if (boost::starts_with(style, "\\fs")) {
287                                         SUB_ASSERT (style.length() > 3);
288                                         current.font_size.set_points (raw_convert<int>(style.substr(3)));
289                                 } else if (boost::starts_with(style, "\\c")) {
290                                         /* \c&Hbbggrr& */
291                                         if (style.length() != 11 || style[2] != '&' || style[3] != 'H' || style[10] != '&') {
292                                                 throw SSAError(String::compose("Badly formatted colour tag %1", style));
293                                         }
294                                         int ir, ig, ib;
295                                         if (sscanf(style.c_str() + 4, "%2x%2x%2x", &ib, &ig, &ir) < 3) {
296                                                 throw SSAError(String::compose("Badly formatted colour tag %1", style));
297                                         }
298                                         current.colour = sub::Colour(ir / 255.0, ig / 255.0, ib / 255.0);
299                                 }
300                                 style = "";
301                         }
302
303                         if (c == '}') {
304                                 state = TEXT;
305                         } else {
306                                 style += c;
307                         }
308                         break;
309                 case BACKSLASH:
310                         if (c == 'n' || c == 'N') {
311                                 if (!current.text.empty ()) {
312                                         subs.push_back (current);
313                                         current.text = "";
314                                 }
315                                 /* Move down one line (1.2 times the font size) */
316                                 if (current.vertical_position.reference.get() == BOTTOM_OF_SCREEN) {
317                                         current.vertical_position.proportional = current.vertical_position.proportional.get() - line_size;
318                                 } else {
319                                         current.vertical_position.proportional = current.vertical_position.proportional.get() + line_size;
320                                 }
321                         }
322                         state = TEXT;
323                         break;
324                 }
325         }
326
327         if (!current.text.empty ()) {
328                 subs.push_back (current);
329         }
330
331         return subs;
332 }
333
334 void
335 SSAReader::read (function<optional<string> ()> get_line)
336 {
337         enum {
338                 INFO,
339                 STYLES,
340                 EVENTS
341         } part = INFO;
342
343         int play_res_x = 288;
344         int play_res_y = 288;
345         map<string, Style> styles;
346         string style_format_line;
347         vector<string> event_format;
348
349         while (true) {
350                 optional<string> line = get_line ();
351                 if (!line) {
352                         break;
353                 }
354
355                 trim (*line);
356                 remove_unicode_bom (line);
357
358                 if (starts_with (*line, ";") || line->empty ()) {
359                         continue;
360                 }
361
362                 if (starts_with (*line, "[")) {
363                         /* Section heading */
364                         if (line.get() == "[Script Info]") {
365                                 part = INFO;
366                         } else if (line.get() == "[V4 Styles]" || line.get() == "[V4+ Styles]") {
367                                 part = STYLES;
368                         } else if (line.get() == "[Events]") {
369                                 part = EVENTS;
370                         }
371                         continue;
372                 }
373
374                 size_t const colon = line->find (":");
375                 SUB_ASSERT (colon != string::npos);
376                 string const type = line->substr (0, colon);
377                 string body = line->substr (colon + 1);
378                 trim (body);
379
380                 switch (part) {
381                 case INFO:
382                         if (type == "PlayResX") {
383                                 play_res_x = raw_convert<int> (body);
384                         } else if (type == "PlayResY") {
385                                 play_res_y = raw_convert<int> (body);
386                         }
387                         break;
388                 case STYLES:
389                         if (type == "Format") {
390                                 style_format_line = body;
391                         } else if (type == "Style") {
392                                 SUB_ASSERT (!style_format_line.empty ());
393                                 Style s (style_format_line, body);
394                                 styles[s.name] = s;
395                         }
396                         break;
397                 case EVENTS:
398                         if (type == "Format") {
399                                 split (event_format, body, is_any_of (","));
400                                 BOOST_FOREACH (string& i, event_format) {
401                                         trim (i);
402                                 }
403                         } else if (type == "Dialogue") {
404                                 SUB_ASSERT (!event_format.empty ());
405                                 vector<string> event;
406                                 split (event, body, is_any_of (","));
407
408                                 /* There may be commas in the subtitle part; reassemble any extra parts
409                                    from when we just split it.
410                                 */
411                                 while (event.size() > event_format.size()) {
412                                         string const ex = event.back ();
413                                         event.pop_back ();
414                                         event.back() += "," + ex;
415                                 }
416
417                                 SUB_ASSERT (!event.empty());
418                                 SUB_ASSERT (event_format.size() == event.size());
419
420                                 RawSubtitle sub;
421
422                                 for (size_t i = 0; i < event.size(); ++i) {
423                                         trim (event[i]);
424                                         if (event_format[i] == "Start") {
425                                                 sub.from = parse_time (event[i]);
426                                         } else if (event_format[i] == "End") {
427                                                 sub.to = parse_time (event[i]);
428                                         } else if (event_format[i] == "Style") {
429                                                 /* libass trims leading '*'s from style names, commenting that
430                                                    "they seem to mean literally nothing".  Go figure...
431                                                 */
432                                                 trim_left_if (event[i], boost::is_any_of ("*"));
433                                                 SUB_ASSERT (styles.find(event[i]) != styles.end());
434                                                 Style style = styles[event[i]];
435                                                 sub.font = style.font_name;
436                                                 sub.font_size = FontSize::from_points (style.font_size);
437                                                 sub.colour = style.primary_colour;
438                                                 sub.effect_colour = style.back_colour;
439                                                 sub.bold = style.bold;
440                                                 sub.italic = style.italic;
441                                                 sub.underline = style.underline;
442                                                 sub.effect = style.effect;
443                                                 sub.horizontal_position.reference = style.horizontal_reference;
444                                                 sub.vertical_position.reference = style.vertical_reference;
445                                                 sub.vertical_position.proportional = float(style.vertical_margin) / play_res_y;
446                                         } else if (event_format[i] == "MarginV") {
447                                                 sub.vertical_position.proportional = raw_convert<float>(event[i]) / play_res_y;
448                                         } else if (event_format[i] == "Text") {
449                                                 BOOST_FOREACH (sub::RawSubtitle j, parse_line (sub, event[i], play_res_x, play_res_y)) {
450                                                         _subs.push_back (j);
451                                                 }
452                                         }
453                                 }
454                         }
455                 }
456
457         }
458 }