Change from_argb_hex() (which wasn't being used) to from_rgba_hex().
[libsub.git] / src / ssa_reader.cc
1 /*
2     Copyright (C) 2016-2019 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 #include "ssa_reader.h"
21 #include "util.h"
22 #include "sub_assert.h"
23 #include "raw_convert.h"
24 #include "subtitle.h"
25 #include "compose.hpp"
26 #include <boost/algorithm/string.hpp>
27 #include <boost/bind/bind.hpp>
28 #include <cstdlib>
29 #include <iostream>
30 #include <vector>
31
32 using std::string;
33 using std::vector;
34 using std::map;
35 using std::cout;
36 using boost::optional;
37 using boost::function;
38 using namespace boost::algorithm;
39 #if BOOST_VERSION >= 106100
40 using namespace boost::placeholders;
41 #endif
42 using namespace sub;
43
44 /** @param s Subtitle string encoded in UTF-8 */
45 SSAReader::SSAReader (string s)
46 {
47         this->read (boost::bind(&get_line_string, &s));
48 }
49
50 /** @param f Subtitle file encoded in UTF-8 */
51 SSAReader::SSAReader (FILE* f)
52 {
53         this->read (boost::bind (&get_line_file, f));
54 }
55
56 Colour
57 h_colour (string s)
58 {
59         if (s.empty() || s[0] != '&' || s[1] != 'H') {
60                 throw SSAError(String::compose("Badly formatted colour tag %1", s));
61         }
62
63         auto start = s.c_str();
64         auto const end = start + s.length();
65         while (start < end && (*start == '&' || *start == 'H')) {
66                 ++start;
67         }
68
69         auto const colour = strtoll(start, nullptr, 16);
70
71         /* XXX: ignoring alpha channel here; note that 00 is opaque and FF is transparent */
72         return sub::Colour(
73                 ((colour & 0x000000ff) >> 0) / 255.0,
74                 ((colour & 0x0000ff00) >> 8) / 255.0,
75                 ((colour & 0x00ff0000) >> 16) / 255.0
76                 );
77 }
78
79 class Style
80 {
81 public:
82         Style ()
83                 : font_size (72)
84                 , primary_colour (255, 255, 255)
85                 , bold (false)
86                 , italic (false)
87                 , underline (false)
88                 , horizontal_reference (HORIZONTAL_CENTRE_OF_SCREEN)
89                 , vertical_reference (BOTTOM_OF_SCREEN)
90                 , vertical_margin (0)
91         {}
92
93         Style (string format_line, string style_line)
94                 : font_size (72)
95                 , primary_colour (255, 255, 255)
96                 , bold (false)
97                 , italic (false)
98                 , underline (false)
99                 , horizontal_reference (HORIZONTAL_CENTRE_OF_SCREEN)
100                 , vertical_reference (BOTTOM_OF_SCREEN)
101                 , vertical_margin (0)
102         {
103                 vector<string> keys;
104                 split (keys, format_line, boost::is_any_of (","));
105                 vector<string> style;
106                 split (style, style_line, boost::is_any_of (","));
107
108                 SUB_ASSERT (!keys.empty());
109                 SUB_ASSERT (!style.empty());
110                 SUB_ASSERT (keys.size() == style.size());
111
112                 for (size_t i = 0; i < style.size(); ++i) {
113                         trim (keys[i]);
114                         trim (style[i]);
115                         if (keys[i] == "Name") {
116                                 name = style[i];
117                         } else if (keys[i] == "Fontname") {
118                                 font_name = style[i];
119                         } else if (keys[i] == "Fontsize") {
120                                 font_size = raw_convert<int> (style[i]);
121                         } else if (keys[i] == "PrimaryColour") {
122                                 primary_colour = colour (style[i]);
123                         } else if (keys[i] == "BackColour") {
124                                 back_colour = colour (style[i]);
125                         } else if (keys[i] == "Bold") {
126                                 bold = style[i] == "-1";
127                         } else if (keys[i] == "Italic") {
128                                 italic = style[i] == "-1";
129                         } else if (keys[i] == "Underline") {
130                                 underline = style[i] == "-1";
131                         } else if (keys[i] == "BorderStyle") {
132                                 if (style[i] == "1") {
133                                         effect = SHADOW;
134                                 }
135                         } else if (keys[i] == "Alignment") {
136                                 if (style[i] == "7" || style[i] == "8" || style[i] == "9") {
137                                         vertical_reference = TOP_OF_SCREEN;
138                                 } else if (style[i] == "4" || style[i] == "5" || style[i] == "6") {
139                                         vertical_reference = VERTICAL_CENTRE_OF_SCREEN;
140                                 } else {
141                                         vertical_reference = BOTTOM_OF_SCREEN;
142                                 }
143                                 if (style[i] == "1" || style[i] == "4" || style[i] == "7") {
144                                         horizontal_reference = LEFT_OF_SCREEN;
145                                 } else if (style[i] == "3" || style[i] == "6" || style[i] == "9") {
146                                         horizontal_reference = RIGHT_OF_SCREEN;
147                                 } else {
148                                         horizontal_reference = HORIZONTAL_CENTRE_OF_SCREEN;
149                                 }
150                         } else if (keys[i] == "MarginV") {
151                                 vertical_margin = raw_convert<int> (style[i]);
152                         }
153                 }
154         }
155
156         string name;
157         optional<string> font_name;
158         int font_size; ///< points
159         Colour primary_colour;
160         /** outline colour */
161         optional<Colour> back_colour;
162         bool bold;
163         bool italic;
164         bool underline;
165         optional<Effect> effect;
166         HorizontalReference horizontal_reference;
167         VerticalReference vertical_reference;
168         int vertical_margin;
169
170 private:
171         Colour colour (string c) const
172         {
173                 if (c.length() > 0 && c[0] == '&') {
174                         /* &Hbbggrr or &Haabbggrr */
175                         return h_colour (c);
176                 } else {
177                         /* integer */
178                         int i = raw_convert<int>(c);
179                         return Colour (
180                                 ((i & 0x0000ff) >>  0) / 255.0,
181                                 ((i & 0x00ff00) >>  8) / 255.0,
182                                 ((i & 0xff0000) >> 16) / 255.0
183                                 );
184                 }
185         }
186 };
187
188 Time
189 SSAReader::parse_time (string t) const
190 {
191         vector<string> bits;
192         split (bits, t, is_any_of (":."));
193         SUB_ASSERT (bits.size() == 4);
194         return Time::from_hms (
195                 raw_convert<int> (bits[0]),
196                 raw_convert<int> (bits[1]),
197                 raw_convert<int> (bits[2]),
198                 raw_convert<int> (bits[3]) * 10
199                 );
200 }
201
202 void
203 SSAReader::parse_style(RawSubtitle& sub, string style, int play_res_x, int play_res_y, Colour primary_colour)
204 {
205         if (style == "\\i1") {
206                 sub.italic = true;
207         } else if (style == "\\i0" || style == "\\i") {
208                 sub.italic = false;
209         } else if (style == "\\b1") {
210                 sub.bold = true;
211         } else if (style == "\\b0") {
212                 sub.bold = false;
213         } else if (style == "\\u1") {
214                 sub.underline = true;
215         } else if (style == "\\u0") {
216                 sub.underline = false;
217         } else if (style == "\\an1") {
218                 sub.horizontal_position.reference = sub::LEFT_OF_SCREEN;
219                 sub.vertical_position.reference = sub::BOTTOM_OF_SCREEN;
220         } else if (style == "\\an2") {
221                 sub.horizontal_position.reference = sub::HORIZONTAL_CENTRE_OF_SCREEN;
222                 sub.vertical_position.reference = sub::BOTTOM_OF_SCREEN;
223         } else if (style == "\\an3") {
224                 sub.horizontal_position.reference = sub::RIGHT_OF_SCREEN;
225                 sub.vertical_position.reference = sub::BOTTOM_OF_SCREEN;
226         } else if (style == "\\an4") {
227                 sub.horizontal_position.reference = sub::LEFT_OF_SCREEN;
228                 sub.vertical_position.reference = sub::VERTICAL_CENTRE_OF_SCREEN;
229         } else if (style == "\\an5") {
230                 sub.horizontal_position.reference = sub::HORIZONTAL_CENTRE_OF_SCREEN;
231                 sub.vertical_position.reference = sub::VERTICAL_CENTRE_OF_SCREEN;
232         } else if (style == "\\an6") {
233                 sub.horizontal_position.reference = sub::RIGHT_OF_SCREEN;
234                 sub.vertical_position.reference = sub::VERTICAL_CENTRE_OF_SCREEN;
235         } else if (style == "\\an7") {
236                 sub.horizontal_position.reference = sub::LEFT_OF_SCREEN;
237                 sub.vertical_position.reference = sub::TOP_OF_SCREEN;
238         } else if (style == "\\an8") {
239                 sub.horizontal_position.reference = sub::HORIZONTAL_CENTRE_OF_SCREEN;
240                 sub.vertical_position.reference = sub::TOP_OF_SCREEN;
241         } else if (style == "\\an9") {
242                 sub.horizontal_position.reference = sub::RIGHT_OF_SCREEN;
243                 sub.vertical_position.reference = sub::TOP_OF_SCREEN;
244         } else if (boost::starts_with(style, "\\pos")) {
245                 vector<string> bits;
246                 boost::algorithm::split (bits, style, boost::is_any_of("(,"));
247                 SUB_ASSERT (bits.size() == 3);
248                 sub.horizontal_position.reference = sub::LEFT_OF_SCREEN;
249                 sub.horizontal_position.proportional = raw_convert<float>(bits[1]) / play_res_x;
250                 sub.vertical_position.reference = sub::TOP_OF_SCREEN;
251                 sub.vertical_position.proportional = raw_convert<float>(bits[2]) / play_res_y;
252         } else if (boost::starts_with(style, "\\fs")) {
253                 SUB_ASSERT (style.length() > 3);
254                 sub.font_size.set_proportional(raw_convert<float>(style.substr(3)) / play_res_y);
255         } else if (boost::starts_with(style, "\\c")) {
256                 /* \c&Hbbggrr& */
257                 if (style.length() > 2) {
258                         sub.colour = h_colour(style.substr(2, style.length() - 3));
259                 } else if (style.length() == 2) {
260                         sub.colour = primary_colour;
261                 } else {
262                         throw SSAError(String::compose("Badly formatted colour tag %1", style));
263                 }
264         }
265 }
266
267 /** @param base RawSubtitle filled in with any required common values.
268  *  @param line SSA line string (i.e. just the subtitle, possibly with embedded stuff)
269  *  @return List of RawSubtitles to represent line with vertical reference TOP_OF_SUBTITLE.
270  */
271 vector<RawSubtitle>
272 SSAReader::parse_line(RawSubtitle base, string line, int play_res_x, int play_res_y, Colour primary_colour)
273 {
274         enum {
275                 TEXT,
276                 STYLE,
277                 BACKSLASH
278         } state = TEXT;
279
280         vector<RawSubtitle> subs;
281         RawSubtitle current = base;
282         string style;
283
284         if (!current.vertical_position.reference) {
285                 current.vertical_position.reference = BOTTOM_OF_SCREEN;
286         }
287
288         /* Any vertical_position that is set in base (and therefore current) is a margin, which
289          * we need to ignore if we end up vertically centering this subtitle.
290          * Clear out vertical_position from current; we'll re-add it from base later
291          * if required.
292          */
293         current.vertical_position.proportional = 0;
294
295         /* We must have a font size, as there could be a margin specified
296            in pixels and in that case we must know how big the subtitle
297            lines are to work out the position on screen.
298         */
299         if (!current.font_size.proportional()) {
300                 current.font_size.set_proportional(72.0 / play_res_y);
301         }
302
303         /* Count the number of line breaks */
304         int line_breaks = 0;
305         if (line.length() > 0) {
306                 for (size_t i = 0; i < line.length() - 1; ++i) {
307                         if (line[i] == '\\' && (line[i+1] == 'n' || line[i+1] == 'N')) {
308                                 ++line_breaks;
309                         }
310                 }
311         }
312
313         /* There are vague indications that with ASS 1 point should equal 1 pixel */
314         double const line_size = current.font_size.proportional(play_res_y) * 1.2;
315
316         for (size_t i = 0; i < line.length(); ++i) {
317                 char const c = line[i];
318                 switch (state) {
319                 case TEXT:
320                         if (c == '{') {
321                                 state = STYLE;
322                         } else if (c == '\\') {
323                                 state = BACKSLASH;
324                         } else if (c != '\r' && c != '\n') {
325                                 current.text += c;
326                         }
327                         break;
328                 case STYLE:
329                         if (c == '}' || c == '\\') {
330                                 if (!current.text.empty ()) {
331                                         subs.push_back (current);
332                                         current.text = "";
333                                 }
334                                 parse_style(current, style, play_res_x, play_res_y, primary_colour);
335                                 style = "";
336                         }
337
338                         if (c == '}') {
339                                 state = TEXT;
340                         } else {
341                                 style += c;
342                         }
343                         break;
344                 case BACKSLASH:
345                         if (c == 'n' || c == 'N') {
346                                 if (!current.text.empty ()) {
347                                         subs.push_back (current);
348                                         current.text = "";
349                                 }
350                                 /* Move down one line (1.2 times the font size) */
351                                 if (current.vertical_position.reference.get() == BOTTOM_OF_SCREEN) {
352                                         current.vertical_position.proportional = current.vertical_position.proportional.get() - line_size;
353                                 } else {
354                                         current.vertical_position.proportional = current.vertical_position.proportional.get() + line_size;
355                                 }
356                         }
357                         state = TEXT;
358                         break;
359                 }
360         }
361
362         if (!current.text.empty ()) {
363                 subs.push_back (current);
364         }
365
366         /* Now we definitely know the vertical position reference we can finish off the position */
367         for (auto& sub: subs) {
368                 switch (sub.vertical_position.reference.get()) {
369                 case TOP_OF_SCREEN:
370                 case TOP_OF_SUBTITLE:
371                         /* Just re-add any margins we came in with */
372                         sub.vertical_position.proportional = sub.vertical_position.proportional.get() + base.vertical_position.proportional.get_value_or(0);
373                         break;
374                 case VERTICAL_CENTRE_OF_SCREEN:
375                         /* Margins are ignored, but we need to centre */
376                         sub.vertical_position.proportional = sub.vertical_position.proportional.get() - ((line_breaks + 1) * line_size) / 2;
377                         break;
378                 case BOTTOM_OF_SCREEN:
379                         /* Re-add margins and account for each line */
380                         sub.vertical_position.proportional =
381                                 sub.vertical_position.proportional.get()
382                                 + base.vertical_position.proportional.get_value_or(0)
383                                 + line_breaks * line_size;
384                         break;
385                 }
386         }
387
388         return subs;
389 }
390
391 void
392 SSAReader::read (function<optional<string> ()> get_line)
393 {
394         enum {
395                 INFO,
396                 STYLES,
397                 EVENTS
398         } part = INFO;
399
400         int play_res_x = 288;
401         int play_res_y = 288;
402         map<string, Style> styles;
403         string style_format_line;
404         vector<string> event_format;
405
406         while (true) {
407                 optional<string> line = get_line ();
408                 if (!line) {
409                         break;
410                 }
411
412                 trim (*line);
413                 remove_unicode_bom (line);
414
415                 if (starts_with (*line, ";") || line->empty ()) {
416                         continue;
417                 }
418
419                 if (starts_with (*line, "[")) {
420                         /* Section heading */
421                         if (line.get() == "[Script Info]") {
422                                 part = INFO;
423                         } else if (line.get() == "[V4 Styles]" || line.get() == "[V4+ Styles]") {
424                                 part = STYLES;
425                         } else if (line.get() == "[Events]") {
426                                 part = EVENTS;
427                         }
428                         continue;
429                 }
430
431                 size_t const colon = line->find (":");
432                 SUB_ASSERT (colon != string::npos);
433                 string const type = line->substr (0, colon);
434                 string body = line->substr (colon + 1);
435                 trim (body);
436
437                 switch (part) {
438                 case INFO:
439                         if (type == "PlayResX") {
440                                 play_res_x = raw_convert<int> (body);
441                         } else if (type == "PlayResY") {
442                                 play_res_y = raw_convert<int> (body);
443                         }
444                         break;
445                 case STYLES:
446                         if (type == "Format") {
447                                 style_format_line = body;
448                         } else if (type == "Style") {
449                                 SUB_ASSERT (!style_format_line.empty ());
450                                 Style s (style_format_line, body);
451                                 styles[s.name] = s;
452                         }
453                         break;
454                 case EVENTS:
455                         if (type == "Format") {
456                                 split (event_format, body, is_any_of (","));
457                                 for (auto& i: event_format) {
458                                         trim (i);
459                                 }
460                         } else if (type == "Dialogue") {
461                                 SUB_ASSERT (!event_format.empty ());
462                                 vector<string> event;
463                                 split (event, body, is_any_of (","));
464
465                                 /* There may be commas in the subtitle part; reassemble any extra parts
466                                    from when we just split it.
467                                 */
468                                 while (event.size() > event_format.size()) {
469                                         string const ex = event.back ();
470                                         event.pop_back ();
471                                         event.back() += "," + ex;
472                                 }
473
474                                 SUB_ASSERT (!event.empty());
475                                 SUB_ASSERT (event_format.size() == event.size());
476
477                                 RawSubtitle sub;
478                                 optional<Style> style;
479
480                                 for (size_t i = 0; i < event.size(); ++i) {
481                                         trim (event[i]);
482                                         if (event_format[i] == "Start") {
483                                                 sub.from = parse_time (event[i]);
484                                         } else if (event_format[i] == "End") {
485                                                 sub.to = parse_time (event[i]);
486                                         } else if (event_format[i] == "Style") {
487                                                 /* libass trims leading '*'s from style names, commenting that
488                                                    "they seem to mean literally nothing".  Go figure...
489                                                 */
490                                                 trim_left_if (event[i], boost::is_any_of ("*"));
491                                                 SUB_ASSERT (styles.find(event[i]) != styles.end());
492                                                 style = styles[event[i]];
493                                                 sub.font = style->font_name;
494                                                 sub.font_size = FontSize::from_proportional(static_cast<float>(style->font_size) / play_res_y);
495                                                 sub.colour = style->primary_colour;
496                                                 sub.effect_colour = style->back_colour;
497                                                 sub.bold = style->bold;
498                                                 sub.italic = style->italic;
499                                                 sub.underline = style->underline;
500                                                 sub.effect = style->effect;
501                                                 sub.horizontal_position.reference = style->horizontal_reference;
502                                                 sub.vertical_position.reference = style->vertical_reference;
503                                                 if (sub.vertical_position.reference != sub::VERTICAL_CENTRE_OF_SCREEN) {
504                                                         sub.vertical_position.proportional = float(style->vertical_margin) / play_res_y;
505                                                 }
506                                         } else if (event_format[i] == "MarginV") {
507                                                 if (event[i] != "0" && sub.vertical_position.reference != sub::VERTICAL_CENTRE_OF_SCREEN) {
508                                                         /* Override the style if its non-zero */
509                                                         sub.vertical_position.proportional = raw_convert<float>(event[i]) / play_res_y;
510                                                 }
511                                         } else if (event_format[i] == "Text") {
512                                                 for (auto j: parse_line(sub, event[i], play_res_x, play_res_y, style ? style->primary_colour : Colour(1, 1, 1))) {
513                                                         _subs.push_back (j);
514                                                 }
515                                         }
516                                 }
517                         }
518                 }
519
520         }
521 }