Test and fix italic support in SSA.
[libsub.git] / src / ssa_reader.cc
1 /*
2     Copyright (C) 2016 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 #include "ssa_reader.h"
21 #include "util.h"
22 #include "sub_assert.h"
23 #include "raw_convert.h"
24 #include "subtitle.h"
25 #include <boost/algorithm/string.hpp>
26 #include <boost/bind.hpp>
27 #include <boost/foreach.hpp>
28 #include <sstream>
29 #include <iostream>
30 #include <vector>
31
32 using std::string;
33 using std::stringstream;
34 using std::vector;
35 using std::map;
36 using std::cout;
37 using std::list;
38 using boost::optional;
39 using boost::function;
40 using namespace boost::algorithm;
41 using namespace sub;
42
43 /** @param s Subtitle string encoded in UTF-8 */
44 SSAReader::SSAReader (string const & s)
45 {
46         stringstream str (s);
47         this->read (boost::bind (&get_line_stringstream, &str));
48 }
49
50 /** @param f Subtitle file encoded in UTF-8 */
51 SSAReader::SSAReader (FILE* f)
52 {
53         this->read (boost::bind (&get_line_file, f));
54 }
55
56 class Style
57 {
58 public:
59         Style ()
60                 : font_size (72)
61                 , primary_colour (255, 255, 255)
62                 , bold (false)
63                 , italic (false)
64                 , vertical_reference (BOTTOM_OF_SCREEN)
65                 , vertical_margin (0)
66         {}
67
68         Style (string format_line, string style_line)
69                 : font_size (72)
70                 , primary_colour (255, 255, 255)
71                 , bold (false)
72                 , italic (false)
73                 , vertical_reference (BOTTOM_OF_SCREEN)
74                 , vertical_margin (0)
75         {
76                 vector<string> keys;
77                 split (keys, format_line, is_any_of (","));
78                 vector<string> style;
79                 split (style, style_line, is_any_of (","));
80
81                 SUB_ASSERT (!keys.empty());
82                 SUB_ASSERT (!style.empty());
83                 SUB_ASSERT (keys.size() == style.size());
84
85                 for (size_t i = 0; i < style.size(); ++i) {
86                         trim (keys[i]);
87                         trim (style[i]);
88                         if (keys[i] == "Name") {
89                                 name = style[i];
90                         } else if (keys[i] == "Fontname") {
91                                 font_name = style[i];
92                         } else if (keys[i] == "Fontsize") {
93                                 font_size = raw_convert<int> (style[i]);
94                         } else if (keys[i] == "PrimaryColour") {
95                                 primary_colour = colour (raw_convert<int> (style[i]));
96                         } else if (keys[i] == "BackColour") {
97                                 back_colour = colour (raw_convert<int> (style[i]));
98                         } else if (keys[i] == "Bold") {
99                                 bold = style[i] == "-1";
100                         } else if (keys[i] == "Italic") {
101                                 italic = style[i] == "-1";
102                         } else if (keys[i] == "BorderStyle") {
103                                 if (style[i] == "1") {
104                                         effect = SHADOW;
105                                 }
106                         } else if (keys[i] == "Alignment") {
107                                 /* These values from libass' source code */
108                                 switch (raw_convert<int> (style[i]) & 12) {
109                                 case 4:
110                                         vertical_reference = TOP_OF_SCREEN;
111                                         break;
112                                 case 8:
113                                         vertical_reference = CENTRE_OF_SCREEN;
114                                         break;
115                                 case 0:
116                                         vertical_reference = BOTTOM_OF_SCREEN;
117                                         break;
118                                 }
119                         } else if (keys[i] == "MarginV") {
120                                 vertical_margin = raw_convert<int> (style[i]);
121                         }
122                 }
123         }
124
125         string name;
126         optional<string> font_name;
127         int font_size;
128         Colour primary_colour;
129         /** outline colour */
130         optional<Colour> back_colour;
131         bool bold;
132         bool italic;
133         optional<Effect> effect;
134         VerticalReference vertical_reference;
135         int vertical_margin;
136
137 private:
138         Colour colour (int c) const
139         {
140                 return Colour (
141                         ((c & 0x0000ff) >>  0) / 255.0,
142                         ((c & 0x00ff00) >>  8) / 255.0,
143                         ((c & 0xff0000) >> 16) / 255.0
144                         );
145         }
146 };
147
148 Time
149 SSAReader::parse_time (string t) const
150 {
151         vector<string> bits;
152         split (bits, t, is_any_of (":."));
153         SUB_ASSERT (bits.size() == 4);
154         return Time::from_hms (
155                 raw_convert<int> (bits[0]),
156                 raw_convert<int> (bits[1]),
157                 raw_convert<int> (bits[2]),
158                 raw_convert<int> (bits[3]) * 10
159                 );
160 }
161
162 /** @param base RawSubtitle filled in with any required common values.
163  *  @param line SSA line string (i.e. just the subtitle, possibly with embedded stuff)
164  *  @return List of RawSubtitles to represent line with vertical reference TOP_OF_SUBTITLE.
165  */
166 list<RawSubtitle>
167 SSAReader::parse_line (RawSubtitle base, string line)
168 {
169         enum {
170                 TEXT,
171                 STYLE,
172                 BACKSLASH
173         } state = TEXT;
174
175         list<RawSubtitle> subs;
176         RawSubtitle current = base;
177         string style;
178
179         if (!current.vertical_position.reference) {
180                 current.vertical_position.reference = BOTTOM_OF_SCREEN;
181         }
182
183         if (!current.vertical_position.proportional) {
184                 current.vertical_position.proportional = 0;
185         }
186
187         /* We must have a font size, as there could be a margin specified
188            in pixels and in that case we must know how big the subtitle
189            lines are to work out the position on screen.
190         */
191         if (!current.font_size.points()) {
192                 current.font_size.set_points (72);
193         }
194
195         /* Count the number of line breaks */
196         int line_breaks = 0;
197         for (size_t i = 0; i < line.length() - 1; ++i) {
198                 if (line[i] == '\\' && (line[i+1] == 'n' || line[i+1] == 'N')) {
199                         ++line_breaks;
200                 }
201         }
202
203         /* Imagine that the screen is 792 points (i.e. 11 inches) high (as with DCP) */
204         double const line_size = current.font_size.proportional(792) * 1.2;
205
206         /* Tweak vertical_position accordingly */
207         switch (current.vertical_position.reference.get()) {
208         case TOP_OF_SCREEN:
209         case TOP_OF_SUBTITLE:
210                 /* Nothing to do */
211                 break;
212         case CENTRE_OF_SCREEN:
213                 current.vertical_position.proportional = current.vertical_position.proportional.get() - ((line_breaks + 1) * line_size) / 2;
214                 break;
215         case BOTTOM_OF_SCREEN:
216                 current.vertical_position.proportional = current.vertical_position.proportional.get() + line_breaks * line_size;
217                 break;
218         }
219
220         for (size_t i = 0; i < line.length(); ++i) {
221                 char const c = line[i];
222                 switch (state) {
223                 case TEXT:
224                         if (c == '{') {
225                                 state = STYLE;
226                         } else if (c == '\\') {
227                                 state = BACKSLASH;
228                         } else if (c != '\r' && c != '\n') {
229                                 current.text += c;
230                         }
231                         break;
232                 case STYLE:
233                         if (c == '}') {
234                                 if (!current.text.empty ()) {
235                                         subs.push_back (current);
236                                         current.text = "";
237                                 }
238                                 if (style == "\\i1") {
239                                         current.italic = true;
240                                 } else if (style == "\\i0" || style == "\\i") {
241                                         current.italic = false;
242                                 }
243                                 style = "";
244                                 state = TEXT;
245                         } else {
246                                 style += c;
247                         }
248                         break;
249                 case BACKSLASH:
250                         if ((c == 'n' || c == 'N') && !current.text.empty ()) {
251                                 subs.push_back (current);
252                                 current.text = "";
253                                 /* Move down one line (1.2 times the font size) */
254                                 if (current.vertical_position.reference.get() == BOTTOM_OF_SCREEN) {
255                                         current.vertical_position.proportional = current.vertical_position.proportional.get() - line_size;
256                                 } else {
257                                         current.vertical_position.proportional = current.vertical_position.proportional.get() + line_size;
258                                 }
259                         }
260                         state = TEXT;
261                         break;
262                 }
263         }
264
265         if (!current.text.empty ()) {
266                 subs.push_back (current);
267         }
268
269         return subs;
270 }
271
272 void
273 SSAReader::read (function<optional<string> ()> get_line)
274 {
275         enum {
276                 INFO,
277                 STYLES,
278                 EVENTS
279         } part = INFO;
280
281         int play_res_y = 288;
282         map<string, Style> styles;
283         string style_format_line;
284         vector<string> event_format;
285
286         while (true) {
287                 optional<string> line = get_line ();
288                 if (!line) {
289                         break;
290                 }
291
292                 trim (*line);
293                 remove_unicode_bom (line);
294
295                 if (starts_with (*line, ";") || line->empty ()) {
296                         continue;
297                 }
298
299                 if (starts_with (*line, "[")) {
300                         /* Section heading */
301                         if (line.get() == "[Script Info]") {
302                                 part = INFO;
303                         } else if (line.get() == "[V4 Styles]") {
304                                 part = STYLES;
305                         } else if (line.get() == "[Events]") {
306                                 part = EVENTS;
307                         }
308                         continue;
309                 }
310
311                 size_t const colon = line->find (":");
312                 SUB_ASSERT (colon != string::npos);
313                 SUB_ASSERT (line->length() > colon + 1);
314                 string const type = line->substr (0, colon);
315                 string const body = line->substr (colon + 2);
316
317                 switch (part) {
318                 case INFO:
319                         if (type == "PlayResY") {
320                                 play_res_y = raw_convert<int> (body);
321                         }
322                         break;
323                 case STYLES:
324                         if (type == "Format") {
325                                 style_format_line = body;
326                         } else if (type == "Style") {
327                                 SUB_ASSERT (!style_format_line.empty ());
328                                 Style s (style_format_line, body);
329                                 styles[s.name] = s;
330                         }
331                         break;
332                 case EVENTS:
333                         if (type == "Format") {
334                                 split (event_format, body, is_any_of (","));
335                                 BOOST_FOREACH (string& i, event_format) {
336                                         trim (i);
337                                 }
338                         } else if (type == "Dialogue") {
339                                 SUB_ASSERT (!event_format.empty ());
340                                 vector<string> event;
341                                 split (event, body, is_any_of (","));
342
343                                 /* There may be commas in the subtitle part; reassemble any extra parts
344                                    from when we just split it.
345                                 */
346                                 while (event.size() > event_format.size()) {
347                                         string const ex = event.back ();
348                                         event.pop_back ();
349                                         event.back() += "," + ex;
350                                 }
351
352                                 SUB_ASSERT (!event.empty());
353                                 SUB_ASSERT (event_format.size() == event.size());
354
355                                 RawSubtitle sub;
356
357                                 for (size_t i = 0; i < event.size(); ++i) {
358                                         trim (event[i]);
359                                         if (event_format[i] == "Start") {
360                                                 sub.from = parse_time (event[i]);
361                                         } else if (event_format[i] == "End") {
362                                                 sub.to = parse_time (event[i]);
363                                         } else if (event_format[i] == "Style") {
364                                                 SUB_ASSERT (styles.find(event[i]) != styles.end());
365                                                 Style style = styles[event[i]];
366                                                 sub.font = style.font_name;
367                                                 sub.font_size = FontSize::from_points (style.font_size);
368                                                 sub.colour = style.primary_colour;
369                                                 sub.effect_colour = style.back_colour;
370                                                 sub.bold = style.bold;
371                                                 sub.italic = style.italic;
372                                                 sub.effect = style.effect;
373                                                 sub.vertical_position.reference = style.vertical_reference;
374                                                 sub.vertical_position.proportional = float(style.vertical_margin) / play_res_y;
375                                         } else if (event_format[i] == "MarginV") {
376                                                 sub.vertical_position.proportional = raw_convert<float>(event[i]) / play_res_y;
377                                         } else if (event_format[i] == "Text") {
378                                                 BOOST_FOREACH (sub::RawSubtitle j, parse_line (sub, event[i])) {
379                                                         _subs.push_back (j);
380                                                 }
381                                         }
382                                 }
383                         }
384                 }
385
386         }
387 }