Use parse_line to parse all SSA subs.
[libsub.git] / src / ssa_reader.cc
1 /*
2     Copyright (C) 2016 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 #include "ssa_reader.h"
21 #include "util.h"
22 #include "sub_assert.h"
23 #include "raw_convert.h"
24 #include "subtitle.h"
25 #include <boost/algorithm/string.hpp>
26 #include <boost/bind.hpp>
27 #include <boost/foreach.hpp>
28 #include <sstream>
29 #include <iostream>
30 #include <vector>
31
32 using std::string;
33 using std::stringstream;
34 using std::vector;
35 using std::map;
36 using std::cout;
37 using std::list;
38 using boost::optional;
39 using boost::function;
40 using namespace boost::algorithm;
41 using namespace sub;
42
43 /** @param s Subtitle string encoded in UTF-8 */
44 SSAReader::SSAReader (string const & s)
45 {
46         stringstream str (s);
47         this->read (boost::bind (&get_line_stringstream, &str));
48 }
49
50 /** @param f Subtitle file encoded in UTF-8 */
51 SSAReader::SSAReader (FILE* f)
52 {
53         this->read (boost::bind (&get_line_file, f));
54 }
55
56 class Style
57 {
58 public:
59         Style ()
60                 : font_size (24)
61                 , primary_colour (255, 255, 255)
62                 , bold (false)
63                 , italic (false)
64         {}
65
66         Style (string format_line, string style_line)
67                 : font_size (24)
68                 , primary_colour (255, 255, 255)
69                 , bold (false)
70                 , italic (false)
71         {
72                 vector<string> keys;
73                 split (keys, format_line, is_any_of (","));
74                 vector<string> style;
75                 split (style, style_line, is_any_of (","));
76
77                 SUB_ASSERT (!keys.empty());
78                 SUB_ASSERT (!style.empty());
79                 SUB_ASSERT (keys.size() == style.size());
80
81                 for (size_t i = 0; i < style.size(); ++i) {
82                         trim (keys[i]);
83                         trim (style[i]);
84                         if (keys[i] == "Name") {
85                                 name = style[i];
86                         } else if (keys[i] == "Fontname") {
87                                 font_name = style[i];
88                         } else if (keys[i] == "Fontsize") {
89                                 font_size = raw_convert<int> (style[i]);
90                         } else if (keys[i] == "PrimaryColour") {
91                                 primary_colour = colour (raw_convert<int> (style[i]));
92                         } else if (keys[i] == "BackColour") {
93                                 back_colour = colour (raw_convert<int> (style[i]));
94                         } else if (keys[i] == "Bold") {
95                                 bold = style[i] == "-1";
96                         } else if (keys[i] == "Italic") {
97                                 italic = style[i] == "-1";
98                         } else if (keys[i] == "BorderStyle") {
99                                 if (style[i] == "1") {
100                                         effect = SHADOW;
101                                 }
102                         }
103                 }
104         }
105
106         string name;
107         optional<string> font_name;
108         int font_size;
109         Colour primary_colour;
110         /** outline colour */
111         optional<Colour> back_colour;
112         bool bold;
113         bool italic;
114         optional<Effect> effect;
115
116 private:
117         Colour colour (int c) const
118         {
119                 return Colour (
120                         ((c & 0x0000ff) >>  0) / 255.0,
121                         ((c & 0x00ff00) >>  8) / 255.0,
122                         ((c & 0xff0000) >> 16) / 255.0
123                         );
124         }
125 };
126
127 Time
128 SSAReader::parse_time (string t) const
129 {
130         vector<string> bits;
131         split (bits, t, is_any_of (":."));
132         SUB_ASSERT (bits.size() == 4);
133         return Time::from_hms (
134                 raw_convert<int> (bits[0]),
135                 raw_convert<int> (bits[1]),
136                 raw_convert<int> (bits[2]),
137                 raw_convert<int> (bits[3]) * 10
138                 );
139 }
140
141 /** @param base RawSubtitle filled in with any required common values.
142  *  @param line SSA line string.
143  *  @return List of RawSubtitles to represent line with vertical reference TOP_OF_SUBTITLE.
144  */
145 list<RawSubtitle>
146 SSAReader::parse_line (RawSubtitle base, string line)
147 {
148         enum {
149                 TEXT,
150                 STYLE,
151                 BACKSLASH
152         } state = TEXT;
153
154         list<RawSubtitle> subs;
155         RawSubtitle current = base;
156         string style;
157
158         current.vertical_position.line = 0;
159         /* XXX: arbitrary */
160         current.vertical_position.lines = 32;
161         current.vertical_position.reference = TOP_OF_SUBTITLE;
162
163         for (size_t i = 0; i < line.length(); ++i) {
164                 char const c = line[i];
165                 switch (state) {
166                 case TEXT:
167                         if (c == '{') {
168                                 state = STYLE;
169                         } else if (c == '\\') {
170                                 state = BACKSLASH;
171                         } else if (c != '\r' && c != '\n') {
172                                 current.text += c;
173                         }
174                         break;
175                 case STYLE:
176                         if (c == '}') {
177                                 if (!current.text.empty ()) {
178                                         subs.push_back (current);
179                                         current.text = "";
180                                 }
181                                 if (style == "i1") {
182                                         current.italic = true;
183                                 } else if (style == "i0") {
184                                         current.italic = false;
185                                 }
186                                 style = "";
187                                 state = TEXT;
188                         } else {
189                                 style += c;
190                         }
191                         break;
192                 case BACKSLASH:
193                         if ((c == 'n' || c == 'N') && !current.text.empty ()) {
194                                 subs.push_back (current);
195                                 current.text = "";
196                                 current.vertical_position.line = current.vertical_position.line.get() + 1;
197                         }
198                         state = TEXT;
199                         break;
200                 }
201         }
202
203         if (!current.text.empty ()) {
204                 subs.push_back (current);
205         }
206
207         return subs;
208 }
209
210 void
211 SSAReader::read (function<optional<string> ()> get_line)
212 {
213         enum {
214                 INFO,
215                 STYLES,
216                 EVENTS
217         } part = INFO;
218
219         map<string, Style> styles;
220         string style_format_line;
221         vector<string> event_format;
222
223         while (true) {
224                 optional<string> line = get_line ();
225                 if (!line) {
226                         break;
227                 }
228
229                 trim (*line);
230
231                 if (starts_with (*line, ";") || line->empty ()) {
232                         continue;
233                 }
234
235                 if (starts_with (*line, "[")) {
236                         /* Section heading */
237                         if (line.get() == "[Script Info]") {
238                                 part = INFO;
239                         } else if (line.get() == "[V4 Styles]") {
240                                 part = STYLES;
241                         } else if (line.get() == "[Events]") {
242                                 part = EVENTS;
243                         }
244                         continue;
245                 }
246
247                 size_t const colon = line->find (":");
248                 SUB_ASSERT (colon != string::npos);
249                 SUB_ASSERT (line->length() > colon + 1);
250                 string const type = line->substr (0, colon);
251                 string const body = line->substr (colon + 2);
252
253                 switch (part) {
254                 case INFO:
255                         break;
256                 case STYLES:
257                         if (type == "Format") {
258                                 style_format_line = body;
259                         } else if (type == "Style") {
260                                 SUB_ASSERT (!style_format_line.empty ());
261                                 Style s (style_format_line, body);
262                                 styles[s.name] = s;
263                         }
264                         break;
265                 case EVENTS:
266                         if (type == "Format") {
267                                 split (event_format, body, is_any_of (","));
268                                 BOOST_FOREACH (string& i, event_format) {
269                                         trim (i);
270                                 }
271                         } else if (type == "Dialogue") {
272                                 SUB_ASSERT (!event_format.empty ());
273                                 vector<string> event;
274                                 split (event, body, is_any_of (","));
275
276                                 SUB_ASSERT (!event.empty());
277                                 SUB_ASSERT (event_format.size() == event.size());
278
279                                 RawSubtitle sub;
280
281                                 for (size_t i = 0; i < event.size(); ++i) {
282                                         trim (event[i]);
283                                         if (event_format[i] == "Start") {
284                                                 sub.from = parse_time (event[i]);
285                                         } else if (event_format[i] == "End") {
286                                                 sub.to = parse_time (event[i]);
287                                         } else if (event_format[i] == "Style") {
288                                                 SUB_ASSERT (styles.find(event[i]) != styles.end());
289                                                 Style style = styles[event[i]];
290                                                 sub.font = style.font_name;
291                                                 sub.font_size = FontSize::from_points (style.font_size);
292                                                 sub.colour = style.primary_colour;
293                                                 sub.effect_colour = style.back_colour;
294                                                 sub.bold = style.bold;
295                                                 sub.italic = style.italic;
296                                                 sub.effect = style.effect;
297
298                                                 /* XXX: arbitrary */
299                                                 sub.vertical_position.lines = 32;
300                                                 sub.vertical_position.reference = TOP_OF_SUBTITLE;
301                                                 sub.vertical_position.line = 0;
302
303                                         } else if (event_format[i] == "Text") {
304                                                 BOOST_FOREACH (sub::RawSubtitle j, parse_line (sub, event[i])) {
305                                                         _subs.push_back (j);
306                                                 }
307                                         }
308                                 }
309                         }
310                 }
311
312         }
313 }