Fix font parse error.
[libsub.git] / src / subrip_reader.cc
1 /*
2     Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 #include "subrip_reader.h"
21 #include "exceptions.h"
22 #include <boost/algorithm/string.hpp>
23 #include <boost/lexical_cast.hpp>
24 #include <boost/regex.hpp>
25 #include <cstdio>
26 #include <vector>
27
28 using std::string;
29 using std::vector;
30 using std::list;
31 using std::cout;
32 using std::hex;
33 using boost::lexical_cast;
34 using boost::to_upper;
35 using namespace sub;
36
37 SubripReader::SubripReader (FILE* f)
38 {
39         enum {
40                 COUNTER,
41                 METADATA,
42                 CONTENT
43         } state = COUNTER;
44
45         char buffer[256];
46
47         Time from;
48         Time to;
49
50         string line;
51         int line_number = 0;
52
53         while (!feof (f)) {
54                 char* r = fgets (buffer, sizeof (buffer), f);
55                 if (r == 0 || feof (f)) {
56                         break;
57                 }
58
59                 line = string (buffer);
60                 trim_right_if (line, boost::is_any_of ("\n\r"));
61
62                 if (
63                         line.length() >= 3 &&
64                         static_cast<unsigned char> (line[0]) == 0xef &&
65                         static_cast<unsigned char> (line[1]) == 0xbb &&
66                         static_cast<unsigned char> (line[2]) == 0xbf
67                         ) {
68
69                         /* Skip Unicode byte order mark */
70                         line = line.substr (3);
71                 }
72
73                 switch (state) {
74                 case COUNTER:
75                 {
76                         if (line.empty ()) {
77                                 /* a blank line at the start is ok */
78                                 break;
79                         }
80
81                         state = METADATA;
82                 }
83                 break;
84                 case METADATA:
85                 {
86                         vector<string> p;
87                         boost::algorithm::split (p, line, boost::algorithm::is_any_of (" "));
88                         if (p.size() != 3 && p.size() != 7) {
89                                 throw SubripError (line, "a time/position line");
90                         }
91
92                         from = convert_time (p[0]);
93                         to = convert_time (p[2]);
94
95                         /* XXX: should not ignore coordinate specifications */
96
97                         state = CONTENT;
98                         break;
99                 }
100                 case CONTENT:
101                         if (line.empty ()) {
102                                 state = COUNTER;
103                                 line_number = 0;
104                         } else {
105                                 convert_line (line, line_number, from, to);
106                                 line_number++;
107                         }
108                         break;
109                 }
110         }
111 }
112
113 Time
114 SubripReader::convert_time (string t)
115 {
116         vector<string> a;
117         boost::algorithm::split (a, t, boost::is_any_of (":"));
118         if (a.size() != 3) {
119                 throw SubripError (t, "time in the format h:m:s,ms");
120         }
121
122         vector<string> b;
123         boost::algorithm::split (b, a[2], boost::is_any_of (","));
124
125         return Time::from_hms (
126                 lexical_cast<int> (a[0]),
127                 lexical_cast<int> (a[1]),
128                 lexical_cast<int> (b[0]),
129                 lexical_cast<int> (b[1])
130                 );
131 }
132
133 void
134 SubripReader::convert_line (string t, int line_number, Time from, Time to)
135 {
136         enum {
137                 TEXT,
138                 TAG
139         } state = TEXT;
140
141         string tag;
142
143         RawSubtitle p;
144         p.font = "Arial";
145         p.font_size.set_points (48);
146         p.from = from;
147         p.to = to;
148         p.vertical_position.line = line_number;
149         /* XXX: arbitrary */
150         p.vertical_position.lines = 32;
151         p.vertical_position.reference = TOP_OF_SUBTITLE;
152
153         list<Colour> colours;
154         colours.push_back (Colour (1, 1, 1));
155
156         /* XXX: missing <font> support */
157         /* XXX: nesting of tags e.g. <b>foo<i>bar<b>baz</b>fred</i>jim</b> might
158            not work, I think.
159         */
160
161         for (size_t i = 0; i < t.size(); ++i) {
162                 switch (state) {
163                 case TEXT:
164                         if (t[i] == '<' || t[i] == '{') {
165                                 state = TAG;
166                         } else {
167                                 p.text += t[i];
168                         }
169                         break;
170                 case TAG:
171                         if (t[i] == '>' || t[i] == '}') {
172                                 if (tag == "b") {
173                                         maybe_content (p);
174                                         p.bold = true;
175                                 } else if (tag == "/b") {
176                                         maybe_content (p);
177                                         p.bold = false;
178                                 } else if (tag == "i") {
179                                         maybe_content (p);
180                                         p.italic = true;
181                                 } else if (tag == "/i") {
182                                         maybe_content (p);
183                                         p.italic = false;
184                                 } else if (tag == "u") {
185                                         maybe_content (p);
186                                         p.underline = true;
187                                 } else if (tag == "/u") {
188                                         maybe_content (p);
189                                         p.underline = false;
190                                 } else if (boost::starts_with (tag, "font")) {
191                                         maybe_content (p);
192                                         boost::regex re (".*color=\"#([0123456789abcdef]+)\"");
193                                         boost::smatch match;
194                                         if (boost::regex_search (tag, match, re) && string (match[1]).size() == 6) {
195                                                 p.colour = Colour::from_rgb_hex (match[1]);
196                                                 colours.push_back (p.colour);
197                                         }
198                                 } else if (tag == "/font") {
199                                         maybe_content (p);
200                                         colours.pop_back ();
201                                         p.colour = colours.back ();
202                                 }
203                                 tag.clear ();
204                                 state = TEXT;
205                         } else {
206                                 tag += t[i];
207                         }
208                         break;
209                 }
210         }
211
212         maybe_content (p);
213 }
214
215 void
216 SubripReader::maybe_content (RawSubtitle& p)
217 {
218         if (!p.text.empty ()) {
219                 _subs.push_back (p);
220                 p.text.clear ();
221         }
222 }