Add comment.
[libsub.git] / src / subrip_reader.cc
1 /*
2     Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 #include "subrip_reader.h"
21 #include "exceptions.h"
22 #include <boost/algorithm/string.hpp>
23 #include <boost/lexical_cast.hpp>
24 #include <cstdio>
25 #include <vector>
26
27 using std::string;
28 using std::vector;
29 using boost::lexical_cast;
30 using namespace sub;
31
32 SubripReader::SubripReader (FILE* f)
33 {
34         enum {
35                 COUNTER,
36                 METADATA,
37                 CONTENT
38         } state = COUNTER;
39
40         char buffer[256];
41
42         TimePair from;
43         TimePair to;
44
45         string line;
46         int line_number = 0;
47
48         while (!feof (f)) {
49                 char* r = fgets (buffer, sizeof (buffer), f);
50                 if (r == 0 || feof (f)) {
51                         break;
52                 }
53
54                 line = string (buffer);
55                 trim_right_if (line, boost::is_any_of ("\n\r"));
56
57                 switch (state) {
58                 case COUNTER:
59                 {
60                         if (line.empty ()) {
61                                 /* a blank line at the start is ok */
62                                 break;
63                         }
64
65                         state = METADATA;
66                 }
67                 break;
68                 case METADATA:
69                 {
70                         vector<string> p;
71                         boost::algorithm::split (p, line, boost::algorithm::is_any_of (" "));
72                         if (p.size() != 3 && p.size() != 7) {
73                                 throw SubripError (line, "a time/position line");
74                         }
75
76                         from = convert_time (p[0]);
77                         to = convert_time (p[2]);
78
79                         /* XXX: should not ignore coordinate specifications */
80                         
81                         state = CONTENT;
82                         break;
83                 }
84                 case CONTENT:
85                         if (line.empty ()) {
86                                 /* XXX: I think this line_number should be set to some sensible value and TOP_OF_SUBTITLE
87                                    should not be used.
88                                 */
89                                 state = COUNTER;
90                                 line_number = 0;
91                         } else {
92                                 convert_line (line, line_number, from, to);
93                                 line_number++;
94                         }
95                         break;
96                 }
97         }
98 }
99
100 TimePair
101 SubripReader::convert_time (string t)
102 {
103         vector<string> a;
104         boost::algorithm::split (a, t, boost::is_any_of (":"));
105         if (a.size() != 3) {
106                 throw SubripError (t, "time in the format h:m:s,ms");
107         }
108
109         vector<string> b;
110         boost::algorithm::split (b, a[2], boost::is_any_of (","));
111
112         return TimePair (
113                 MetricTime (
114                         lexical_cast<int> (a[0]),
115                         lexical_cast<int> (a[1]),
116                         lexical_cast<int> (b[0]),
117                         lexical_cast<int> (b[1])
118                         )
119                 );
120 }
121
122 void
123 SubripReader::convert_line (string t, int line_number, TimePair from, TimePair to)
124 {
125         enum {
126                 TEXT,
127                 TAG
128         } state = TEXT;
129         
130         string tag;
131
132         RawSubtitle p;
133         p.font = "Arial";
134         p.font_size.set_points (48);
135         p.from = from;
136         p.to = to;
137         p.vertical_position.line = line_number;
138         p.vertical_position.reference = TOP_OF_SUBTITLE;
139         
140         /* XXX: missing <font> support */
141         /* XXX: nesting of tags e.g. <b>foo<i>bar<b>baz</b>fred</i>jim</b> might
142            not work, I think.
143         */
144
145         for (size_t i = 0; i < t.size(); ++i) {
146                 switch (state) {
147                 case TEXT:
148                         if (t[i] == '<' || t[i] == '{') {
149                                 state = TAG;
150                         } else {
151                                 p.text += t[i];
152                         }
153                         break;
154                 case TAG:
155                         if (t[i] == '>' || t[i] == '}') {
156                                 if (tag == "b") {
157                                         maybe_content (p);
158                                         p.bold = true;
159                                 } else if (tag == "/b") {
160                                         maybe_content (p);
161                                         p.bold = false;
162                                 } else if (tag == "i") {
163                                         maybe_content (p);
164                                         p.italic = true;
165                                 } else if (tag == "/i") {
166                                         maybe_content (p);
167                                         p.italic = false;
168                                 } else if (tag == "u") {
169                                         maybe_content (p);
170                                         p.underline = true;
171                                 } else if (tag == "/u") {
172                                         maybe_content (p);
173                                         p.underline = false;
174                                 }
175                                 tag.clear ();
176                                 state = TEXT;
177                         } else {
178                                 tag += t[i];
179                         }
180                         break;
181                 }
182         }
183
184         maybe_content (p);
185 }
186
187 void
188 SubripReader::maybe_content (RawSubtitle& p)
189 {
190         if (!p.text.empty ()) {
191                 _subs.push_back (p);
192                 p.text.clear ();
193         }
194 }