Merge master.
[dcpomatic.git] / src / lib / subrip.cc
1 /*
2     Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 #include <boost/algorithm/string.hpp>
21 #include "subrip.h"
22 #include "subrip_content.h"
23 #include "subrip_subtitle.h"
24 #include "cross.h"
25 #include "exceptions.h"
26
27 #include "i18n.h"
28
29 using std::string;
30 using std::list;
31 using std::vector;
32 using std::cout;
33 using boost::shared_ptr;
34 using boost::lexical_cast;
35 using boost::algorithm::trim;
36
37 SubRip::SubRip (shared_ptr<const SubRipContent> content)
38 {
39         FILE* f = fopen_boost (content->path (0), "r");
40         if (!f) {
41                 throw OpenFileError (content->path (0));
42         }
43
44         enum {
45                 COUNTER,
46                 METADATA,
47                 CONTENT
48         } state = COUNTER;
49
50         char buffer[256];
51         int next_count = 1;
52
53         boost::optional<SubRipSubtitle> current;
54         list<string> lines;
55         
56         while (!feof (f)) {
57                 fgets (buffer, sizeof (buffer), f);
58                 if (feof (f)) {
59                         break;
60                 }
61                 
62                 string line (buffer);
63                 trim_right_if (line, boost::is_any_of ("\n\r"));
64                 
65                 switch (state) {
66                 case COUNTER:
67                 {
68                         int x = 0;
69                         try {
70                                 x = lexical_cast<int> (line);
71                         } catch (...) {
72
73                         }
74                         
75                         if (x == next_count) {
76                                 state = METADATA;
77                                 ++next_count;
78                                 current = SubRipSubtitle ();
79                         } else {
80                                 throw SubRipError (line, _("a subtitle count"), content->path (0));
81                         }
82                 }
83                 break;
84                 case METADATA:
85                 {
86                         vector<string> p;
87                         boost::algorithm::split (p, line, boost::algorithm::is_any_of (" "));
88                         if (p.size() != 3 && p.size() != 7) {
89                                 throw SubRipError (line, _("a time/position line"), content->path (0));
90                         }
91
92                         current->from = convert_time (p[0]);
93                         current->to = convert_time (p[2]);
94
95                         if (p.size() > 3) {
96                                 current->x1 = convert_coordinate (p[3]);
97                                 current->x2 = convert_coordinate (p[4]);
98                                 current->y1 = convert_coordinate (p[5]);
99                                 current->y2 = convert_coordinate (p[6]);
100                         }
101                         state = CONTENT;
102                         break;
103                 }
104                 case CONTENT:
105                         if (line.empty ()) {
106                                 state = COUNTER;
107                                 current->pieces = convert_content (lines);
108                                 _subtitles.push_back (current.get ());
109                                 current.reset ();
110                                 lines.clear ();
111                         } else {
112                                 lines.push_back (line);
113                         }
114                         break;
115                 }
116         }
117
118         if (state == CONTENT) {
119                 current->pieces = convert_content (lines);
120                 _subtitles.push_back (current.get ());
121         }
122
123         fclose (f);
124 }
125
126 ContentTime
127 SubRip::convert_time (string t)
128 {
129         ContentTime r = 0;
130
131         vector<string> a;
132         boost::algorithm::split (a, t, boost::is_any_of (":"));
133         assert (a.size() == 3);
134         r += lexical_cast<int> (a[0]) * 60 * 60 * TIME_HZ;
135         r += lexical_cast<int> (a[1]) * 60 * TIME_HZ;
136
137         vector<string> b;
138         boost::algorithm::split (b, a[2], boost::is_any_of (","));
139         r += lexical_cast<int> (b[0]) * TIME_HZ;
140         r += lexical_cast<int> (b[1]) * TIME_HZ / 1000;
141
142         return r;
143 }
144
145 int
146 SubRip::convert_coordinate (string t)
147 {
148         vector<string> a;
149         boost::algorithm::split (a, t, boost::is_any_of (":"));
150         assert (a.size() == 2);
151         return lexical_cast<int> (a[1]);
152 }
153
154 void
155 SubRip::maybe_content (list<SubRipSubtitlePiece>& pieces, SubRipSubtitlePiece& p)
156 {
157         if (!p.text.empty ()) {
158                 pieces.push_back (p);
159                 p.text.clear ();
160         }
161 }
162
163 list<SubRipSubtitlePiece>
164 SubRip::convert_content (list<string> t)
165 {
166         list<SubRipSubtitlePiece> pieces;
167         
168         SubRipSubtitlePiece p;
169
170         enum {
171                 TEXT,
172                 TAG
173         } state = TEXT;
174
175         string tag;
176
177         /* XXX: missing <font> support */
178         /* XXX: nesting of tags e.g. <b>foo<i>bar<b>baz</b>fred</i>jim</b> might
179            not work, I think.
180         */
181
182         for (list<string>::const_iterator i = t.begin(); i != t.end(); ++i) {
183                 for (size_t j = 0; j < i->size(); ++j) {
184                         switch (state) {
185                         case TEXT:
186                                 if ((*i)[j] == '<' || (*i)[j] == '{') {
187                                         state = TAG;
188                                 } else {
189                                         p.text += (*i)[j];
190                                 }
191                                 break;
192                         case TAG:
193                                 if ((*i)[j] == '>' || (*i)[j] == '}') {
194                                         if (tag == "b") {
195                                                 maybe_content (pieces, p);
196                                                 p.bold = true;
197                                         } else if (tag == "/b") {
198                                                 maybe_content (pieces, p);
199                                                 p.bold = false;
200                                         } else if (tag == "i") {
201                                                 maybe_content (pieces, p);
202                                                 p.italic = true;
203                                         } else if (tag == "/i") {
204                                                 maybe_content (pieces, p);
205                                                 p.italic = false;
206                                         } else if (tag == "u") {
207                                                 maybe_content (pieces, p);
208                                                 p.underline = true;
209                                         } else if (tag == "/u") {
210                                                 maybe_content (pieces, p);
211                                                 p.underline = false;
212                                         }
213                                         tag.clear ();
214                                         state = TEXT;
215                                 } else {
216                                         tag += (*i)[j];
217                                 }
218                                 break;
219                         }
220                 }
221         }
222
223         maybe_content (pieces, p);
224
225         return pieces;
226 }
227
228 ContentTime
229 SubRip::length () const
230 {
231         if (_subtitles.empty ()) {
232                 return 0;
233         }
234
235         return _subtitles.back().to;
236 }