X-Git-Url: https://main.carlh.net/gitweb/?p=dcpomatic.git;a=blobdiff_plain;f=src%2Flib%2Fsubrip.cc;h=a707d1f9fd13641301a3782f0ee84b272ea5a8c9;hp=380a2ce2cb7e7757c2e1fc7472f571d72aa0a693;hb=aeb835a18c8df347e0ed68fb24631b320abeb611;hpb=1629bd7df2150156109afbc7a16677cb29e82adf

diff --git a/src/lib/subrip.cc b/src/lib/subrip.cc
index 380a2ce2c..a707d1f9f 100644
--- a/src/lib/subrip.cc
+++ b/src/lib/subrip.cc
@@ -17,220 +17,64 @@
 
 */
 
-#include <boost/algorithm/string.hpp>
 #include "subrip.h"
-#include "subrip_content.h"
-#include "subrip_subtitle.h"
 #include "cross.h"
 #include "exceptions.h"
+#include "subrip_content.h"
+#include "data.h"
+#include <sub/subrip_reader.h>
+#include <sub/collect.h>
+#include <unicode/ucsdet.h>
+#include <unicode/ucnv.h>
+#include <iostream>
 
 #include "i18n.h"
 
-using std::string;
-using std::list;
 using std::vector;
 using std::cout;
+using std::string;
 using boost::shared_ptr;
-using boost::lexical_cast;
-using boost::algorithm::trim;
+using boost::scoped_array;
 
 SubRip::SubRip (shared_ptr<const SubRipContent> content)
 {
-	FILE* f = fopen_boost (content->path (0), "r");
-	if (!f) {
-		throw OpenFileError (content->path (0));
-	}
-
-	enum {
-		COUNTER,
-		METADATA,
-		CONTENT
-	} state = COUNTER;
-
-	char buffer[256];
-	int next_count = 1;
-
-	boost::optional<SubRipSubtitle> current;
-	list<string> lines;
-	
-	while (!feof (f)) {
-		fgets (buffer, sizeof (buffer), f);
-		if (feof (f)) {
-			break;
-		}
-		
-		string line (buffer);
-		trim_right_if (line, boost::is_any_of ("\n\r"));
-		
-		switch (state) {
-		case COUNTER:
-		{
-			int x = 0;
-			try {
-				x = lexical_cast<int> (line);
-			} catch (...) {
-
-			}
-			
-			if (x == next_count) {
-				state = METADATA;
-				++next_count;
-				current = SubRipSubtitle ();
-			} else {
-				throw SubRipError (line, _("a subtitle count"), content->path (0));
-			}
-		}
-		break;
-		case METADATA:
-		{
-			vector<string> p;
-			boost::algorithm::split (p, line, boost::algorithm::is_any_of (" "));
-			if (p.size() != 3 && p.size() != 7) {
-				throw SubRipError (line, _("a time/position line"), content->path (0));
-			}
-
-			current->from = convert_time (p[0]);
-			current->to = convert_time (p[2]);
-
-			if (p.size() > 3) {
-				current->x1 = convert_coordinate (p[3]);
-				current->x2 = convert_coordinate (p[4]);
-				current->y1 = convert_coordinate (p[5]);
-				current->y2 = convert_coordinate (p[6]);
-			}
-			state = CONTENT;
-			break;
-		}
-		case CONTENT:
-			if (line.empty ()) {
-				state = COUNTER;
-				current->pieces = convert_content (lines);
-				_subtitles.push_back (current.get ());
-				current.reset ();
-				lines.clear ();
-			} else {
-				lines.push_back (line);
-			}
-			break;
-		}
-	}
-
-	if (state == CONTENT) {
-		current->pieces = convert_content (lines);
-		_subtitles.push_back (current.get ());
-	}
-
-	fclose (f);
-}
-
-ContentTime
-SubRip::convert_time (string t)
-{
-	ContentTime r = 0;
-
-	vector<string> a;
-	boost::algorithm::split (a, t, boost::is_any_of (":"));
-	assert (a.size() == 3);
-	r += lexical_cast<int> (a[0]) * 60 * 60 * TIME_HZ;
-	r += lexical_cast<int> (a[1]) * 60 * TIME_HZ;
-
-	vector<string> b;
-	boost::algorithm::split (b, a[2], boost::is_any_of (","));
-	r += lexical_cast<int> (b[0]) * TIME_HZ;
-	r += lexical_cast<int> (b[1]) * TIME_HZ / 1000;
-
-	return r;
-}
-
-int
-SubRip::convert_coordinate (string t)
-{
-	vector<string> a;
-	boost::algorithm::split (a, t, boost::is_any_of (":"));
-	assert (a.size() == 2);
-	return lexical_cast<int> (a[1]);
-}
-
-void
-SubRip::maybe_content (list<SubRipSubtitlePiece>& pieces, SubRipSubtitlePiece& p)
-{
-	if (!p.text.empty ()) {
-		pieces.push_back (p);
-		p.text.clear ();
-	}
-}
-
-list<SubRipSubtitlePiece>
-SubRip::convert_content (list<string> t)
-{
-	list<SubRipSubtitlePiece> pieces;
-	
-	SubRipSubtitlePiece p;
-
-	enum {
-		TEXT,
-		TAG
-	} state = TEXT;
-
-	string tag;
-
-	/* XXX: missing <font> support */
-	/* XXX: nesting of tags e.g. <b>foo<i>bar<b>baz</b>fred</i>jim</b> might
-	   not work, I think.
-	*/
-
-	for (list<string>::const_iterator i = t.begin(); i != t.end(); ++i) {
-		for (size_t j = 0; j < i->size(); ++j) {
-			switch (state) {
-			case TEXT:
-				if ((*i)[j] == '<' || (*i)[j] == '{') {
-					state = TAG;
-				} else {
-					p.text += (*i)[j];
-				}
-				break;
-			case TAG:
-				if ((*i)[j] == '>' || (*i)[j] == '}') {
-					if (tag == "b") {
-						maybe_content (pieces, p);
-						p.bold = true;
-					} else if (tag == "/b") {
-						maybe_content (pieces, p);
-						p.bold = false;
-					} else if (tag == "i") {
-						maybe_content (pieces, p);
-						p.italic = true;
-					} else if (tag == "/i") {
-						maybe_content (pieces, p);
-						p.italic = false;
-					} else if (tag == "u") {
-						maybe_content (pieces, p);
-						p.underline = true;
-					} else if (tag == "/u") {
-						maybe_content (pieces, p);
-						p.underline = false;
-					}
-					tag.clear ();
-					state = TEXT;
-				} else {
-					tag += (*i)[j];
-				}
-				break;
-			}
-		}
-	}
-
-	maybe_content (pieces, p);
-
-	return pieces;
+	/* Load the whole file, let ICU guess its encoding, then convert to UTF-8 (via UTF-16) for sub::SubripReader */
+	Data in (content->path (0));
+
+	UErrorCode status = U_ZERO_ERROR;
+	UCharsetDetector* detector = ucsdet_open (&status);
+	ucsdet_setText (detector, reinterpret_cast<const char *> (in.data().get()), in.size(), &status);
+	UCharsetMatch const * match = ucsdet_detect (detector, &status);
+	char const * in_charset = ucsdet_getName (match, &status);
+
+	UConverter* to_utf16 = ucnv_open (in_charset, &status);
+	/* This is a guess: allow two UTF-16 code units (i.e. 4 bytes) of output for every byte of input */
+	scoped_array<uint16_t> utf16 (new uint16_t[in.size() * 2]);
+	int const utf16_len = ucnv_toUChars (
+		to_utf16, reinterpret_cast<UChar*>(utf16.get()), in.size() * 2,
+		reinterpret_cast<const char *> (in.data().get()), in.size(), &status);
+
+	UConverter* to_utf8 = ucnv_open ("UTF-8", &status);
+	/* One UTF-16 code unit needs at most 3 bytes of UTF-8, plus 1 byte for the terminating NUL.  The buffer
+	   is zero-filled so that it is still a valid (empty) C string if ICU failed and wrote nothing to it. */
+	int const utf8_capacity = utf16_len * 3 + 1;
+	scoped_array<char> utf8 (new char[utf8_capacity] ());
+	ucnv_fromUChars (to_utf8, utf8.get(), utf8_capacity, reinterpret_cast<UChar*>(utf16.get()), utf16_len, &status);
+
+	ucsdet_close (detector);
+	ucnv_close (to_utf16);
+	ucnv_close (to_utf8);
+
+	sub::SubripReader reader (utf8.get());
+	_subtitles = sub::collect<vector<sub::Subtitle> > (reader.subtitles ());
 }
 
 ContentTime
 SubRip::length () const
 {
 	if (_subtitles.empty ()) {
-		return 0;
+		return ContentTime (); /* no subtitles were read: return a default-constructed time */
 	}
 
-	return _subtitles.back().to;
+	return ContentTime::from_seconds (_subtitles.back().to.all_as_seconds ()); /* `to' time of the last stored subtitle, converted via seconds */
 }