X-Git-Url: https://main.carlh.net/gitweb/?p=dcpomatic.git;a=blobdiff_plain;f=src%2Flib%2Fsubrip.cc;h=a707d1f9fd13641301a3782f0ee84b272ea5a8c9;hp=380a2ce2cb7e7757c2e1fc7472f571d72aa0a693;hb=aeb835a18c8df347e0ed68fb24631b320abeb611;hpb=1629bd7df2150156109afbc7a16677cb29e82adf diff --git a/src/lib/subrip.cc b/src/lib/subrip.cc index 380a2ce2c..a707d1f9f 100644 --- a/src/lib/subrip.cc +++ b/src/lib/subrip.cc @@ -17,220 +17,64 @@ */ -#include #include "subrip.h" -#include "subrip_content.h" -#include "subrip_subtitle.h" #include "cross.h" #include "exceptions.h" +#include "subrip_content.h" +#include "data.h" +#include +#include +#include +#include +#include #include "i18n.h" -using std::string; -using std::list; using std::vector; using std::cout; +using std::string; using boost::shared_ptr; -using boost::lexical_cast; -using boost::algorithm::trim; +using boost::scoped_array; SubRip::SubRip (shared_ptr content) { - FILE* f = fopen_boost (content->path (0), "r"); - if (!f) { - throw OpenFileError (content->path (0)); - } - - enum { - COUNTER, - METADATA, - CONTENT - } state = COUNTER; - - char buffer[256]; - int next_count = 1; - - boost::optional current; - list lines; - - while (!feof (f)) { - fgets (buffer, sizeof (buffer), f); - if (feof (f)) { - break; - } - - string line (buffer); - trim_right_if (line, boost::is_any_of ("\n\r")); - - switch (state) { - case COUNTER: - { - int x = 0; - try { - x = lexical_cast (line); - } catch (...) { - - } - - if (x == next_count) { - state = METADATA; - ++next_count; - current = SubRipSubtitle (); - } else { - throw SubRipError (line, _("a subtitle count"), content->path (0)); - } - } - break; - case METADATA: - { - vector p; - boost::algorithm::split (p, line, boost::algorithm::is_any_of (" ")); - if (p.size() != 3 && p.size() != 7) { - throw SubRipError (line, _("a time/position line"), content->path (0)); - } - - current->from = convert_time (p[0]); - current->to = convert_time (p[2]); - - if (p.size() > 3) { - current->x1 = convert_coordinate (p[3]); - current->x2 = convert_coordinate (p[4]); - current->y1 = convert_coordinate (p[5]); - current->y2 = convert_coordinate (p[6]); - } - state = CONTENT; - break; - } - case CONTENT: - if (line.empty ()) { - state = COUNTER; - current->pieces = convert_content (lines); - _subtitles.push_back (current.get ()); - current.reset (); - lines.clear (); - } else { - lines.push_back (line); - } - break; - } - } - - if (state == CONTENT) { - current->pieces = convert_content (lines); - _subtitles.push_back (current.get ()); - } - - fclose (f); -} - -ContentTime -SubRip::convert_time (string t) -{ - ContentTime r = 0; - - vector a; - boost::algorithm::split (a, t, boost::is_any_of (":")); - assert (a.size() == 3); - r += lexical_cast (a[0]) * 60 * 60 * TIME_HZ; - r += lexical_cast (a[1]) * 60 * TIME_HZ; - - vector b; - boost::algorithm::split (b, a[2], boost::is_any_of (",")); - r += lexical_cast (b[0]) * TIME_HZ; - r += lexical_cast (b[1]) * TIME_HZ / 1000; - - return r; -} - -int -SubRip::convert_coordinate (string t) -{ - vector a; - boost::algorithm::split (a, t, boost::is_any_of (":")); - assert (a.size() == 2); - return lexical_cast (a[1]); -} - -void -SubRip::maybe_content (list& pieces, SubRipSubtitlePiece& p) -{ - if (!p.text.empty ()) { - pieces.push_back (p); - p.text.clear (); - } -} - -list -SubRip::convert_content (list t) -{ - list pieces; - - SubRipSubtitlePiece p; - - enum { - TEXT, - TAG - } state = TEXT; - - string tag; - - /* XXX: missing support */ - /* XXX: nesting of tags e.g. foobarbazfredjim might - not work, I think. - */ - - for (list::const_iterator i = t.begin(); i != t.end(); ++i) { - for (size_t j = 0; j < i->size(); ++j) { - switch (state) { - case TEXT: - if ((*i)[j] == '<' || (*i)[j] == '{') { - state = TAG; - } else { - p.text += (*i)[j]; - } - break; - case TAG: - if ((*i)[j] == '>' || (*i)[j] == '}') { - if (tag == "b") { - maybe_content (pieces, p); - p.bold = true; - } else if (tag == "/b") { - maybe_content (pieces, p); - p.bold = false; - } else if (tag == "i") { - maybe_content (pieces, p); - p.italic = true; - } else if (tag == "/i") { - maybe_content (pieces, p); - p.italic = false; - } else if (tag == "u") { - maybe_content (pieces, p); - p.underline = true; - } else if (tag == "/u") { - maybe_content (pieces, p); - p.underline = false; - } - tag.clear (); - state = TEXT; - } else { - tag += (*i)[j]; - } - break; - } - } - } - - maybe_content (pieces, p); - - return pieces; + Data in (content->path (0)); + + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector* detector = ucsdet_open (&status); + ucsdet_setText (detector, reinterpret_cast (in.data().get()), in.size(), &status); + + UCharsetMatch const * match = ucsdet_detect (detector, &status); + char const * in_charset = ucsdet_getName (match, &status); + + UConverter* to_utf16 = ucnv_open (in_charset, &status); + /* This is a guess; I think we should be able to encode any input in 4 times its input size */ + scoped_array utf16 (new uint16_t[in.size() * 2]); + int const utf16_len = ucnv_toUChars ( + to_utf16, reinterpret_cast(utf16.get()), in.size() * 2, + reinterpret_cast (in.data().get()), in.size(), + &status + ); + + UConverter* to_utf8 = ucnv_open ("UTF-8", &status); + /* Another guess */ + scoped_array utf8 (new char[utf16_len * 2]); + ucnv_fromUChars (to_utf8, utf8.get(), utf16_len * 2, reinterpret_cast(utf16.get()), utf16_len, &status); + + ucsdet_close (detector); + ucnv_close (to_utf16); + ucnv_close (to_utf8); + + sub::SubripReader reader (utf8.get()); + _subtitles = sub::collect > (reader.subtitles ()); } ContentTime SubRip::length () const { if (_subtitles.empty ()) { - return 0; + return ContentTime (); } - return _subtitles.back().to; + return ContentTime::from_seconds (_subtitles.back().to.all_as_seconds ()); }