Logging improvements to allow prettier displays in the server GUI.
[dcpomatic.git] / src / lib / subrip.cc
index 04765532fc2bfaad0f287e8d833aba220f55c3df..a707d1f9fd13641301a3782f0ee84b272ea5a8c9 100644 (file)
 
 */
 
-#include <boost/algorithm/string.hpp>
 #include "subrip.h"
-#include "subrip_content.h"
-#include "subrip_subtitle.h"
 #include "cross.h"
 #include "exceptions.h"
+#include "subrip_content.h"
+#include "data.h"
+#include <sub/subrip_reader.h>
+#include <sub/collect.h>
+#include <unicode/ucsdet.h>
+#include <unicode/ucnv.h>
+#include <iostream>
+#include <stdexcept>
 
 #include "i18n.h"
 
-using std::string;
-using std::list;
 using std::vector;
 using std::cout;
+using std::string;
 using boost::shared_ptr;
-using boost::lexical_cast;
-using boost::algorithm::trim;
+using boost::scoped_array;
 
-SubRip::SubRip (shared_ptr<SubRipContent> content)
+SubRip::SubRip (shared_ptr<const SubRipContent> content)
 {
-       FILE* f = fopen_boost (content->path (0), "r");
-       if (!f) {
-               throw OpenFileError (content->path (0));
-       }
-
-       enum {
-               COUNTER,
-               METADATA,
-               CONTENT
-       } state = COUNTER;
-
-       char buffer[256];
-       int next_count = 1;
-
-       boost::optional<SubRipSubtitle> current;
-       list<string> lines;
-       
-       while (!feof (f)) {
-               fgets (buffer, sizeof (buffer), f);
-               if (feof (f)) {
-                       break;
-               }
-               
-               string line (buffer);
-               trim_right_if (line, boost::is_any_of ("\n\r"));
-               
-               switch (state) {
-               case COUNTER:
-               {
-                       int x = 0;
-                       try {
-                               x = lexical_cast<int> (line);
-                       } catch (...) {
-
-                       }
-                       
-                       if (x == next_count) {
-                               state = METADATA;
-                               ++next_count;
-                               current = SubRipSubtitle ();
-                       } else {
-                               throw SubRipError (line, _("a subtitle count"), content->path (0));
-                       }
-               }
-               break;
-               case METADATA:
-               {
-                       vector<string> p;
-                       boost::algorithm::split (p, line, boost::algorithm::is_any_of (" "));
-                       if (p.size() != 3 && p.size() != 7) {
-                               throw SubRipError (line, _("a time/position line"), content->path (0));
-                       }
-
-                       current->from = convert_time (p[0]);
-                       current->to = convert_time (p[2]);
-
-                       if (p.size() > 3) {
-                               current->x1 = convert_coordinate (p[3]);
-                               current->x2 = convert_coordinate (p[4]);
-                               current->y1 = convert_coordinate (p[5]);
-                               current->y2 = convert_coordinate (p[6]);
-                       }
-                       state = CONTENT;
-                       break;
-               }
-               case CONTENT:
-                       if (line.empty ()) {
-                               state = COUNTER;
-                               current->pieces = convert_content (lines);
-                               _subtitles.push_back (current.get ());
-                               current.reset ();
-                               lines.clear ();
-                       } else {
-                               lines.push_back (line);
-                       }
-                       break;
-               }
-       }
-
-       if (state == CONTENT) {
-               current->pieces = convert_content (lines);
-               _subtitles.push_back (current.get ());
-       }
-
-       fclose (f);
-}
-
-Time
-SubRip::convert_time (string t)
-{
-       Time r = 0;
-
-       vector<string> a;
-       boost::algorithm::split (a, t, boost::is_any_of (":"));
-       assert (a.size() == 3);
-       r += lexical_cast<int> (a[0]) * 60 * 60 * TIME_HZ;
-       r += lexical_cast<int> (a[1]) * 60 * TIME_HZ;
-
-       vector<string> b;
-       boost::algorithm::split (b, a[2], boost::is_any_of (","));
-       r += lexical_cast<int> (b[0]) * TIME_HZ;
-       r += lexical_cast<int> (b[1]) * TIME_HZ / 1000;
-
-       return r;
-}
-
-int
-SubRip::convert_coordinate (string t)
-{
-       vector<string> a;
-       boost::algorithm::split (a, t, boost::is_any_of (":"));
-       assert (a.size() == 2);
-       return lexical_cast<int> (a[1]);
-}
-
-void
-SubRip::maybe_content (list<SubRipSubtitlePiece>& pieces, SubRipSubtitlePiece& p)
-{
-       if (!p.text.empty ()) {
-               pieces.push_back (p);
-               p.text.clear ();
-       }
-}
-
-list<SubRipSubtitlePiece>
-SubRip::convert_content (list<string> t)
-{
-       list<SubRipSubtitlePiece> pieces;
-       
-       SubRipSubtitlePiece p;
-
-       enum {
-               TEXT,
-               TAG
-       } state = TEXT;
-
-       string tag;
-
-       /* XXX: missing <font> support */
-       /* XXX: nesting of tags e.g. <b>foo<i>bar<b>baz</b>fred</i>jim</b> might
-          not work, I think.
-       */
-
-       for (list<string>::const_iterator i = t.begin(); i != t.end(); ++i) {
-               for (size_t j = 0; j < i->size(); ++j) {
-                       switch (state) {
-                       case TEXT:
-                               if ((*i)[j] == '<' || (*i)[j] == '{') {
-                                       state = TAG;
-                               } else {
-                                       p.text += (*i)[j];
-                               }
-                               break;
-                       case TAG:
-                               if ((*i)[j] == '>' || (*i)[j] == '}') {
-                                       if (tag == "b") {
-                                               maybe_content (pieces, p);
-                                               p.bold = true;
-                                       } else if (tag == "/b") {
-                                               maybe_content (pieces, p);
-                                               p.bold = false;
-                                       } else if (tag == "i") {
-                                               maybe_content (pieces, p);
-                                               p.italic = true;
-                                       } else if (tag == "/i") {
-                                               maybe_content (pieces, p);
-                                               p.italic = false;
-                                       } else if (tag == "u") {
-                                               maybe_content (pieces, p);
-                                               p.underline = true;
-                                       } else if (tag == "/u") {
-                                               maybe_content (pieces, p);
-                                               p.underline = false;
-                                       }
-                                       tag.clear ();
-                                       state = TEXT;
-                               } else {
-                                       tag += (*i)[j];
-                               }
-                               break;
-                       }
-               }
-       }
-
-       maybe_content (pieces, p);
-
-       return pieces;
+       /* Read the whole file, work out its character set with ICU, convert it
+          to UTF-8 (via UTF-16, which is ICU's pivot format) and hand the result
+          to libsub's SubRip reader.  Throws std::runtime_error if the character
+          set cannot be detected or the text cannot be converted.
+       */
+       Data in (content->path (0));
+       if (in.size() == 0) {
+               /* Nothing to detect or convert; leave _subtitles empty */
+               return;
+       }
+
+       /* Once an ICU call has stored a failure in status, later ICU calls which
+          take it return without doing anything, so it is enough to check it
+          at the end of each stage.
+       */
+       UErrorCode status = U_ZERO_ERROR;
+
+       /* The ICU handles are held by shared_ptrs with the matching close function
+          as deleter so that they are released if we throw.
+       */
+       shared_ptr<UCharsetDetector> detector (ucsdet_open (&status), ucsdet_close);
+       ucsdet_setText (detector.get(), reinterpret_cast<const char *> (in.data().get()), in.size(), &status);
+
+       /* The match (and the name we get from it) belongs to the detector */
+       UCharsetMatch const * match = ucsdet_detect (detector.get(), &status);
+       if (U_FAILURE (status) || !match) {
+               throw std::runtime_error (
+                       string ("could not detect the character set of a SubRip file: ") + u_errorName (status)
+                       );
+       }
+
+       char const * in_charset = ucsdet_getName (match, &status);
+
+       shared_ptr<UConverter> to_utf16 (ucnv_open (in_charset, &status), ucnv_close);
+       /* Two UTF-16 code units per input byte: a byte can at worst become one
+          code point, which is at most a surrogate pair.
+       */
+       scoped_array<uint16_t> utf16 (new uint16_t[in.size() * 2]);
+       int const utf16_len = ucnv_toUChars (
+               to_utf16.get(), reinterpret_cast<UChar*>(utf16.get()), in.size() * 2,
+               reinterpret_cast<const char *> (in.data().get()), in.size(),
+               &status
+               );
+
+       shared_ptr<UConverter> to_utf8 (ucnv_open ("UTF-8", &status), ucnv_close);
+       if (U_FAILURE (status)) {
+               throw std::runtime_error (
+                       string ("could not convert a SubRip file to UTF-16: ") + u_errorName (status)
+                       );
+       }
+
+       /* Ask ICU for the worst-case output size rather than guessing: a single
+          UTF-16 code unit can need 3 bytes of UTF-8, so twice the length is not
+          enough for (for example) CJK text.  The macro's result also leaves
+          room for the terminating NUL.
+       */
+       int const utf8_size = UCNV_GET_MAX_BYTES_FOR_STRING (utf16_len, ucnv_getMaxCharSize (to_utf8.get()));
+       scoped_array<char> utf8 (new char[utf8_size]);
+       int const utf8_len = ucnv_fromUChars (
+               to_utf8.get(), utf8.get(), utf8_size,
+               reinterpret_cast<UChar*>(utf16.get()), utf16_len,
+               &status
+               );
+       if (U_FAILURE (status) || utf8_len >= utf8_size) {
+               throw std::runtime_error (
+                       string ("could not convert a SubRip file to UTF-8: ") + u_errorName (status)
+                       );
+       }
+
+       /* The reader is given a C string, so make sure it is terminated */
+       utf8[utf8_len] = '\0';
+
+       sub::SubripReader reader (utf8.get());
+       _subtitles = sub::collect<vector<sub::Subtitle> > (reader.subtitles ());
 }
 
-Time
+/** @return The `to' time of the last subtitle that was read, as a ContentTime,
+ *  or a default-constructed ContentTime if there are no subtitles.
+ */
+ContentTime
 SubRip::length () const
 {
-       if (_subtitles.empty ()) {
-               return 0;
-       }
-
-       return _subtitles.back().to;
+       if (!_subtitles.empty ()) {
+               /* Go via seconds to get from the subtitle's time type to ContentTime */
+               return ContentTime::from_seconds (_subtitles.back().to.all_as_seconds ());
+       }
+
+       return ContentTime ();
 }