X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;f=src%2Flib%2Fstring_text_file.cc;h=d3c56832dfb85a40ccc979b6d85a7b1cdf13779d;hb=ca56871d4860b8ead384c410fe374c2fa993f88f;hp=c7eadbbb1a9332ba1f7312335bd7ad8acd9e26fd;hpb=df17bbd25da69fc38eb2dcd8b4a2531cf0bab0bc;p=dcpomatic.git diff --git a/src/lib/string_text_file.cc b/src/lib/string_text_file.cc index c7eadbbb1..d3c56832d 100644 --- a/src/lib/string_text_file.cc +++ b/src/lib/string_text_file.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2018 Carl Hetherington + Copyright (C) 2014-2020 Carl Hetherington This file is part of DCP-o-matic. @@ -24,6 +24,7 @@ #include "string_text_file_content.h" #include #include +#include #include #include #include @@ -38,53 +39,71 @@ using boost::shared_ptr; using boost::scoped_array; using boost::optional; using dcp::Data; +using namespace dcpomatic; StringTextFile::StringTextFile (shared_ptr content) { - Data in (content->path (0)); - - UErrorCode status = U_ZERO_ERROR; - UCharsetDetector* detector = ucsdet_open (&status); - ucsdet_setText (detector, reinterpret_cast (in.data().get()), in.size(), &status); - - UCharsetMatch const * match = ucsdet_detect (detector, &status); - char const * in_charset = ucsdet_getName (match, &status); - - UConverter* to_utf16 = ucnv_open (in_charset, &status); - /* This is a guess; I think we should be able to encode any input in 4 times its input size */ - scoped_array utf16 (new uint16_t[in.size() * 2]); - int const utf16_len = ucnv_toUChars ( - to_utf16, reinterpret_cast(utf16.get()), in.size() * 2, - reinterpret_cast (in.data().get()), in.size(), - &status - ); - - UConverter* to_utf8 = ucnv_open ("UTF-8", &status); - /* Another guess */ - scoped_array utf8 (new char[utf16_len * 2]); - ucnv_fromUChars (to_utf8, utf8.get(), utf16_len * 2, reinterpret_cast(utf16.get()), utf16_len, &status); - - /* Fix OS X line endings */ - size_t utf8_len = strlen (utf8.get ()); - for (size_t i = 0; i < utf8_len; ++i) { - if (utf8[i] == '\r' && ((i == utf8_len - 1) || utf8[i + 1] != '\n')) { - utf8[i] = '\n'; - } - } - - ucsdet_close (detector); - ucnv_close (to_utf16); - ucnv_close (to_utf8); + string ext = content->path(0).extension().string(); + transform (ext.begin(), ext.end(), ext.begin(), ::tolower); sub::Reader* reader = 0; - string ext = content->path(0).extension().string(); - transform (ext.begin(), ext.end(), ext.begin(), ::tolower); + if (ext == ".stl") { + FILE* f = fopen_boost (content->path(0), "rb"); + if (!f) { + throw OpenFileError (content->path(0), errno, OpenFileError::READ); + } + try { + reader = new sub::STLBinaryReader (f); + } catch (...) { + fclose (f); + throw; + } + fclose (f); + + } else { + /* Text-based file; sort out its character encoding before we try to parse it */ + + Data in (content->path (0)); + + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector* detector = ucsdet_open (&status); + ucsdet_setText (detector, reinterpret_cast (in.data().get()), in.size(), &status); + + UCharsetMatch const * match = ucsdet_detect (detector, &status); + char const * in_charset = ucsdet_getName (match, &status); + + UConverter* to_utf16 = ucnv_open (in_charset, &status); + /* This is a guess; I think we should be able to encode any input in 4 times its input size */ + scoped_array utf16 (new uint16_t[in.size() * 2]); + int const utf16_len = ucnv_toUChars ( + to_utf16, reinterpret_cast(utf16.get()), in.size() * 2, + reinterpret_cast (in.data().get()), in.size(), + &status + ); + + UConverter* to_utf8 = ucnv_open ("UTF-8", &status); + /* Another guess */ + scoped_array utf8 (new char[utf16_len * 2]); + ucnv_fromUChars (to_utf8, utf8.get(), utf16_len * 2, reinterpret_cast(utf16.get()), utf16_len, &status); + + /* Fix OS X line endings */ + size_t utf8_len = strlen (utf8.get ()); + for (size_t i = 0; i < utf8_len; ++i) { + if (utf8[i] == '\r' && ((i == utf8_len - 1) || utf8[i + 1] != '\n')) { + utf8[i] = '\n'; + } + } + + ucsdet_close (detector); + ucnv_close (to_utf16); + ucnv_close (to_utf8); - if (ext == ".srt") { - reader = new sub::SubripReader (utf8.get()); - } else if (ext == ".ssa" || ext == ".ass") { - reader = new sub::SSAReader (utf8.get()); + if (ext == ".srt") { + reader = new sub::SubripReader (utf8.get()); + } else if (ext == ".ssa" || ext == ".ass") { + reader = new sub::SSAReader (utf8.get()); + } } if (reader) {