X-Git-Url: https://main.carlh.net/gitweb/?p=dcpomatic.git;a=blobdiff_plain;f=src%2Flib%2Fstring_text_file.cc;h=8c2c5651854e08a625f2d9602bfc9aaede91cf43;hp=bf19e482c9ea2e10e13773f39985525fc114ed44;hb=da44da6f31f97d39ca91c35955e573e76371f2c2;hpb=c4403784febdbdd42e9c32e67fadb147f11fe566 diff --git a/src/lib/string_text_file.cc b/src/lib/string_text_file.cc index bf19e482c..8c2c56518 100644 --- a/src/lib/string_text_file.cc +++ b/src/lib/string_text_file.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2018 Carl Hetherington + Copyright (C) 2014-2020 Carl Hetherington This file is part of DCP-o-matic. @@ -24,6 +24,7 @@ #include "string_text_file_content.h" #include #include +#include #include #include #include @@ -34,65 +35,80 @@ using std::vector; using std::cout; using std::string; -using boost::shared_ptr; +using std::shared_ptr; using boost::scoped_array; using boost::optional; -using dcp::Data; +using dcp::ArrayData; using namespace dcpomatic; StringTextFile::StringTextFile (shared_ptr content) { - Data in (content->path (0)); - - UErrorCode status = U_ZERO_ERROR; - UCharsetDetector* detector = ucsdet_open (&status); - ucsdet_setText (detector, reinterpret_cast (in.data().get()), in.size(), &status); - - UCharsetMatch const * match = ucsdet_detect (detector, &status); - char const * in_charset = ucsdet_getName (match, &status); - - UConverter* to_utf16 = ucnv_open (in_charset, &status); - /* This is a guess; I think we should be able to encode any input in 4 times its input size */ - scoped_array utf16 (new uint16_t[in.size() * 2]); - int const utf16_len = ucnv_toUChars ( - to_utf16, reinterpret_cast(utf16.get()), in.size() * 2, - reinterpret_cast (in.data().get()), in.size(), - &status - ); - - UConverter* to_utf8 = ucnv_open ("UTF-8", &status); - /* Another guess */ - scoped_array utf8 (new char[utf16_len * 2]); - ucnv_fromUChars (to_utf8, utf8.get(), utf16_len * 2, reinterpret_cast(utf16.get()), utf16_len, &status); - - /* Fix OS X line endings */ - size_t utf8_len = strlen (utf8.get ()); - for (size_t i = 0; i < utf8_len; ++i) { - if (utf8[i] == '\r' && ((i == utf8_len - 1) || utf8[i + 1] != '\n')) { - utf8[i] = '\n'; - } - } + string ext = content->path(0).extension().string(); + transform (ext.begin(), ext.end(), ext.begin(), ::tolower); - ucsdet_close (detector); - ucnv_close (to_utf16); - ucnv_close (to_utf8); + std::unique_ptr reader; - sub::Reader* reader = 0; + if (ext == ".stl") { + auto f = fopen_boost (content->path(0), "rb"); + if (!f) { + throw OpenFileError (content->path(0), errno, OpenFileError::READ); + } + try { + reader.reset(new sub::STLBinaryReader(f)); + } catch (...) { + fclose (f); + throw; + } + fclose (f); + + } else { + /* Text-based file; sort out its character encoding before we try to parse it */ + + ArrayData in (content->path (0)); + + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector* detector = ucsdet_open (&status); + ucsdet_setText (detector, reinterpret_cast(in.data()), in.size(), &status); + + UCharsetMatch const * match = ucsdet_detect (detector, &status); + char const * in_charset = ucsdet_getName (match, &status); + + UConverter* to_utf16 = ucnv_open (in_charset, &status); + /* This is a guess; I think we should be able to encode any input in 4 times its input size */ + scoped_array utf16 (new uint16_t[in.size() * 2]); + int const utf16_len = ucnv_toUChars ( + to_utf16, reinterpret_cast(utf16.get()), in.size() * 2, + reinterpret_cast(in.data()), in.size(), + &status + ); + + UConverter* to_utf8 = ucnv_open ("UTF-8", &status); + /* Another guess */ + scoped_array utf8 (new char[utf16_len * 2]); + ucnv_fromUChars (to_utf8, utf8.get(), utf16_len * 2, reinterpret_cast(utf16.get()), utf16_len, &status); + + /* Fix OS X line endings */ + size_t utf8_len = strlen (utf8.get ()); + for (size_t i = 0; i < utf8_len; ++i) { + if (utf8[i] == '\r' && ((i == utf8_len - 1) || utf8[i + 1] != '\n')) { + utf8[i] = '\n'; + } + } - string ext = content->path(0).extension().string(); - transform (ext.begin(), ext.end(), ext.begin(), ::tolower); + ucsdet_close (detector); + ucnv_close (to_utf16); + ucnv_close (to_utf8); - if (ext == ".srt") { - reader = new sub::SubripReader (utf8.get()); - } else if (ext == ".ssa" || ext == ".ass") { - reader = new sub::SSAReader (utf8.get()); + if (ext == ".srt") { + reader.reset(new sub::SubripReader(utf8.get())); + } else if (ext == ".ssa" || ext == ".ass") { + reader.reset(new sub::SSAReader(utf8.get())); + } } if (reader) { - _subtitles = sub::collect > (reader->subtitles ()); + _subtitles = sub::collect>(reader->subtitles()); } - - delete reader; } /** @return time of first subtitle, if there is one */