Add "U8" character code table for binary STL.
author: Carl Hetherington <cth@carlh.net>
Thu, 3 Jun 2021 14:13:58 +0000 (16:13 +0200)
committer: Carl Hetherington <cth@carlh.net>
Thu, 3 Jun 2021 14:17:23 +0000 (16:17 +0200)
A file was sent that has "U8" as an STL CCT.  I presume this means
"UTF-8" but I can't see any documentation about it anywhere.

This change accepts U8 as a CCT.  It doesn't look like this value
is actually used anywhere to change how text is parsed, but at
least it doesn't fail now.

src/stl_binary_tables.cc
src/stl_binary_tables.h
test/stl_binary_reader_test.cc

index db623bf32e705783d791e6c86d3030774ee338c8..2d275df82ce4fca2bae71d03d47fba220c769195 100644 (file)
@@ -211,6 +211,7 @@ STLBinaryTables::STLBinaryTables ()
        code<LanguageGroup, string> (_language_group_map, "02", LANGUAGE_GROUP_LATIN_ARABIC, "Latin/Arabic");
        code<LanguageGroup, string> (_language_group_map, "03", LANGUAGE_GROUP_LATIN_GREEK, "Latin/Greek");
        code<LanguageGroup, string> (_language_group_map, "04", LANGUAGE_GROUP_LATIN_HEBREW, "Latin/Hebrew");
+       code<LanguageGroup, string> (_language_group_map, "U8", LANGUAGE_GROUP_UTF8, "UTF-8");  // non-standard code seen in a real file; presumed to mean UTF-8
 
        code<Language, string> (_language_map, "00", LANGUAGE_UNKNOWN, "Unknown");
        code<Language, string> (_language_map, "01", LANGUAGE_ALBANIAN, "Albanian");
index 4a6a7d86d6afa5ee236c7a0f51c7640ff58d9ee4..46214135f0b616a737e2c00fefaf2683c7c83221 100644 (file)
@@ -38,7 +38,8 @@ enum LanguageGroup {
        LANGUAGE_GROUP_LATIN_CYRILLIC,
        LANGUAGE_GROUP_LATIN_ARABIC,
        LANGUAGE_GROUP_LATIN_GREEK,
-       LANGUAGE_GROUP_LATIN_HEBREW
+       LANGUAGE_GROUP_LATIN_HEBREW,
+       LANGUAGE_GROUP_UTF8,
 };
 
 enum Language {
index dfd7c0a84a2d22b42a8ca520c859e8d4a9bdda41..cf97b5afb6afed8915d726c036099c28a51c4491 100644 (file)
@@ -68,3 +68,17 @@ BOOST_AUTO_TEST_CASE (stl_binary_reader_test2)
        sub::dump (r, dump_stream);
        dump_stream.close ();
 }
+
+
+/** Test reading a file which raised "Unknown language group code U8".
+ *  There are no explicit checks: the test only constructs a reader on the
+ *  file.  It returns early when private_test is empty.
+ */
+BOOST_AUTO_TEST_CASE (stl_binary_reader_test3)
+{
+       if (private_test.empty()) {
+               return;
+       }
+
+       auto path = private_test / "hsk.stl";
+       /* This is a binary STL file, so open it in binary mode to make sure the
+          reader sees the bytes unmodified on every platform */
+       ifstream in (path.string().c_str(), std::ios::binary);
+       auto reader = make_shared<sub::STLBinaryReader>(in);
+}
+