Improved comments.
[libdcp.git] / src / language_tag.cc
index 47b4602429da9b8db9905b29360857e4336c94b5..c2e81869ed133cbb8a10a5eeed5d6a980e3a471f 100644 (file)
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2020 Carl Hetherington <cth@carlh.net>
+    Copyright (C) 2020-2021 Carl Hetherington <cth@carlh.net>
 
     This file is part of libdcp.
 
 */
 
 
+/** @file  src/language_tag.cc
+ *  @brief LanguageTag class
+ */
+
+
 #include "compose.hpp"
 #include "dcp_assert.h"
 #include "exceptions.h"
 #include "language_tag.h"
-#include <boost/foreach.hpp>
+#include <boost/algorithm/string.hpp>
 #include <string>
 
 
+using std::make_pair;
+using std::ostream;
+using std::pair;
 using std::string;
 using std::vector;
 using boost::optional;
+using boost::algorithm::trim;
 using namespace dcp;
 
 
-#include "language_tag_lists.cc"
+static vector<LanguageTag::SubtagData> language_list;
+static vector<LanguageTag::SubtagData> variant_list;
+static vector<LanguageTag::SubtagData> region_list;
+static vector<LanguageTag::SubtagData> script_list;
+static vector<LanguageTag::SubtagData> extlang_list;
 
 
 static
 optional<LanguageTag::SubtagData>
-find_in_list (LanguageTag::SubtagData* list, int length, string subtag)
+find_in_list (vector<LanguageTag::SubtagData> const& list, string subtag)
 {
-       for (int i = 0; i < length; ++i) {
-               if (list[i].subtag == subtag) {
-                       return list[i];
+       for (auto const& i: list) {
+               if (boost::iequals(i.subtag, subtag)) {
+                       return i;
                }
        }
 
-       return optional<LanguageTag::SubtagData>();
+       return {};
 }
 
 
-static
-optional<LanguageTag::SubtagData>
-find_in_list (LanguageTag::SubtagType type, string subtag)
+LanguageTag::Subtag::Subtag (string subtag, SubtagType type)
+       : _subtag (subtag)
 {
-       switch (type) {
-       case dcp::LanguageTag::LANGUAGE:
-               return find_in_list(language_list, sizeof(language_list) / sizeof(LanguageTag::SubtagData), subtag);
-       case dcp::LanguageTag::SCRIPT:
-               return find_in_list(script_list, sizeof(script_list) / sizeof(LanguageTag::SubtagData), subtag);
-       case dcp::LanguageTag::REGION:
-               return find_in_list(region_list, sizeof(region_list) / sizeof(LanguageTag::SubtagData), subtag);
-       case dcp::LanguageTag::VARIANT:
-               return find_in_list(variant_list, sizeof(variant_list) / sizeof(LanguageTag::SubtagData), subtag);
-       case dcp::LanguageTag::EXTLANG:
-               return find_in_list(extlang_list, sizeof(extlang_list) / sizeof(LanguageTag::SubtagData), subtag);
+       if (!get_subtag_data(type, subtag)) {
+               throw LanguageTagError(String::compose("Unknown %1 string %2", subtag_type_name(type), subtag));
        }
-
-       return optional<LanguageTag::SubtagData>();
 }
 
 
-LanguageTag::Subtag::Subtag (string subtag, SubtagType type)
-       : _subtag (subtag)
+LanguageTag::LanguageTag (string tag)
 {
-       if (!find_in_list(type, subtag)) {
-               throw LanguageTagError(String::compose("Unknown %1 string %2", subtag_type_name(type), subtag));
+       vector<string> parts;
+       boost::split (parts, tag, boost::is_any_of("-"));
+       if (parts.empty()) {
+               throw LanguageTagError (String::compose("Could not parse language tag %1", tag));
+       }
+
+       vector<string>::size_type p = 0;
+       _language = LanguageSubtag (parts[p]);
+       ++p;
+
+       if (p == parts.size()) {
+               return;
+       }
+
+       try {
+               _script = ScriptSubtag (parts[p]);
+               ++p;
+       } catch (...) {}
+
+       if (p == parts.size()) {
+               return;
+       }
+
+       try {
+               _region = RegionSubtag (parts[p]);
+               ++p;
+       } catch (...) {}
+
+       if (p == parts.size()) {
+               return;
+       }
+
+       try {
+               while (true) {
+                       _variants.push_back (VariantSubtag(parts[p]));
+                       ++p;
+                       if (p == parts.size()) {
+                               return;
+                       }
+               }
+       } catch (...) {}
+
+       try {
+               while (true) {
+                       _extlangs.push_back (ExtlangSubtag(parts[p]));
+                       ++p;
+                       if (p == parts.size()) {
+                               return;
+                       }
+               }
+       } catch (...) {}
+
+       if (p < parts.size()) {
+               throw LanguageTagError (String::compose("Unrecognised subtag %1", parts[p]));
        }
 }
 
@@ -100,7 +152,7 @@ LanguageTag::to_string () const
                throw LanguageTagError("No language set up");
        }
 
-       string s = _language->subtag();
+       auto s = _language->subtag();
 
        if (_script) {
                s += "-" + _script->subtag();
@@ -110,11 +162,11 @@ LanguageTag::to_string () const
                s += "-" + _region->subtag();
        }
 
-       BOOST_FOREACH (VariantSubtag i, _variants) {
+       for (auto i: _variants) {
                s += "-" + i.subtag();
        }
 
-       BOOST_FOREACH (ExtlangSubtag i, _extlangs) {
+       for (auto i: _extlangs) {
                s += "-" + i.subtag();
        }
 
@@ -161,7 +213,7 @@ check_for_duplicates (vector<T> const& subtags, dcp::LanguageTag::SubtagType typ
        vector<T> sorted = subtags;
        sort (sorted.begin(), sorted.end());
        optional<T> last;
-       BOOST_FOREACH (T const& i, sorted) {
+       for (auto const& i: sorted) {
                if (last && i == *last) {
                        throw LanguageTagError (String::compose("Duplicate %1 subtag %2", dcp::LanguageTag::subtag_type_name(type), i.subtag()));
                }
@@ -173,7 +225,7 @@ check_for_duplicates (vector<T> const& subtags, dcp::LanguageTag::SubtagType typ
 void
 LanguageTag::set_variants (vector<VariantSubtag> variants)
 {
-       check_for_duplicates (variants, VARIANT);
+       check_for_duplicates (variants, SubtagType::VARIANT);
        _variants = variants;
 }
 
@@ -192,7 +244,7 @@ LanguageTag::add_extlang (ExtlangSubtag extlang)
 void
 LanguageTag::set_extlangs (vector<ExtlangSubtag> extlangs)
 {
-       check_for_duplicates (extlangs, EXTLANG);
+       check_for_duplicates (extlangs, SubtagType::EXTLANG);
        _extlangs = extlangs;
 }
 
@@ -206,30 +258,30 @@ LanguageTag::description () const
 
        string d;
 
-       BOOST_FOREACH (VariantSubtag const& i, _variants) {
-               optional<SubtagData> variant = find_in_list (VARIANT, i.subtag());
+       for (auto const& i: _variants) {
+               optional<SubtagData> variant = get_subtag_data (SubtagType::VARIANT, i.subtag());
                DCP_ASSERT (variant);
                d += variant->description + " dialect of ";
        }
 
-       optional<SubtagData> language = find_in_list (LANGUAGE, _language->subtag());
+       auto language = get_subtag_data (SubtagType::LANGUAGE, _language->subtag());
        DCP_ASSERT (language);
        d += language->description;
 
        if (_script) {
-               optional<SubtagData> script = find_in_list (SCRIPT, _script->subtag());
+               auto script = get_subtag_data (SubtagType::SCRIPT, _script->subtag());
                DCP_ASSERT (script);
                d += " written using the " + script->description + " script";
        }
 
        if (_region) {
-               optional<SubtagData> region = find_in_list (REGION, _region->subtag());
+               auto region = get_subtag_data (SubtagType::REGION, _region->subtag());
                DCP_ASSERT (region);
                d += " for " + region->description;
        }
 
-       BOOST_FOREACH (ExtlangSubtag const& i, _extlangs) {
-               optional<SubtagData> extlang = find_in_list (EXTLANG, i.subtag());
+       for (auto const& i: _extlangs) {
+               auto extlang = get_subtag_data (SubtagType::EXTLANG, i.subtag());
                DCP_ASSERT (extlang);
                d += ", " + extlang->description;
        }
@@ -238,40 +290,23 @@ LanguageTag::description () const
 }
 
 
-vector<LanguageTag::SubtagData>
+vector<LanguageTag::SubtagData> const &
 LanguageTag::get_all (SubtagType type)
 {
-       vector<LanguageTag::SubtagData> all;
-
        switch (type) {
-       case LANGUAGE:
-               for (size_t i = 0; i < sizeof(language_list) / sizeof(LanguageTag::SubtagData); ++i) {
-                       all.push_back (language_list[i]);
-               }
-               break;
-       case SCRIPT:
-               for (size_t i = 0; i < sizeof(script_list) / sizeof(LanguageTag::SubtagData); ++i) {
-                       all.push_back (script_list[i]);
-               }
-               break;
-       case REGION:
-               for (size_t i = 0; i < sizeof(region_list) / sizeof(LanguageTag::SubtagData); ++i) {
-                       all.push_back (region_list[i]);
-               }
-               break;
-       case VARIANT:
-               for (size_t i = 0; i < sizeof(variant_list) / sizeof(LanguageTag::SubtagData); ++i) {
-                       all.push_back (variant_list[i]);
-               }
-               break;
-       case EXTLANG:
-               for (size_t i = 0; i < sizeof(extlang_list) / sizeof(LanguageTag::SubtagData); ++i) {
-                       all.push_back (extlang_list[i]);
-               }
-               break;
+       case SubtagType::LANGUAGE:
+               return language_list;
+       case SubtagType::SCRIPT:
+               return script_list;
+       case SubtagType::REGION:
+               return region_list;
+       case SubtagType::VARIANT:
+               return variant_list;
+       case SubtagType::EXTLANG:
+               return extlang_list;
        }
 
-       return all;
+       return language_list;
 }
 
 
@@ -279,19 +314,19 @@ string
 LanguageTag::subtag_type_name (SubtagType type)
 {
        switch (type) {
-               case LANGUAGE:
+               case SubtagType::LANGUAGE:
                        return "Language";
-               case SCRIPT:
+               case SubtagType::SCRIPT:
                        return "Script";
-               case REGION:
+               case SubtagType::REGION:
                        return "Region";
-               case VARIANT:
+               case SubtagType::VARIANT:
                        return "Variant";
-               case EXTLANG:
+               case SubtagType::EXTLANG:
                        return "Extended";
        }
 
-       return "";
+       return {};
 }
 
 bool
@@ -320,3 +355,124 @@ dcp::LanguageTag::ExtlangSubtag::operator< (ExtlangSubtag const & other) const
 {
        return subtag() < other.subtag();
 }
+
+
+bool
+dcp::operator== (dcp::LanguageTag const& a, dcp::LanguageTag const& b)
+{
+       return a.to_string() == b.to_string();
+}
+
+
+ostream&
+dcp::operator<< (ostream& os, dcp::LanguageTag const& tag)
+{
+       os << tag.to_string();
+       return os;
+}
+
+
+vector<pair<LanguageTag::SubtagType, LanguageTag::SubtagData> >
+LanguageTag::subtags () const
+{
+       vector<pair<SubtagType, SubtagData>> s;
+
+       if (_language) {
+               s.push_back (make_pair(SubtagType::LANGUAGE, *get_subtag_data(SubtagType::LANGUAGE, _language->subtag())));
+       }
+
+       if (_script) {
+               s.push_back (make_pair(SubtagType::SCRIPT, *get_subtag_data(SubtagType::SCRIPT, _script->subtag())));
+       }
+
+       if (_region) {
+               s.push_back (make_pair(SubtagType::REGION, *get_subtag_data(SubtagType::REGION, _region->subtag())));
+       }
+
+       for (auto const& i: _variants) {
+               s.push_back (make_pair(SubtagType::VARIANT, *get_subtag_data(SubtagType::VARIANT, i.subtag())));
+       }
+
+       for (auto const& i: _extlangs) {
+               s.push_back (make_pair(SubtagType::EXTLANG, *get_subtag_data(SubtagType::EXTLANG, i.subtag())));
+       }
+
+       return s;
+}
+
+
+optional<LanguageTag::SubtagData>
+LanguageTag::get_subtag_data (LanguageTag::SubtagType type, string subtag)
+{
+       switch (type) {
+       case SubtagType::LANGUAGE:
+               return find_in_list(language_list, subtag);
+       case SubtagType::SCRIPT:
+               return find_in_list(script_list, subtag);
+       case SubtagType::REGION:
+               return find_in_list(region_list, subtag);
+       case SubtagType::VARIANT:
+               return find_in_list(variant_list, subtag);
+       case SubtagType::EXTLANG:
+               return find_in_list(extlang_list, subtag);
+       }
+
+       return {};
+}
+
+
+optional<string>
+LanguageTag::get_subtag_description (LanguageTag::SubtagType type, string subtag)
+{
+       auto data = get_subtag_data (type, subtag);
+       if (!data) {
+               return {};
+       }
+
+       return data->description;
+}
+
+
+void
+load_language_tag_list (boost::filesystem::path tags_directory, string name, vector<LanguageTag::SubtagData>& list)
+{
+       auto f = fopen_boost (tags_directory / name, "r");
+       if (!f) {
+               throw FileError ("Could not open tags file", tags_directory / name, errno);
+       }
+       char buffer[512];
+
+       int i = 0;
+       while (!feof(f)) {
+               char* r = fgets (buffer, sizeof(buffer), f);
+               if (r == 0) {
+                       break;
+               }
+               string a = buffer;
+               trim (a);
+               r = fgets (buffer, sizeof(buffer), f);
+               if (r == 0) {
+                       fclose (f);
+                       throw FileError ("Bad tags file", tags_directory / name, -1);
+               }
+               string b = buffer;
+               trim (b);
+               list.push_back (LanguageTag::SubtagData(a, b));
+               ++i;
+       }
+
+       fclose (f);
+}
+
+
+void
+dcp::load_language_tag_lists (boost::filesystem::path tags_directory)
+{
+       load_language_tag_list (tags_directory, "language", language_list);
+       load_language_tag_list (tags_directory, "variant", variant_list);
+       load_language_tag_list (tags_directory, "region", region_list);
+       load_language_tag_list (tags_directory, "script", script_list);
+       load_language_tag_list (tags_directory, "extlang", extlang_list);
+}
+
+