From 04d4661dd008284aedc95b2a1f9e120cee17571b Mon Sep 17 00:00:00 2001
From: Carl Hetherington
Date: Thu, 27 Aug 2020 22:48:00 +0200
Subject: [PATCH] Allow LanguageTag to be constructed from a tag string.

---
Review notes (everything between the "---" line and the diffstat is ignored
when the patch is applied):

 * The constructor's catch (...) clauses are narrowed to
   catch (LanguageTagError const&) so that unrelated exceptions are no longer
   silently discarded while probing subtag types.  This assumes that every
   subtag constructor reports a rejected string with LanguageTagError; the
   tests below only establish that for LanguageSubtag.  Please confirm.
 * The variant/extlang loops are now bounded by p < parts.size() instead of
   while (true) with an inner return; the parts visited are the same.
 * The parts.empty() guard is kept as-is.  It appears to be unreachable, as
   boost::split yields one empty token for an empty input, so "" is rejected
   by LanguageSubtag rather than by this guard.
 * This copy of the patch had lost its line structure and everything inside
   angle brackets.  vector<string> and <boost/algorithm/string.hpp> were
   reconstructed from how the code uses them.  The two bare "#include"
   context lines in the first hunk, and all indentation and blank context
   lines, could not be recovered exactly and must be checked against the
   tree before applying.  The index lines still carry the original blob ids.

 src/language_tag.cc       | 71 +++++++++++++++++++++++++++++++++++++++
 src/language_tag.h        |  3 ++
 test/language_tag_test.cc | 21 +++++++++++++-
 3 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/src/language_tag.cc b/src/language_tag.cc
index 47b46024..b64dab72 100644
--- a/src/language_tag.cc
+++ b/src/language_tag.cc
@@ -36,6 +36,7 @@
 #include "dcp_assert.h"
 #include "exceptions.h"
 #include "language_tag.h"
+#include <boost/algorithm/string.hpp>
 #include
 #include

@@ -93,6 +94,76 @@ LanguageTag::Subtag::Subtag (string subtag, SubtagType type)
 }


+/* Construct a LanguageTag by parsing a hyphen-separated tag string such as
+   "de-Dogr-DE-aranes".  The first part must be a language subtag; it may be
+   followed, in this order, by an optional script subtag, an optional region
+   subtag, any number of variant subtags and any number of extlang subtags.
+
+   NOTE(review): RFC 5646 puts extlang directly after the primary language;
+   here it is only accepted last, which is what the round-trip tests expect.
+
+   @param tag Tag string to parse.
+   @throw LanguageTagError if LanguageSubtag rejects the first part, or if a
+   later part is not accepted as any of the subtag types tried for it.
+*/
+LanguageTag::LanguageTag (string tag)
+{
+	vector<string> parts;
+	boost::split (parts, tag, boost::is_any_of("-"));
+	if (parts.empty()) {
+		throw LanguageTagError (String::compose("Could not parse language tag %1", tag));
+	}
+
+	vector<string>::size_type p = 0;
+	_language = LanguageSubtag (parts[p]);
+	++p;
+
+	if (p == parts.size()) {
+		return;
+	}
+
+	/* Script and region are optional.  If a part is rejected as one subtag
+	   type, p is left alone so that the same part is offered to the next
+	   type.  Only LanguageTagError is taken to mean "not this type" (assumes
+	   every subtag constructor reports rejection that way, as LanguageSubtag
+	   does in the tests); any other exception now reaches the caller.
+	*/
+	try {
+		_script = ScriptSubtag (parts[p]);
+		++p;
+	} catch (LanguageTagError const&) {}
+
+	if (p == parts.size()) {
+		return;
+	}
+
+	try {
+		_region = RegionSubtag (parts[p]);
+		++p;
+	} catch (LanguageTagError const&) {}
+
+	/* Take as many variants as will parse, then as many extlangs */
+	try {
+		while (p < parts.size()) {
+			_variants.push_back (VariantSubtag(parts[p]));
+			++p;
+		}
+	} catch (LanguageTagError const&) {}
+
+	try {
+		while (p < parts.size()) {
+			_extlangs.push_back (ExtlangSubtag(parts[p]));
+			++p;
+		}
+	} catch (LanguageTagError const&) {}
+
+	/* Anything left over was rejected by every subtag type tried for it */
+	if (p < parts.size()) {
+		throw LanguageTagError (String::compose("Unrecognised subtag %1", parts[p]));
+	}
+}
+
+
 string
 LanguageTag::to_string () const
 {
diff --git a/src/language_tag.h b/src/language_tag.h
index 7a6fd159..8aa8a723 100644
--- a/src/language_tag.h
+++ b/src/language_tag.h
@@ -141,6 +141,9 @@ public:
 		bool operator< (ExtlangSubtag const& other) const;
 	};

+	LanguageTag () {}
+	LanguageTag (std::string tag);
+
 	void set_language (LanguageSubtag language);
 	void set_script (ScriptSubtag script);
 	void set_region (RegionSubtag region);
diff --git a/test/language_tag_test.cc b/test/language_tag_test.cc
index fd90b629..28c46b63 100644
--- a/test/language_tag_test.cc
+++ b/test/language_tag_test.cc
@@ -41,7 +41,7 @@ using std::vector;
 using std::string;


-BOOST_AUTO_TEST_CASE (language_tag_test)
+BOOST_AUTO_TEST_CASE (language_tag_create_test)
 {
 	/* Bad subtags raise errors */

@@ -177,3 +177,22 @@ BOOST_AUTO_TEST_CASE (language_tag_test)
 	}

 }
+
+
+BOOST_AUTO_TEST_CASE (language_tag_parse_test)
+{
+	BOOST_CHECK_THROW (dcp::LanguageTag(""), dcp::LanguageTagError);
+	BOOST_CHECK_THROW (dcp::LanguageTag("...Aw498012351!"), dcp::LanguageTagError);
+	BOOST_CHECK_THROW (dcp::LanguageTag("fish"), dcp::LanguageTagError);
+	BOOST_CHECK_THROW (dcp::LanguageTag("de-Dogr-fish"), dcp::LanguageTagError);
+	BOOST_CHECK_THROW (dcp::LanguageTag("de-Dogr-DE-aranes-fish"), dcp::LanguageTagError);
+
+	BOOST_CHECK_EQUAL (dcp::LanguageTag("de").to_string(), "de");
+	BOOST_CHECK_EQUAL (dcp::LanguageTag("de-Dogr").to_string(), "de-Dogr");
+	BOOST_CHECK_EQUAL (dcp::LanguageTag("de-Dogr-DE").to_string(), "de-Dogr-DE");
+	BOOST_CHECK_EQUAL (dcp::LanguageTag("de-Dogr-DE-aranes").to_string(), "de-Dogr-DE-aranes");
+	BOOST_CHECK_EQUAL (dcp::LanguageTag("de-Dogr-DE-aranes-lemosin").to_string(), "de-Dogr-DE-aranes-lemosin");
+	BOOST_CHECK_EQUAL (dcp::LanguageTag("de-Dogr-DE-aranes-lemosin-abv").to_string(), "de-Dogr-DE-aranes-lemosin-abv");
+	BOOST_CHECK_EQUAL (dcp::LanguageTag("de-Dogr-DE-aranes-lemosin-abv-zsm").to_string(), "de-Dogr-DE-aranes-lemosin-abv-zsm");
+}
+
-- 
2.30.2