scripts/update-language-subtags

   1 #!/usr/bin/python3
   2
   3 import urllib.request
   4
   5 block = {}
   6 lists = {}
   7
   8 with urllib.request.urlopen('https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry') as f:
   9 #with open('language-subtag-registry', 'r') as f:
  10     for l in f.readlines():
  11         if l.strip().decode('utf-8') == "%%":
  12             if 'Type' in block:
  13                 if block['Type'] in ['language', 'variant', 'region', 'script', 'extlang']:
  14                     if not block['Type'] in lists:
  15                         lists[block['Type']] = []
  16                     lists[block['Type']].append((block['Subtag'], block['Description']))
  17                 elif block['Type'] == 'redundant' or block['Type'] == 'grandfathered':
  18                     # We'll ignore these (for now)
  19                     pass
  20                 else:
  21                     print("Unknown type `%s'" % block['Type'])
  22             block = {}
  23         else:
  24             p = l.strip().decode('utf-8').split(':')
  25             if len(p) > 1:
  26                 block[p[0]] = p[1][1:]
  27
  28 def escape(s):
  29     return s.replace('"', '\\"')
  30
  31 with open('src/language_tag_lists.cc', 'w') as f:
  32     for k, v in lists.items():
  33         print("static LanguageTag::SubtagData %s_list[] = {" % k, file=f)
  34         for e in v:
  35             print('\t{ "%s", "%s" },' % (escape(e[0]), escape(e[1])), file=f)
  36         print("};", file=f)
  37         print("", file=f)
  38