#!/usr/bin/python3

import urllib.request

# Fields of the registry record currently being read (field name -> value).
block = {}
# Subtag type -> list of (subtag, description) tuples, in registry order.
lists = {}


def _file_record(record):
    """File one completed registry record under its type in ``lists``.

    Records without a ``Type`` field are skipped.  'redundant' and
    'grandfathered' records are recognised but not exported (for now);
    any other unexpected type is reported on stdout.
    """
    if 'Type' not in record:
        return
    kind = record['Type']
    if kind in ('language', 'variant', 'region', 'script', 'extlang'):
        lists.setdefault(kind, []).append((record['Subtag'], record['Description']))
    elif kind in ('redundant', 'grandfathered'):
        # We'll ignore these (for now)
        pass
    else:
        print("Unknown type `%s'" % kind)


# To work offline, replace the urlopen() call with
# open('language-subtag-registry', 'rb') on a local copy (binary mode,
# because each line is decoded explicitly below).
with urllib.request.urlopen('https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry') as f:
    last_key = None
    for raw in f:
        # Decode once per line; keep leading whitespace so that folded
        # continuation lines can be told apart from new fields.
        line = raw.decode('utf-8').rstrip('\r\n')
        text = line.strip()
        if text == "%%":
            # Record separator: file what we have and start a new record.
            _file_record(block)
            block = {}
            last_key = None
        elif line[:1] in (' ', '\t'):
            # Folded continuation of the previous field's value.
            if last_key is not None and text:
                block[last_key] += ' ' + text
        else:
            # Split on the FIRST colon only, so values may contain colons.
            key, sep, value = text.partition(':')
            if sep:
                # A repeated field name (e.g. several Description lines)
                # overwrites the earlier value: the last one wins.
                block[key] = value.strip()
                last_key = key

# A final record that is not followed by a "%%" line would otherwise be lost.
_file_record(block)

def escape(s):
    """Return *s* escaped for use inside a double-quoted C/C++ string literal.

    Backslashes are escaped first so that the backslashes introduced when
    escaping the double quotes are not themselves doubled.
    """
    return s.replace('\\', '\\\\').replace('"', '\\"')

# Emit one C++ array per subtag type, each entry being a
# { "subtag", "description" } pair.  The encoding is pinned to UTF-8 because
# the registry text was decoded as UTF-8; relying on the locale's default
# encoding would make the generated file differ between machines and could
# fail outright on non-ASCII descriptions.
with open('src/language_tag_lists.cc', 'w', encoding='utf-8') as f:
    for kind, entries in lists.items():
        print("static LanguageTag::SubtagData const %s_list[] = {" % kind, file=f)
        for subtag, description in entries:
            print('\t{ "%s", "%s" },' % (escape(subtag), escape(description)), file=f)
        print("};", file=f)
        print("", file=f)