#!/usr/bin/python3
"""Download language-tag registries and write them as flat files in ``tags/``.

Two sources are fetched:

* the IANA language subtag registry, from which one file per record type
  (``tags/language``, ``tags/variant``, ``tags/region``, ``tags/script``,
  ``tags/extlang``) is written;
* the ISDCF language registry (JSON), from which ``tags/dcnc`` is written.

Every output file consists of pairs of lines: the tag on the first line and
its description (or DCNC tag) on the second.
"""

import json
import os
import urllib.request

IANA_REGISTRY_URL = 'https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry'
ISDCF_REGISTRY_URL = 'https://registry.isdcf.com/languages'
OUTPUT_DIR = 'tags'

# Record types that each get their own output file.
LISTED_TYPES = ('language', 'variant', 'region', 'script', 'extlang')
# Record types that are deliberately skipped (for now).
IGNORED_TYPES = ('redundant', 'grandfathered')


def _store_record(record, lists):
    """File a completed registry record under its type in *lists*."""
    record_type = record.get('Type')
    if record_type is None:
        # Records without a Type field (or empty records) carry no subtag.
        return
    if record_type in LISTED_TYPES:
        lists.setdefault(record_type, []).append(
            (record['Subtag'], record['Description']))
    elif record_type not in IGNORED_TYPES:
        print("Unknown type `%s'" % record_type)


def parse_registry(lines):
    """Parse registry text into ``{type: [(subtag, description), ...]}``.

    *lines* is any iterable of lines, either ``bytes`` (as produced by an
    HTTP response; decoded as UTF-8) or ``str`` (e.g. a local copy opened in
    text mode).  Records are separated by lines containing only ``%%``.
    Several ``Description`` fields in one record are joined with ``/``.

    Raises ``KeyError`` if a record of a listed type lacks ``Subtag`` or
    ``Description``.
    """
    lists = {}
    record = {}
    last_key = None
    for raw in lines:
        # Compare and split as text: bytes never equal "%%" and cannot be
        # split on a str separator.
        text = raw.decode('utf-8') if isinstance(raw, bytes) else raw
        stripped = text.strip()
        if stripped == '%%':
            _store_record(record, lists)
            record = {}
            last_key = None
            continue
        if not stripped:
            continue
        if text[:1] in (' ', '\t'):
            # A line starting with whitespace continues the previous field
            # body (folded line); it is not a new field even if it contains
            # a colon.
            if last_key is not None:
                record[last_key] = (record[last_key] + ' ' + stripped).strip()
            continue
        # Split on the first colon only so bodies containing ':' stay intact.
        key, sep, value = stripped.partition(':')
        if not sep:
            last_key = None
            continue
        key = key.strip()
        value = value.strip()
        if key == 'Description' and key in record:
            record[key] = '/'.join([record[key], value])
        else:
            record[key] = value
        last_key = key
    # The final record is not necessarily followed by a "%%" separator.
    _store_record(record, lists)
    return lists


def extract_dcnc_pairs(registry):
    """Return ``(rfc5646Tag, dcncTag)`` for each entry that has a DCNC tag."""
    return [(entry['rfc5646Tag'], entry['dcncTag'])
            for entry in registry['data'] if 'dcncTag' in entry]


def write_pairs(path, pairs):
    """Write each pair in *pairs* to *path* as two consecutive lines.

    The parent directory is created if missing, and the file is written as
    UTF-8 regardless of locale because descriptions contain non-ASCII text.
    """
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    with open(path, 'w', encoding='utf-8') as out:
        for first, second in pairs:
            print(first, file=out)
            print(second, file=out)


def main():
    """Fetch both registries and regenerate every file under ``tags/``."""
    with urllib.request.urlopen(IANA_REGISTRY_URL) as response:
        lists = parse_registry(response)
    for tag_type, entries in lists.items():
        write_pairs(os.path.join(OUTPUT_DIR, tag_type), entries)

    # Download and parse before opening the output file, so a failed request
    # or malformed JSON does not truncate an existing tags/dcnc.
    with urllib.request.urlopen(ISDCF_REGISTRY_URL) as response:
        registry = json.loads(response.read())
    write_pairs(os.path.join(OUTPUT_DIR, 'dcnc'), extract_dcnc_pairs(registry))


if __name__ == '__main__':
    main()