Load language tags from on-disk files rather than embedding them
[libdcp.git] / scripts / update-language-subtags
1 #!/usr/bin/python3
2
3 import os
4 import urllib.request
5
# Location of the IANA language subtag registry (record-jar format, UTF-8).
REGISTRY_URL = 'https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry'

# Directory (relative to the current working directory) that receives one
# output file per subtag type.
OUTPUT_DIRECTORY = 'tags'

# Record types that are written out, and record types that are knowingly
# skipped (for now).  Anything else is reported as unknown.
LISTED_TYPES = ('language', 'variant', 'region', 'script', 'extlang')
IGNORED_TYPES = ('redundant', 'grandfathered')


def store_record(record, lists):
    """File one parsed registry record into *lists* according to its Type.

    Records without a 'Type' field (such as the leading File-Date record)
    are skipped silently.  Records whose type is in LISTED_TYPES are appended
    to ``lists[type]`` as ``(subtag, description)``; types in IGNORED_TYPES
    are dropped; any other type is reported on stdout.

    Raises KeyError if a listed record has no 'Subtag' or 'Description'.
    """
    record_type = record.get('Type')
    if record_type is None:
        return
    if record_type in LISTED_TYPES:
        entry = (record['Subtag'], record['Description'])
        lists.setdefault(record_type, []).append(entry)
    elif record_type not in IGNORED_TYPES:
        print("Unknown type `%s'" % record_type)


def parse_registry(lines):
    """Parse the registry and return ``{type: [(subtag, description), ...]}``.

    *lines* is any iterable of already-decoded text lines, e.g. an open text
    file or a generator decoding an HTTP response, which also makes it easy
    to run against a local copy of the registry.

    Records are separated by lines consisting of ``%%``.  Each field is
    ``Name: value``; only the first colon separates name from value, so
    values may themselves contain colons.  A line that starts with whitespace
    is treated as a folded continuation of the previous field and is joined
    to it with a single space.  If a field name occurs more than once in a
    record, the last occurrence wins.
    """
    lists = {}
    record = {}
    last_key = None

    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue

        if stripped == '%%':
            store_record(record, lists)
            record = {}
            last_key = None
        elif line[0].isspace() and last_key is not None:
            # Folded continuation line: it extends the previous field and
            # must not be parsed as a new "name: value" pair.
            record[last_key] += ' ' + stripped
        else:
            key, separator, value = stripped.partition(':')
            if separator:
                record[key] = value.strip()
                last_key = key

    # The registry does not end with a '%%' line, so the final record still
    # has to be stored once the input is exhausted.
    store_record(record, lists)
    return lists


def write_tag_lists(lists, directory=OUTPUT_DIRECTORY):
    """Write one file per subtag type into *directory*.

    Each file is named after the type and contains the number of entries on
    the first line, followed by two lines per entry: the subtag, then its
    description.  Files are written as UTF-8 regardless of the locale,
    because descriptions contain non-ASCII characters.  The directory is
    created if it does not exist yet.
    """
    os.makedirs(directory, exist_ok=True)
    for tag_type, entries in lists.items():
        path = os.path.join(directory, tag_type)
        with open(path, 'w', encoding='utf-8') as f:
            print(len(entries), file=f)
            for subtag, description in entries:
                print(subtag, file=f)
                print(description, file=f)


def main():
    """Download the IANA registry and regenerate the files under 'tags'."""
    with urllib.request.urlopen(REGISTRY_URL) as response:
        # Decode once per line at the I/O boundary; the generator is fully
        # consumed while the response is still open.
        lists = parse_registry(raw.decode('utf-8') for raw in response)
    write_tag_lists(lists)


if __name__ == '__main__':
    main()
35