b7d8169f02
When parsing the CLDR data, we only handle language, script and territory (which we call country) codes if they are known to our enumdata.py tables. When reporting the rest as unknown, in the content of an actual locale definition (not the likely subtag data), check whether en.xml can resolve the code for us; if it can, report the full name it provides, as a hint to whoever's running the script that an update to enumdata.py may be in order. Change-Id: I9ca1d6922a91d45bc436f4b622e5557261897d7f Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
289 lines
11 KiB
Python
289 lines
11 KiB
Python
#!/usr/bin/env python
|
|
#############################################################################
|
|
##
|
|
## Copyright (C) 2016 The Qt Company Ltd.
|
|
## Contact: https://www.qt.io/licensing/
|
|
##
|
|
## This file is part of the test suite of the Qt Toolkit.
|
|
##
|
|
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
|
|
## Commercial License Usage
|
|
## Licensees holding valid commercial Qt licenses may use this file in
|
|
## accordance with the commercial license agreement provided with the
|
|
## Software or, alternatively, in accordance with the terms contained in
|
|
## a written agreement between you and The Qt Company. For licensing terms
|
|
## and conditions see https://www.qt.io/terms-conditions. For further
|
|
## information use the contact form at https://www.qt.io/contact-us.
|
|
##
|
|
## GNU General Public License Usage
|
|
## Alternatively, this file may be used under the terms of the GNU
|
|
## General Public License version 3 as published by the Free Software
|
|
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
|
|
## included in the packaging of this file. Please review the following
|
|
## information to ensure the GNU General Public License requirements will
|
|
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
|
|
##
|
|
## $QT_END_LICENSE$
|
|
##
|
|
#############################################################################
|
|
|
|
import sys
|
|
import os
|
|
import xml.dom.minidom
|
|
|
|
class DraftResolution:
    """Wrap a CLDR draft status name so that it can be turned into a rank.

    See http://www.unicode.org/cldr/process.html for a description of
    the draft statuses.
    """
    # The recognised draft status names:
    unconfirmed = 'unconfirmed'
    provisional = 'provisional'
    contributed = 'contributed'
    approved = 'approved'
    # Numeric rank of each status name; consulted by toInt().
    _values = {
        unconfirmed: 1,
        provisional: 2,
        contributed: 3,
        approved: 4,
    }

    def __init__(self, resolution):
        """Remember resolution, the draft status name to be ranked."""
        self.resolution = resolution

    def toInt(self):
        """Return the numeric rank of the wrapped status name.

        Raises KeyError if the name is not one of the keys of _values.
        """
        ranks = DraftResolution._values
        return ranks[self.resolution]
|
|
|
|
class Error(Exception):
    """Exception raised when a lookup fails fatally.

    Derives from Exception so that instances can be raised on Python 3
    as well as on Python 2.  The message passed to the constructor is
    available as the msg attribute and is what str() returns.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
|
|
|
|
# Cache of parsed documents, keyed by the file name passed to parseDoc().
doc_cache = {}
def parseDoc(file):
    """Return the minidom document for file, parsing it at most once.

    The parsed document is stored in doc_cache, so later calls with the
    same file name return the very same document object.  Any exception
    raised by xml.dom.minidom.parse() propagates to the caller.
    """
    # "in" rather than dict.has_key(), which Python 3 no longer provides.
    if file not in doc_cache:
        doc_cache[file] = xml.dom.minidom.parse(file)
    return doc_cache[file]
|
|
|
|
def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
    """Find the first suitable child element of parent.

    Scans parent.childNodes, in order, for an element named tag_name.
    If arg_value is true, the element must also carry an attribute
    named arg_name whose value equals arg_value.  If draft is true, it
    names the least acceptable draft status (see DraftResolution): an
    element whose draft attribute ranks lower than that is skipped,
    while an element with no draft attribute is treated as approved.

    Returns the first matching element, or False if there is none.
    """
    for node in parent.childNodes:
        if node.nodeType != node.ELEMENT_NODE or node.nodeName != tag_name:
            continue
        if arg_value:
            # hasAttribute() rather than attributes.has_key(), which
            # Python 3's minidom does not provide.
            if not node.hasAttribute(arg_name):
                continue
            if node.attributes[arg_name].nodeValue != arg_value:
                continue
        # if draft is not specified on the node then it's approved
        if draft and node.hasAttribute('draft'):
            value = DraftResolution(node.attributes['draft'].nodeValue).toInt()
            exemplar = DraftResolution(draft).toInt()
            if exemplar > value:
                continue
        return node
    return False
|
|
|
|
def codeMapsFromFile(file):
    """Extract mappings of language, script and country codes to names.

    The file shall typically be common/main/en.xml, which contains a
    localeDisplayNames element with children languages, scripts and
    territories; each element in each of these has a code as its type
    attribute and its name as element content.  This returns a mapping
    with keys 'language', 'script' and 'country', each of which
    has, as value, a mapping of the relevant codes to names.
    """
    parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames')
    keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {}
    for src, dst in keys.items():
        child = findChild(parent, src)
        data = result[dst] = {}
        for elt in child.childNodes:
            # Skip nodes with no attributes at all and elements lacking
            # a type.  hasAttribute() and "in" are used because
            # has_key() is not available on Python 3.
            if not (elt.attributes and elt.hasAttribute('type')):
                continue
            key, value = elt.attributes['type'].value, elt.childNodes[0].wholeText
            # Don't over-write previously-read data for an alt form:
            if elt.hasAttribute('alt') and key in data:
                continue
            data[key] = value

    return result
|
|
|
|
def findTagsInFile(file, path):
    """Collect the attributes of the nodes that path leads to in file.

    path is a "/"-joined sequence of tag names, each optionally followed
    by [value] or [name=value] to select on an attribute (named type
    when no name is given).  Each step is looked up, with findChild(),
    among the children of the element found by the previous step,
    starting from the document element.

    Returns None if some step matches nothing.  Otherwise returns a list
    of [node name, attribute items] pairs: one for each child node, of
    the element reached, that has attributes or, if that element has no
    child nodes at all, one for the element itself if it has attributes.
    """
    elt = parseDoc(file).documentElement
    for tag_spec in path.split("/"):
        tag_name, arg_name, arg_value = tag_spec, 'type', ''
        left_bracket = tag_spec.find('[')
        if left_bracket != -1:
            tag_name = tag_spec[:left_bracket]
            selector = tag_spec[left_bracket+1:-1].split("=")
            if len(selector) == 2:
                arg_name, arg_value = selector
            else:
                arg_value = selector[0]
        elt = findChild(elt, tag_name, arg_name, arg_value)
        if not elt:
            return None

    if elt.childNodes:
        return [[node.nodeName, node.attributes.items()]
                for node in elt.childNodes if node.attributes]
    if elt.attributes:
        return [[elt.nodeName, elt.attributes.items()]]
    return []
|
|
|
|
def _findEntryInFile(file, path, draft=None, attribute=None):
    """Look path up in one file, without consulting any other file.

    path is a "/"-joined sequence of tag names, each optionally followed
    by [value] or [name=value] to select on an attribute (named type
    when no name is given; "@" and "'" are stripped from the name).
    draft, if given, is passed to findChild() as the least acceptable
    draft status.

    Returns a pair (result, aliaspath):
    * (None, aliaspath) if an element on the way has an alias child
      whose source is 'locale'; aliaspath is then the rewritten path
      from which the caller should start the lookup again;
    * ("", None) if some step of the path matches nothing;
    * otherwise (value, None), where value is the value of the named
      attribute of the element reached or, if attribute is None, the
      node value of that element's first child; value is None if the
      attribute or the first child is missing.
    """
    elt = parseDoc(file).documentElement
    tag_spec_list = path.split("/")
    for i, tag_spec in enumerate(tag_spec_list):
        tag_name, arg_name, arg_value = tag_spec, 'type', ''
        left_bracket = tag_spec.find('[')
        if left_bracket != -1:
            tag_name = tag_spec[:left_bracket]
            selector = tag_spec[left_bracket+1:-1].split("=")
            if len(selector) == 2:
                arg_name = selector[0].replace("@", "").replace("'", "")
                arg_value = selector[1]
            else:
                arg_value = selector[0]

        alias = findChild(elt, 'alias')
        if alias and alias.attributes['source'].nodeValue == 'locale':
            target = alias.attributes['path'].nodeValue
            # resolve all dot-dot parts of the path (a plain loop, as
            # the reduce() builtin is not available on Python 3)
            aliaspath = []
            for step in tag_spec_list[:i] + target.split("/"):
                if step == '..':
                    aliaspath = aliaspath[:-1]
                else:
                    aliaspath.append(step)
            # remove attribute specification that our xpathlite doesn't support
            # (a list, not map(), so that it can be extended on Python 3)
            aliaspath = [step.replace("@type=", "").replace("'", "")
                         for step in aliaspath]
            # append the remaining path
            aliaspath += tag_spec_list[i:]
            # "locale" aliases are special - we need to start lookup from scratch
            return (None, "/".join(aliaspath))

        elt = findChild(elt, tag_name, arg_name, arg_value, draft)
        if not elt:
            return ("", None)

    if attribute is not None:
        if elt.hasAttribute(attribute):
            return (elt.attributes[attribute].nodeValue, None)
        return (None, None)

    try:
        return (elt.firstChild.nodeValue, None)
    except AttributeError:
        # firstChild is None for an element without children.
        return (None, None)
|
|
|
|
def findAlias(file):
    """Return the source named by the top-level alias of file, if any.

    Looks for an alias element directly under the document element of
    file.  Returns the value of its source attribute, or False if there
    is no such element or it has no source attribute.
    """
    alias_elt = findChild(parseDoc(file).documentElement, "alias")
    # hasAttribute() rather than attributes.has_key(), which Python 3's
    # minidom does not provide.
    if not alias_elt or not alias_elt.hasAttribute('source'):
        return False
    return alias_elt.attributes['source'].nodeValue
|
|
|
|
# Chains already computed by _fixedLookupChain(), keyed by locale name.
lookup_chain_cache = {}
# Maps each parent locale name to the list of locale names declared to
# inherit from it; filled in by the first call to _fixedLookupChain().
parent_locales = {}
def _fixedLookupChain(dirname, name):
    """Return the list of locale names to search, in order, for name.

    The chain starts as name followed by its successive truncations at
    each "_" (az_Latn_AZ => [az_Latn_AZ, az_Latn, az]).  If one of these
    is listed under a parent locale in the parentLocales data read from
    dirname + "/../supplemental/supplementalData.xml", the chain is cut
    after it and, unless that parent is root, continued with the chain
    of the parent.  Results are remembered in lookup_chain_cache.

    Note that findTagsInFile() returns None when it cannot find the
    parentLocales element; that case is not handled here.
    """
    if name in lookup_chain_cache:
        return lookup_chain_cache[name]

    # see http://www.unicode.org/reports/tr35/#Parent_Locales
    if not parent_locales:
        for ns in findTagsInFile(dirname + "/../supplemental/supplementalData.xml", "parentLocales"):
            # ns looks like this: [u'parentLocale', [(u'parent', u'root'), (u'locales', u'az_Cyrl bs_Cyrl en_Dsrt ..')]]
            tmp = dict(ns[1])
            parent_locales[tmp.get(u"parent", "")] = tmp[u"locales"].split(" ")

    # split locale name into parts and join ever shorter prefixes of them
    # example: az_Latn_AZ => [az_Latn_AZ, az_Latn, az]
    # (a list comprehension, as reversed(map(...)) fails on Python 3)
    parts = name.split("_")
    items = ["_".join(parts[:count]) for count in range(len(parts), 0, -1)]

    for i, item in enumerate(items):
        for parent_locale, locales in parent_locales.items():
            if item not in locales:
                continue
            if parent_locale == u"root":
                items = items[:i+1]
            else:
                items = items[:i+1] + _fixedLookupChain(dirname, parent_locale)
            lookup_chain_cache[name] = items
            return items

    lookup_chain_cache[name] = items
    return items
|
|
|
|
def _findEntry(base, path, draft=None, attribute=None):
    """Look path up in base's locale file and those it falls back to.

    base names a locale file, with or without its ".xml" suffix.  Each
    name in the chain given by _fixedLookupChain() is tried in turn,
    skipping those for which no file exists in base's directory:
    * if the file has a top-level alias (see findAlias()), the search
      continues in the aliased file instead and its result is returned;
      Error is raised if that file does not exist;
    * if _findEntryInFile() reports an alias path, the search starts
      again from base with that path;
    * otherwise a true result from _findEntryInFile() is returned.

    Returns None if no file in the chain yields a true result.
    """
    if base.endswith(".xml"):
        base = base[:-4]
    dirname, filename = os.path.split(base)

    for item in _fixedLookupChain(dirname, filename):
        candidate = dirname + "/" + item + ".xml"
        if not os.path.isfile(candidate):
            continue

        alias = findAlias(candidate)
        if alias:
            # if alias is found we should follow it and stop processing current file
            # see http://www.unicode.org/reports/tr35/#Common_Elements
            aliasfile = os.path.dirname(candidate) + "/" + alias + ".xml"
            if not os.path.isfile(aliasfile):
                raise Error("findEntry: fatal error: found an alias '%s' to '%s', but the alias file couldn't be found" % (filename, alias))
            # found an alias, recurse into parsing it
            return _findEntry(aliasfile, path, draft, attribute)

        result, aliaspath = _findEntryInFile(candidate, path, draft, attribute)
        if aliaspath:
            # start lookup again because of the alias source="locale"
            return _findEntry(base, aliaspath, draft, attribute)
        if result:
            return result

    return None
|
|
|
|
def findEntry(base, path, draft=None, attribute=None):
    """Look path up for the locale file base, falling back to root.xml.

    base names a locale file, with or without its ".xml" suffix.  While
    path is non-empty, it is first looked up with _findEntry(); failing
    that, in root.xml in the same directory.  A true result from either
    is returned at once.  If root.xml reports an alias path instead, the
    lookup is repeated with that path; if it reports neither a result
    nor an alias path, Error is raised.

    Returns None when path is empty to begin with.
    """
    if base.endswith(".xml"):
        base = base[:-4]
    dirname, filename = os.path.split(base)

    result = None
    while path:
        result = _findEntry(base, path, draft, attribute)
        if result:
            break
        result, aliaspath = _findEntryInFile(dirname + "/root.xml", path, draft, attribute)
        if result:
            break
        if not aliaspath:
            raise Error("findEntry: fatal error: %s: cannot find key %s" % (filename, path))
        path = aliaspath

    return result
|
|
|