qt5base-lts/util/locale_database/xpathlite.py
Edward Welbourne b7d8169f02 Suggest name, when available, for unknown codes
When parsing the CLDR data, we only handle language, script and
territory (which we call country) codes if they are known to our
enumdata.py tables.  When reporting the rest as unknown, in the
content of an actual locale definition (not the likely subtag data),
check whether en.xml can resolve the code for us; if it can, report
the full name it provides, as a hint to whoever's running the script
that an update to enumdata.py may be in order.

Change-Id: I9ca1d6922a91d45bc436f4b622e5557261897d7f
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
2019-05-20 20:42:11 +02:00

289 lines
11 KiB
Python

#!/usr/bin/env python
#############################################################################
##
## Copyright (C) 2016 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3 as published by the Free Software
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################
import sys
import os
import xml.dom.minidom
class DraftResolution:
    """A CLDR draft level that can be ranked against other levels.

    See http://www.unicode.org/cldr/process.html for a description of
    the levels.  The class attributes name the four known levels;
    toInt() maps a level to its rank, from 1 (unconfirmed) up to
    4 (approved).
    """
    unconfirmed = 'unconfirmed'
    provisional = 'provisional'
    contributed = 'contributed'
    approved = 'approved'
    # Rank of each level; position in the tuple (counting from 1) is the rank.
    _values = dict((level, rank) for rank, level in
                   enumerate((unconfirmed, provisional, contributed, approved), 1))

    def __init__(self, resolution):
        # resolution is one of the level names above
        self.resolution = resolution

    def toInt(self):
        """Return the numeric rank of this level (KeyError if unknown)."""
        rank = DraftResolution._values[self.resolution]
        return rank
class Error(Exception):
    """Fatal problem reported by the lookup functions in this module.

    Derives from Exception so that "raise Error(...)" works on Python 3
    as well as Python 2; a class that is not an exception type cannot
    be raised on Python 3.

    msg -- the human-readable description; str() of the error returns it.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
# Parsed documents, keyed by the argument that was passed to parseDoc().
doc_cache = {}
def parseDoc(file):
    """Return the minidom document for file, parsing it at most once.

    file is handed to xml.dom.minidom.parse() the first time it is seen;
    the resulting document is remembered in doc_cache and returned
    directly on later calls with the same argument.
    """
    # "in" works on both Python 2 and 3; dict.has_key() is Python 2 only.
    if file not in doc_cache:
        doc_cache[file] = xml.dom.minidom.parse(file)
    return doc_cache[file]
def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
    """Return the first suitable child element of parent, or False.

    parent    -- DOM node whose direct children are searched, in order.
    tag_name  -- required node name of the child.
    arg_name, arg_value -- when arg_value is non-empty, the child must
                 have an attribute arg_name whose value equals arg_value.
    draft     -- when given, a DraftResolution level name; a child whose
                 'draft' attribute ranks below it is skipped, while a
                 child with no 'draft' attribute is accepted.

    Returns False (not None) when no child matches.
    """
    for node in parent.childNodes:
        if node.nodeType != node.ELEMENT_NODE:
            continue
        if node.nodeName != tag_name:
            continue
        if arg_value:
            # hasAttribute()/getAttribute() exist on both Python 2 and 3,
            # unlike has_key() on the attribute map.
            if not node.hasAttribute(arg_name):
                continue
            if node.getAttribute(arg_name) != arg_value:
                continue
        if draft:
            if not node.hasAttribute('draft'):
                # if draft is not specified then it's approved
                return node
            value = DraftResolution(node.getAttribute('draft')).toInt()
            exemplar = DraftResolution(draft).toInt()
            if exemplar > value:
                continue
        return node
    return False
def codeMapsFromFile(file):
    """Extract mappings of language, script and country codes to names.

    The file shall typically be common/main/en.xml, which contains a
    localeDisplayNames element with children languages, scripts and
    territories; each element in each of these has a code as its type
    attribute and its name as element content.  This returns a mapping
    with keys 'language', 'script' and 'country', each of which
    has, as value, a mapping of the relevant codes to names.
    """
    parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames')
    keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {}
    for src, dst in keys.items():
        child = findChild(parent, src)
        data = result[dst] = {}
        for elt in child.childNodes:
            # Only elements can carry a type; skip text, comments and the like.
            if elt.nodeType != elt.ELEMENT_NODE or not elt.hasAttribute('type'):
                continue
            key = elt.getAttribute('type')
            # Don't over-write previously-read data for an alt form:
            if elt.hasAttribute('alt') and key in data:
                continue
            data[key] = elt.childNodes[0].wholeText
    return result
def findTagsInFile(file, path):
    """List the attributes found at the end of path in file.

    path is a "/"-separated sequence of steps, each either a tag name or
    "tag[value]" / "tag[name=value]" to require an attribute (named
    'type' unless a name is given).  Each step is resolved with
    findChild(), starting from the document element.

    Returns None if any step cannot be resolved.  Otherwise returns a
    list of [nodeName, attribute-items] entries: one per child of the
    final element that has attributes or, when the final element has
    no child nodes at all, one for that element itself if it has
    attributes.
    """
    current = parseDoc(file).documentElement
    for spec in path.split("/"):
        name, attr, wanted = spec, 'type', ''
        bracket = spec.find('[')
        if bracket != -1:
            name = spec[:bracket]
            # Text between "[" and the final character (the closing "]")
            parts = spec[bracket+1:-1].split("=")
            if len(parts) == 2:
                attr, wanted = parts
            else:
                wanted = parts[0]
        current = findChild(current, name, attr, wanted)
        if not current:
            return None

    if current.childNodes:
        return [[kid.nodeName, kid.attributes.items()]
                for kid in current.childNodes if kid.attributes]
    if current.attributes:
        return [[current.nodeName, current.attributes.items()]]
    return []
def _findEntryInFile(file, path, draft=None, attribute=None):
    """Look up path in one file, without following the locale chain.

    path is a "/"-separated sequence of steps, each either a tag name or
    "tag[value]" / "tag[name=value]" to require an attribute (named
    'type' unless a name is given; "@" and "'" are stripped from the
    name).  draft is passed on to findChild() for every step.

    Returns a pair (value, aliaspath):
      (value, None)     -- the attribute's value if attribute was given,
                           else the node value of the element's first child;
      (None, None)      -- the element was found but lacks the attribute,
                           or has no children;
      ("", None)        -- some step of path could not be resolved;
      (None, aliaspath) -- an <alias source="locale"> was met on the way;
                           the caller must restart the lookup, from
                           scratch, using aliaspath.
    """
    doc = parseDoc(file)
    elt = doc.documentElement
    tag_spec_list = path.split("/")
    for i, tag_spec in enumerate(tag_spec_list):
        tag_name, arg_name, arg_value = tag_spec, 'type', ''
        left_bracket = tag_spec.find('[')
        if left_bracket != -1:
            tag_name = tag_spec[:left_bracket]
            parts = tag_spec[left_bracket+1:-1].split("=")
            if len(parts) == 2:
                arg_name = parts[0].replace("@", "").replace("'", "")
                arg_value = parts[1]
            else:
                arg_value = parts[0]

        alias = findChild(elt, 'alias')
        # An alias without a source attribute is not a "locale" alias.
        if alias and alias.getAttribute('source') == 'locale':
            target = alias.attributes['path'].nodeValue
            # resolve all dot-dot parts of the path
            resolved = []
            for part in tag_spec_list[:i] + target.split("/"):
                if part == '..':
                    del resolved[-1:]
                else:
                    resolved.append(part)
            # remove attribute specification that our xpathlite doesn't support
            resolved = [part.replace("@type=", "").replace("'", "") for part in resolved]
            # append the remaining path
            aliaspath = "/".join(resolved + tag_spec_list[i:])
            # "locale" aliases are special - we need to start lookup from scratch
            return (None, aliaspath)

        elt = findChild(elt, tag_name, arg_name, arg_value, draft)
        if not elt:
            return ("", None)

    if attribute is not None:
        if elt.hasAttribute(attribute):
            return (elt.getAttribute(attribute), None)
        return (None, None)

    # An element with no children has no text to report.
    first = elt.firstChild
    if first is not None:
        return (first.nodeValue, None)
    return (None, None)
def findAlias(file):
    """Return the source of file's top-level alias, or False if none.

    Looks for an <alias> element directly under the document element
    and returns the value of its 'source' attribute.  Returns False
    when there is no such element or it has no 'source' attribute.
    """
    doc = parseDoc(file)
    alias_elt = findChild(doc.documentElement, "alias")
    # hasAttribute() works on Python 2 and 3; the attribute map's
    # has_key() is Python 2 only.
    if not alias_elt or not alias_elt.hasAttribute('source'):
        return False
    return alias_elt.getAttribute('source')
# Lookup chains already computed, keyed by locale name.
lookup_chain_cache = {}
# Maps each parent locale name to the list of locales that name it as
# parent; filled in from supplementalData.xml on first use.
parent_locales = {}
def _fixedLookupChain(dirname, name):
    """Return the list of locale names to search for name, most specific first.

    The chain starts as name with trailing "_"-separated parts removed
    one at a time, e.g. az_Latn_AZ => [az_Latn_AZ, az_Latn, az].  If an
    entry of that chain is listed under an explicit parent locale, the
    chain is cut after that entry and, unless the parent is "root",
    continued with the parent's own chain.

    dirname -- directory of the locale files; the parent locale data is
               read from dirname + "/../supplemental/supplementalData.xml"
               the first time it is needed.
    Results are remembered in lookup_chain_cache.
    """
    if name in lookup_chain_cache:
        return lookup_chain_cache[name]

    # see http://www.unicode.org/reports/tr35/#Parent_Locales
    if not parent_locales:
        for ns in findTagsInFile(dirname + "/../supplemental/supplementalData.xml", "parentLocales"):
            # ns looks like this: [u'parentLocale', [(u'parent', u'root'), (u'locales', u'az_Cyrl bs_Cyrl en_Dsrt ..')]]
            attrs = dict(ns[1])
            parent_locales[attrs.get("parent", "")] = attrs["locales"].split(" ")

    # split locale name into items and iterate through them from back to front
    # example: az_Latn_AZ => [az_Latn_AZ, az_Latn, az]
    parts = name.split("_")
    items = ["_".join(parts[:count]) for count in range(len(parts), 0, -1)]

    for i, item in enumerate(items):
        for parent_locale, locales in parent_locales.items():
            if item not in locales:
                continue
            chain = items[:i+1]
            if parent_locale != "root":
                chain = chain + _fixedLookupChain(dirname, parent_locale)
            lookup_chain_cache[name] = chain
            return chain

    lookup_chain_cache[name] = items
    return items
def _findEntry(base, path, draft=None, attribute=None):
    """Search the lookup chain of locale base for path.

    base may be given with or without a trailing ".xml".  Each locale
    in its lookup chain whose file exists is tried in turn:
      * if the file has a top-level alias, the lookup continues in the
        aliased file instead and its result is returned;
      * if looking up path in the file yields an alias path, the whole
        lookup restarts from base with that path;
      * otherwise the first true result is returned.
    Returns None if nothing is found; raises Error if an aliased file
    does not exist.
    """
    if base.endswith(".xml"):
        base = base[:-4]
    dirname, filename = os.path.split(base)

    for locale in _fixedLookupChain(dirname, filename):
        candidate = dirname + "/" + locale + ".xml"
        if not os.path.isfile(candidate):
            continue

        alias = findAlias(candidate)
        if alias:
            # if alias is found we should follow it and stop processing current file
            # see http://www.unicode.org/reports/tr35/#Common_Elements
            target = os.path.dirname(candidate) + "/" + alias + ".xml"
            if os.path.isfile(target):
                # found an alias, recurse into parsing it
                return _findEntry(target, path, draft, attribute)
            raise Error("findEntry: fatal error: found an alias '%s' to '%s', but the alias file couldn't be found" % (filename, alias))

        found, aliaspath = _findEntryInFile(candidate, path, draft, attribute)
        if aliaspath:
            # start lookup again because of the alias source="locale"
            return _findEntry(base, aliaspath, draft, attribute)
        if found:
            return found

    return None
def findEntry(base, path, draft=None, attribute=None):
    """Find the value for path in locale base, falling back to root.xml.

    base may be given with or without a trailing ".xml".  The locale's
    lookup chain is searched first, then root.xml in the same
    directory.  When root.xml redirects the path through an alias, the
    search is repeated with the redirected path.

    Returns the first true result; returns None when path is empty.
    Raises Error when nothing is found and there is no redirect left
    to follow.
    """
    if base.endswith(".xml"):
        base = base[:-4]
    dirname, filename = os.path.split(base)
    root_file = dirname + "/root.xml"

    result = None
    while path:
        result = _findEntry(base, path, draft, attribute)
        if not result:
            result, redirect = _findEntryInFile(root_file, path, draft, attribute)
            if not (result or redirect):
                raise Error("findEntry: fatal error: %s: cannot find key %s" % (filename, path))
        if result:
            return result
        # Nothing found yet, but root.xml pointed at another path: retry there.
        path = redirect

    return result