Clean up in CLDR scripts for QLocale
Use python more fluently; DRY - use functions to avoid repetition; use dict-comprehensions; use os.path.join() in preference to arithmetic with path strings; use elif to avoid the need for a local variable; set() can take a generator directly, no need to go via a list; don't end lines in semicolon (this is python). Test isdir() once instead of exists() twice on the same name. Just, generally, use python's feature-set. Change-Id: Ib114aa016f70b3be09e968d9cfc069b057f49d41 Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
parent
6a4875f0d1
commit
267edbec19
@ -46,14 +46,11 @@ import re
|
||||
|
||||
import enumdata
|
||||
import xpathlite
|
||||
from xpathlite import DraftResolution
|
||||
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
|
||||
from dateconverter import convert_date
|
||||
from localexml import Locale
|
||||
|
||||
findAlias = xpathlite.findAlias
|
||||
findEntry = xpathlite.findEntry
|
||||
findEntryInFile = xpathlite._findEntryInFile
|
||||
findTagsInFile = xpathlite.findTagsInFile
|
||||
|
||||
def parse_number_format(patterns, data):
|
||||
# this is a very limited parsing of the number format for currency only.
|
||||
@ -86,12 +83,8 @@ def parse_number_format(patterns, data):
|
||||
return result
|
||||
|
||||
def parse_list_pattern_part_format(pattern):
|
||||
# this is a very limited parsing of the format for list pattern part only.
|
||||
result = ""
|
||||
result = pattern.replace("{0}", "%1")
|
||||
result = result.replace("{1}", "%2")
|
||||
result = result.replace("{2}", "%3")
|
||||
return result
|
||||
# This is a very limited parsing of the format for list pattern part only.
|
||||
return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
|
||||
|
||||
def generateLocaleInfo(path):
|
||||
if not path.endswith(".xml"):
|
||||
@ -102,12 +95,11 @@ def generateLocaleInfo(path):
|
||||
if alias:
|
||||
raise xpathlite.Error('alias to "%s"' % alias)
|
||||
|
||||
language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
|
||||
country_code = findEntryInFile(path, "identity/territory", attribute="type")[0]
|
||||
script_code = findEntryInFile(path, "identity/script", attribute="type")[0]
|
||||
variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0]
|
||||
def code(tag):
|
||||
return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
|
||||
|
||||
return _generateLocaleInfo(path, language_code, script_code, country_code, variant_code)
|
||||
return _generateLocaleInfo(path, code('language'), code('script'),
|
||||
code('territory'), code('variant'))
|
||||
|
||||
def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""):
|
||||
if not path.endswith(".xml"):
|
||||
@ -126,12 +118,10 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
||||
language_id = enumdata.languageCodeToId(language_code)
|
||||
if language_id <= 0:
|
||||
raise xpathlite.Error('unknown language code "%s"' % language_code)
|
||||
language = enumdata.language_list[language_id][0]
|
||||
|
||||
script_id = enumdata.scriptCodeToId(script_code)
|
||||
if script_id == -1:
|
||||
raise xpathlite.Error('unknown script code "%s"' % script_code)
|
||||
script = enumdata.script_list[script_id][0]
|
||||
|
||||
# we should handle fully qualified names with the territory
|
||||
if not country_code:
|
||||
@ -139,7 +129,6 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
||||
country_id = enumdata.countryCodeToId(country_code)
|
||||
if country_id <= 0:
|
||||
raise xpathlite.Error('unknown country code "%s"' % country_code)
|
||||
country = enumdata.country_list[country_id][0]
|
||||
|
||||
# So we say we accept only those values that have "contributed" or
|
||||
# "approved" resolution. see http://www.unicode.org/cldr/process.html
|
||||
@ -147,36 +136,36 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
||||
# compatibility.
|
||||
draft = DraftResolution.contributed
|
||||
|
||||
result = {}
|
||||
result['language'] = language
|
||||
result['script'] = script
|
||||
result['country'] = country
|
||||
result['language_code'] = language_code
|
||||
result['country_code'] = country_code
|
||||
result['script_code'] = script_code
|
||||
result['variant_code'] = variant_code
|
||||
result['language_id'] = language_id
|
||||
result['script_id'] = script_id
|
||||
result['country_id'] = country_id
|
||||
result = dict(
|
||||
language=enumdata.language_list[language_id][0],
|
||||
language_code=language_code, language_id=language_id,
|
||||
script=enumdata.script_list[script_id][0],
|
||||
script_code=script_code, script_id=script_id,
|
||||
country=enumdata.country_list[country_id][0],
|
||||
country_code=country_code, country_id=country_id,
|
||||
variant_code=variant_code)
|
||||
|
||||
(dir_name, file_name) = os.path.split(path)
|
||||
supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
|
||||
currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code);
|
||||
def from_supplement(tag,
|
||||
path=os.path.join(dir_name, '..', 'supplemental',
|
||||
'supplementalData.xml')):
|
||||
return findTagsInFile(path, tag)
|
||||
currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code)
|
||||
result['currencyIsoCode'] = ''
|
||||
result['currencyDigits'] = 2
|
||||
result['currencyRounding'] = 1
|
||||
if currencies:
|
||||
for e in currencies:
|
||||
if e[0] == 'currency':
|
||||
tender = True
|
||||
t = [x for x in e[1] if x[0] == 'tender']
|
||||
if t and t[0][1] == 'false':
|
||||
tender = False;
|
||||
if tender and not any(x[0] == 'to' for x in e[1]):
|
||||
t = [x[1] == 'false' for x in e[1] if x[0] == 'tender']
|
||||
if t and t[0]:
|
||||
pass
|
||||
elif not any(x[0] == 'to' for x in e[1]):
|
||||
result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next()
|
||||
break
|
||||
if result['currencyIsoCode']:
|
||||
t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']);
|
||||
t = from_supplement("currencyData/fractions/info[iso4217=%s]"
|
||||
% result['currencyIsoCode'])
|
||||
if t and t[0][0] == 'info':
|
||||
result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next()
|
||||
result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next()
|
||||
@ -210,7 +199,9 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
||||
result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)
|
||||
try:
|
||||
numbering_systems = {}
|
||||
for ns in findTagsInFile(cldr_dir + "/../supplemental/numberingSystems.xml", "numberingSystems"):
|
||||
for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
|
||||
'numberingSystems.xml'),
|
||||
'numberingSystems'):
|
||||
tmp = {}
|
||||
id = ""
|
||||
for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
|
||||
@ -373,7 +364,9 @@ locale_database = {}
|
||||
|
||||
# see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content
|
||||
defaultContent_locales = {}
|
||||
for ns in findTagsInFile(cldr_dir + "/../supplemental/supplementalMetadata.xml", "metadata/defaultContent"):
|
||||
for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
|
||||
'supplementalMetadata.xml'),
|
||||
'metadata/defaultContent'):
|
||||
for data in ns[1:][0]:
|
||||
if data[0] == u"locales":
|
||||
defaultContent_locales = data[1].split()
|
||||
|
@ -77,54 +77,40 @@ def eachEltInGroup(parent, group, key):
|
||||
yield element
|
||||
element = element.nextSibling
|
||||
|
||||
def eltText(elt):
|
||||
result = ""
|
||||
def eltWords(elt):
|
||||
child = elt.firstChild
|
||||
while child:
|
||||
if child.nodeType == elt.TEXT_NODE:
|
||||
if result:
|
||||
result += " "
|
||||
result += child.nodeValue
|
||||
yield child.nodeValue
|
||||
child = child.nextSibling
|
||||
return result
|
||||
|
||||
def firstChildText(elt, key):
|
||||
return ' '.join(eltWords(firstChildElt(elt, key)))
|
||||
|
||||
def loadMap(doc, category):
|
||||
return dict((int(eltText(firstChildElt(element, 'id'))),
|
||||
(eltText(firstChildElt(element, 'name')),
|
||||
eltText(firstChildElt(element, 'code'))))
|
||||
return dict((int(firstChildText(element, 'id')),
|
||||
(firstChildText(element, 'name'),
|
||||
firstChildText(element, 'code')))
|
||||
for element in eachEltInGroup(doc.documentElement,
|
||||
category + 'List', category))
|
||||
|
||||
def loadLikelySubtagsMap(doc):
|
||||
result = {}
|
||||
def triplet(element, keys=('language', 'script', 'country')):
|
||||
return tuple(firstChildText(element, key) for key in keys)
|
||||
|
||||
i = 0
|
||||
for elt in eachEltInGroup(doc.documentElement, "likelySubtags", "likelySubtag"):
|
||||
elt_from = firstChildElt(elt, "from")
|
||||
from_language = eltText(firstChildElt(elt_from, "language"));
|
||||
from_script = eltText(firstChildElt(elt_from, "script"));
|
||||
from_country = eltText(firstChildElt(elt_from, "country"));
|
||||
|
||||
elt_to = firstChildElt(elt, "to")
|
||||
to_language = eltText(firstChildElt(elt_to, "language"));
|
||||
to_script = eltText(firstChildElt(elt_to, "script"));
|
||||
to_country = eltText(firstChildElt(elt_to, "country"));
|
||||
|
||||
tmp = {}
|
||||
tmp["from"] = (from_language, from_script, from_country)
|
||||
tmp["to"] = (to_language, to_script, to_country)
|
||||
result[i] = tmp;
|
||||
i += 1
|
||||
return result
|
||||
return dict((i, {'from': triplet(firstChildElt(elt, "from")),
|
||||
'to': triplet(firstChildElt(elt, "to"))})
|
||||
for i, elt in enumerate(eachEltInGroup(doc.documentElement,
|
||||
'likelySubtags', 'likelySubtag')))
|
||||
|
||||
def fixedScriptName(name, dupes):
|
||||
# Don't .capitalize() as some names are already camel-case (see enumdata.py):
|
||||
name = ''.join(word[0].upper() + word[1:] for word in name.split())
|
||||
if name[-6:] != "Script":
|
||||
name = name + "Script";
|
||||
name = name + "Script"
|
||||
if name in dupes:
|
||||
sys.stderr.write("\n\n\nERROR: The script name '%s' is messy" % name)
|
||||
sys.exit(1);
|
||||
sys.exit(1)
|
||||
return name
|
||||
|
||||
def fixedCountryName(name, dupes):
|
||||
@ -138,8 +124,8 @@ def fixedLanguageName(name, dupes):
|
||||
return name.replace(" ", "")
|
||||
|
||||
def findDupes(country_map, language_map):
|
||||
country_set = set([ v[0] for a, v in country_map.iteritems() ])
|
||||
language_set = set([ v[0] for a, v in language_map.iteritems() ])
|
||||
country_set = set(v[0] for a, v in country_map.iteritems())
|
||||
language_set = set(v[0] for a, v in language_map.iteritems())
|
||||
return country_set & language_set
|
||||
|
||||
def languageNameToId(name, language_map):
|
||||
@ -164,7 +150,7 @@ def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map
|
||||
result = {}
|
||||
|
||||
for locale_elt in eachEltInGroup(doc.documentElement, "localeList", "locale"):
|
||||
locale = Locale.fromXmlData(lambda k: eltText(firstChildElt(locale_elt, k)))
|
||||
locale = Locale.fromXmlData(lambda k: firstChildText(locale_elt, k))
|
||||
language_id = languageNameToId(locale.language, language_map)
|
||||
if language_id == -1:
|
||||
sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language)
|
||||
@ -324,8 +310,7 @@ def escapedString(s):
|
||||
return result
|
||||
|
||||
def printEscapedString(s):
|
||||
print escapedString(s);
|
||||
|
||||
print escapedString(s)
|
||||
|
||||
def currencyIsoCodeData(s):
|
||||
if s:
|
||||
@ -346,13 +331,9 @@ def main():
|
||||
localexml = sys.argv[1]
|
||||
qtsrcdir = sys.argv[2]
|
||||
|
||||
if not os.path.exists(qtsrcdir) or not os.path.exists(qtsrcdir):
|
||||
usage()
|
||||
if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale_data_p.h"):
|
||||
usage()
|
||||
if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale.h"):
|
||||
usage()
|
||||
if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale.qdoc"):
|
||||
if not (os.path.isdir(qtsrcdir)
|
||||
and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'tools', leaf))
|
||||
for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
|
||||
usage()
|
||||
|
||||
(data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p", dir=qtsrcdir)
|
||||
@ -377,7 +358,7 @@ def main():
|
||||
locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map)
|
||||
dupes = findDupes(language_map, country_map)
|
||||
|
||||
cldr_version = eltText(firstChildElt(doc.documentElement, "version"))
|
||||
cldr_version = firstChildText(doc.documentElement, "version")
|
||||
|
||||
data_temp_file.write("""
|
||||
/*
|
||||
@ -455,9 +436,7 @@ def main():
|
||||
index += count
|
||||
data_temp_file.write("%6d, // %s\n" % (i, language_map[key][0]))
|
||||
data_temp_file.write(" 0 // trailing 0\n")
|
||||
data_temp_file.write("};\n")
|
||||
|
||||
data_temp_file.write("\n")
|
||||
data_temp_file.write("};\n\n")
|
||||
|
||||
list_pattern_part_data = StringData('list_pattern_part_data')
|
||||
date_format_data = StringData('date_format_data')
|
||||
|
Loading…
Reference in New Issue
Block a user