Move some shared code to a localetools module

The time-zone script was importing two functions from the locale data
generation script. Move them to a separate module, to which I'll
shortly add some more shared utilities. Cleaned up some imports in the
process.

Combined qlocalexml2cpp's and xpathlite's error classes into a new
Error class in the new module and made it a bit more like a proper
python error class.

Task-number: QTBUG-81344
Change-Id: Idbe0139ba9aaa2f823b8f7216dee1d2539c18b75
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
This commit is contained in:
Edward Welbourne 2020-02-19 17:18:28 +01:00 committed by Edward Welbourne
parent 4d9f1a87de
commit c3dea1ffca
6 changed files with 105 additions and 75 deletions

View File

@ -58,14 +58,14 @@ import re
import textwrap
import enumdata
import xpathlite
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
from localetools import Error
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, codeMapsFromFile, \
_findEntryInFile as findEntryInFile
from dateconverter import convert_date
from qlocalexml import Locale, QLocaleXmlWriter
# TODO: make calendars a command-line option
calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
findEntryInFile = xpathlite._findEntryInFile
def wrappedwarn(err, prefix, tokens):
return err.write(
'\n'.join(textwrap.wrap(prefix + ', '.join(tokens),
@ -116,19 +116,19 @@ def raiseUnknownCode(code, form, cache={}):
type of code to look up. Do not pass further parameters (the next
will deprive you of the cache).
Raises xpathlite.Error with a suitable message, that includes the
unknown code's full name if found.
Raises localetools.Error with a suitable message, that includes
the unknown code's full name if found.
Relies on global cldr_dir being set before it's called; see tail
of this file.
"""
if not cache:
cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
cache.update(codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
name = cache[form].get(code)
msg = 'unknown %s code "%s"' % (form, code)
if name:
msg += ' - could use "%s"' % name
raise xpathlite.Error(msg)
raise Error(msg)
def parse_list_pattern_part_format(pattern):
# This is a very limited parsing of the format for list pattern part only.
@ -182,7 +182,7 @@ def generateLocaleInfo(path):
# skip legacy/compatibility ones
alias = findAlias(path)
if alias:
raise xpathlite.Error('alias to "%s"' % alias)
raise Error('Alias to "{}"'.format(alias))
def code(tag):
return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
@ -224,7 +224,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
# ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all?
if variant_code:
raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
raise Error('We do not support variants ("{}")'.format(variant_code))
language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
@ -283,23 +283,23 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
numbering_system = None
try:
numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
except xpathlite.Error:
except Error:
pass
def findEntryDef(path, xpath, value=''):
try:
return findEntry(path, xpath)
except xpathlite.Error:
except Error:
return value
def get_number_in_system(path, xpath, numbering_system):
if numbering_system:
try:
return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
except xpathlite.Error:
except Error:
# in CLDR 1.9 number system was refactored for numbers (but not for currency)
# so if previous findEntry doesn't work we should try this:
try:
return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
except xpathlite.Error:
except Error:
# fallback to default
pass
return findEntry(path, xpath)
@ -368,7 +368,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
try:
ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
except xpathlite.Error:
except Error:
continue
# TODO: exploit count-handling, instead of discarding placeholders
@ -498,7 +498,7 @@ def _parseLocale(l):
country = "AnyCountry"
if l == "und":
raise xpathlite.Error("we are treating unknown locale like C")
raise Error('We treat unknown locale like C')
parsed = splitLocale(l)
language_code = parsed.next()
@ -511,19 +511,19 @@ def _parseLocale(l):
if language_code != "und":
language_id = enumdata.languageCodeToId(language_code)
if language_id == -1:
raise xpathlite.Error('unknown language code "%s"' % language_code)
raise Error('Unknown language code "{}"'.format(language_code))
language = enumdata.language_list[language_id][0]
if script_code:
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
raise xpathlite.Error('unknown script code "%s"' % script_code)
raise Error('Unknown script code "{}"'.format(script_code))
script = enumdata.script_list[script_id][0]
if country_code:
country_id = enumdata.countryCodeToId(country_code)
if country_id == -1:
raise xpathlite.Error('unknown country code "%s"' % country_code)
raise Error('Unknown country code "{}"'.format(country_code))
country = enumdata.country_list[country_id][0]
return (language, script, country)
@ -538,11 +538,13 @@ def likelySubtags(root, err):
try:
from_language, from_script, from_country = _parseLocale(tmp[u"from"])
to_language, to_script, to_country = _parseLocale(tmp[u"to"])
except xpathlite.Error as e:
if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']:
skips.append(tmp[u'to'])
except Error as e:
if (tmp['to'].startswith(tmp['from'])
and e.message == 'Unknown language code "{}"'.format(tmp['from'])):
skips.append(tmp['to'])
else:
sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
sys.stderr.write('skipping likelySubtag "{}" -> "{}" ({})\n'.format(
tmp[u"from"], tmp[u"to"], e.message))
continue
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
if to_country == "AnyCountry" and from_country != to_country:
@ -612,8 +614,8 @@ def main(args, out, err):
if not l:
skips.append(file)
continue
except xpathlite.Error as e:
sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, str(e)))
except Error as e:
sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, e.message))
continue
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
@ -628,8 +630,8 @@ def main(args, out, err):
if not l:
skips.append(file)
continue
except xpathlite.Error as e:
sys.stderr.write('skipping file "{}" ({})\n'.format(file, str(e)))
except Error as e:
sys.stderr.write('skipping file "{}" ({})\n'.format(file, e.message))
continue
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l

View File

@ -54,20 +54,14 @@ The XML structure is as follows:
import os
import sys
import re
import datetime
import tempfile
import enumdata
import xpathlite
from xpathlite import DraftResolution
import re
import qlocalexml2cpp
findAlias = xpathlite.findAlias
findEntry = xpathlite.findEntry
findEntryInFile = xpathlite._findEntryInFile
findTagsInFile = xpathlite.findTagsInFile
unicode2hex = qlocalexml2cpp.unicode2hex
wrap_list = qlocalexml2cpp.wrap_list
import enumdata
from localetools import unicode2hex, wrap_list, Error
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, \
_findEntryInFile as findEntryInFile
class ByteArrayData:
def __init__(self):
@ -343,13 +337,13 @@ if mapTimezones:
else:
data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
if data['countryId'] < 0:
raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode'])
raise Error('Unknown Country Code "{}"'.format(data['countryCode']))
data['country'] = enumdata.country_list[data['countryId']][0]
windowsIdDict[data['windowsKey'], data['countryId']] = data
if badZones:
sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones))
+ "\nto the windowIdList in cldr2qtimezone.py\n\n")
raise xpathlite.Error("Unknown Windows IDs")
raise Error('Unknown Windows IDs')
print "Input file parsed, now writing data"

View File

@ -0,0 +1,65 @@
#############################################################################
##
## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3 as published by the Free Software
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################
"""Utilities shared among the CLDR extraction tools.
Functions:
unicode2hex() -- converts unicode text to UTF-16 code units in hex form.
wrap_list() -- map list to comma-separated string, 20 entries per line.
Classes:
Error -- A shared error class.
"""
try:
    _ErrorBase = StandardError  # Python 2: keep the original base class
except NameError:
    _ErrorBase = Exception  # Python 3 removed StandardError


class Error(_ErrorBase):
    """Error class shared by the CLDR extraction tools.

    The first constructor argument is the human-readable message; it is
    kept on the instance as ``message`` and is what ``str()`` returns.
    Any further positional arguments are forwarded to the base exception
    along with the message, so they show up in ``args``.
    """

    def __init__(self, msg, *args):
        """Initialise the exception.

        msg -- text describing the problem; stored as ``self.message``.
        args -- optional extra values, passed on to the base class.
        """
        _ErrorBase.__init__(self, msg, *args)
        # Stored explicitly: callers read e.message directly.
        self.message = msg

    def __str__(self):
        """Return the message alone, ignoring any extra args."""
        return self.message
def unicode2hex(s):
    """Return the hex() form of each UTF-16 code unit of the text s.

    Each character whose code point fits in 16 bits contributes one
    entry; any character above 0xFFFF contributes two, the high and low
    halves of its surrogate pair. The result is a list of strings, in
    the order the characters appear in s.
    """
    units = []
    for char in s:
        code = ord(char)
        if code <= 0xFFFF:
            units.append(hex(code))
            continue
        # Split into a surrogate pair; arithmetic copied from qchar.h
        units.extend((hex((code >> 10) + 0xd7c0),
                      hex(code % 0x400 + 0xdc00)))
    return units
def wrap_list(lst):
    """Join the strings in lst into comma-separated lines of 20 entries.

    Entries on a line are separated by ", "; successive lines are
    separated by ",\n". An empty lst gives an empty string.
    """
    per_line = 20
    rows = []
    for start in range(0, len(lst), per_line):
        rows.append(", ".join(lst[start:start + per_line]))
    return ",\n".join(rows)

View File

@ -39,7 +39,7 @@ Support:
from __future__ import print_function
from xml.sax.saxutils import escape
from xpathlite import Error
from localetools import Error
# Tools used by Locale:
def camel(seq):

View File

@ -37,9 +37,10 @@ import os
import sys
import tempfile
import datetime
from enumdata import language_aliases, country_aliases, script_aliases
from qlocalexml import QLocaleXmlReader
from enumdata import language_aliases, country_aliases, script_aliases
from localetools import unicode2hex, wrap_list, Error
# TODO: Make calendars a command-line parameter
# map { CLDR name: Qt file name }
@ -59,19 +60,6 @@ generated_template = """
"""
class Error:
def __init__(self, msg):
self.msg = msg
def __str__(self):
return self.msg
def wrap_list(lst):
def split(lst, size):
while lst:
head, lst = lst[:size], lst[size:]
yield head
return ",\n".join(", ".join(x) for x in split(lst, 20))
def fixedScriptName(name, dupes):
# Don't .capitalize() as some names are already camel-case (see enumdata.py):
name = ''.join(word[0].upper() + word[1:] for word in name.split())
@ -127,21 +115,6 @@ def compareLocaleKeys(key1, key2):
return key1[1] - key2[1]
def unicode2hex(s):
lst = []
for x in s:
v = ord(x)
if v > 0xFFFF:
# make a surrogate pair
# copied from qchar.h
high = (v >> 10) + 0xd7c0
low = (v % 0x400 + 0xdc00)
lst.append(hex(high))
lst.append(hex(low))
else:
lst.append(hex(v))
return lst
class StringDataToken:
def __init__(self, index, length):
if index > 0xFFFF or length > 0xFFFF:

View File

@ -31,6 +31,8 @@ import sys
import os
import xml.dom.minidom
from localetools import Error
class DraftResolution:
# See http://www.unicode.org/cldr/process.html for description
unconfirmed = 'unconfirmed'
@ -43,12 +45,6 @@ class DraftResolution:
def toInt(self):
return DraftResolution._values[self.resolution]
class Error:
def __init__(self, msg):
self.msg = msg
def __str__(self):
return self.msg
doc_cache = {}
def parseDoc(file):
if not doc_cache.has_key(file):