Move some shared code to a localetools module
The time-zone script was importing two functions from the locale data generation script. Move them to a separate module, to which I'll shortly add some more shared utilities. Cleaned up some imports in the process. Combined qlocalexml2cpp's and xpathlit's error classes into a new Error class in the new module and made it a bit more like a proper python error class. Task-number: QTBUG-81344 Change-Id: Idbe0139ba9aaa2f823b8f7216dee1d2539c18b75 Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
This commit is contained in:
parent
4d9f1a87de
commit
c3dea1ffca
@ -58,14 +58,14 @@ import re
|
||||
import textwrap
|
||||
|
||||
import enumdata
|
||||
import xpathlite
|
||||
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
|
||||
from localetools import Error
|
||||
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, codeMapsFromFile, \
|
||||
_findEntryInFile as findEntryInFile
|
||||
from dateconverter import convert_date
|
||||
from qlocalexml import Locale, QLocaleXmlWriter
|
||||
|
||||
# TODO: make calendars a command-line option
|
||||
calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
|
||||
findEntryInFile = xpathlite._findEntryInFile
|
||||
def wrappedwarn(err, prefix, tokens):
|
||||
return err.write(
|
||||
'\n'.join(textwrap.wrap(prefix + ', '.join(tokens),
|
||||
@ -116,19 +116,19 @@ def raiseUnknownCode(code, form, cache={}):
|
||||
type of code to look up. Do not pass further parameters (the next
|
||||
will deprive you of the cache).
|
||||
|
||||
Raises xpathlite.Error with a suitable message, that includes the
|
||||
unknown code's full name if found.
|
||||
Raises localetools.Error with a suitable message, that includes
|
||||
the unknown code's full name if found.
|
||||
|
||||
Relies on global cldr_dir being set before it's called; see tail
|
||||
of this file.
|
||||
"""
|
||||
if not cache:
|
||||
cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
|
||||
cache.update(codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
|
||||
name = cache[form].get(code)
|
||||
msg = 'unknown %s code "%s"' % (form, code)
|
||||
if name:
|
||||
msg += ' - could use "%s"' % name
|
||||
raise xpathlite.Error(msg)
|
||||
raise Error(msg)
|
||||
|
||||
def parse_list_pattern_part_format(pattern):
|
||||
# This is a very limited parsing of the format for list pattern part only.
|
||||
@ -182,7 +182,7 @@ def generateLocaleInfo(path):
|
||||
# skip legacy/compatibility ones
|
||||
alias = findAlias(path)
|
||||
if alias:
|
||||
raise xpathlite.Error('alias to "%s"' % alias)
|
||||
raise Error('Alias to "{}"'.format(alias))
|
||||
|
||||
def code(tag):
|
||||
return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
|
||||
@ -224,7 +224,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
||||
# ### actually there is only one locale with variant: en_US_POSIX
|
||||
# does anybody care about it at all?
|
||||
if variant_code:
|
||||
raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
|
||||
raise Error('We do not support variants ("{}")'.format(variant_code))
|
||||
|
||||
language_id = enumdata.languageCodeToId(language_code)
|
||||
if language_id <= 0:
|
||||
@ -283,23 +283,23 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
||||
numbering_system = None
|
||||
try:
|
||||
numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
|
||||
except xpathlite.Error:
|
||||
except Error:
|
||||
pass
|
||||
def findEntryDef(path, xpath, value=''):
|
||||
try:
|
||||
return findEntry(path, xpath)
|
||||
except xpathlite.Error:
|
||||
except Error:
|
||||
return value
|
||||
def get_number_in_system(path, xpath, numbering_system):
|
||||
if numbering_system:
|
||||
try:
|
||||
return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
|
||||
except xpathlite.Error:
|
||||
except Error:
|
||||
# in CLDR 1.9 number system was refactored for numbers (but not for currency)
|
||||
# so if previous findEntry doesn't work we should try this:
|
||||
try:
|
||||
return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
|
||||
except xpathlite.Error:
|
||||
except Error:
|
||||
# fallback to default
|
||||
pass
|
||||
return findEntry(path, xpath)
|
||||
@ -368,7 +368,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
|
||||
for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
|
||||
try:
|
||||
ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
|
||||
except xpathlite.Error:
|
||||
except Error:
|
||||
continue
|
||||
|
||||
# TODO: exploit count-handling, instead of discarding placeholders
|
||||
@ -498,7 +498,7 @@ def _parseLocale(l):
|
||||
country = "AnyCountry"
|
||||
|
||||
if l == "und":
|
||||
raise xpathlite.Error("we are treating unknown locale like C")
|
||||
raise Error('We treat unknown locale like C')
|
||||
|
||||
parsed = splitLocale(l)
|
||||
language_code = parsed.next()
|
||||
@ -511,19 +511,19 @@ def _parseLocale(l):
|
||||
if language_code != "und":
|
||||
language_id = enumdata.languageCodeToId(language_code)
|
||||
if language_id == -1:
|
||||
raise xpathlite.Error('unknown language code "%s"' % language_code)
|
||||
raise Error('Unknown language code "{}"'.format(language_code))
|
||||
language = enumdata.language_list[language_id][0]
|
||||
|
||||
if script_code:
|
||||
script_id = enumdata.scriptCodeToId(script_code)
|
||||
if script_id == -1:
|
||||
raise xpathlite.Error('unknown script code "%s"' % script_code)
|
||||
raise Error('Unknown script code "{}"'.format(script_code))
|
||||
script = enumdata.script_list[script_id][0]
|
||||
|
||||
if country_code:
|
||||
country_id = enumdata.countryCodeToId(country_code)
|
||||
if country_id == -1:
|
||||
raise xpathlite.Error('unknown country code "%s"' % country_code)
|
||||
raise Error('Unknown country code "{}"'.format(country_code))
|
||||
country = enumdata.country_list[country_id][0]
|
||||
|
||||
return (language, script, country)
|
||||
@ -538,11 +538,13 @@ def likelySubtags(root, err):
|
||||
try:
|
||||
from_language, from_script, from_country = _parseLocale(tmp[u"from"])
|
||||
to_language, to_script, to_country = _parseLocale(tmp[u"to"])
|
||||
except xpathlite.Error as e:
|
||||
if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']:
|
||||
skips.append(tmp[u'to'])
|
||||
except Error as e:
|
||||
if (tmp['to'].startswith(tmp['from'])
|
||||
and e.message == 'Unknown language code "{}"'.format(tmp['from'])):
|
||||
skips.append(tmp['to'])
|
||||
else:
|
||||
sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
|
||||
sys.stderr.write('skipping likelySubtag "{}" -> "{}" ({})\n'.format(
|
||||
tmp[u"from"], tmp[u"to"], e.message))
|
||||
continue
|
||||
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
|
||||
if to_country == "AnyCountry" and from_country != to_country:
|
||||
@ -612,8 +614,8 @@ def main(args, out, err):
|
||||
if not l:
|
||||
skips.append(file)
|
||||
continue
|
||||
except xpathlite.Error as e:
|
||||
sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, str(e)))
|
||||
except Error as e:
|
||||
sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, e.message))
|
||||
continue
|
||||
|
||||
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
|
||||
@ -628,8 +630,8 @@ def main(args, out, err):
|
||||
if not l:
|
||||
skips.append(file)
|
||||
continue
|
||||
except xpathlite.Error as e:
|
||||
sys.stderr.write('skipping file "{}" ({})\n'.format(file, str(e)))
|
||||
except Error as e:
|
||||
sys.stderr.write('skipping file "{}" ({})\n'.format(file, e.message))
|
||||
continue
|
||||
|
||||
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
|
||||
|
@ -54,20 +54,14 @@ The XML structure is as follows:
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import datetime
|
||||
import tempfile
|
||||
import enumdata
|
||||
import xpathlite
|
||||
from xpathlite import DraftResolution
|
||||
import re
|
||||
import qlocalexml2cpp
|
||||
|
||||
findAlias = xpathlite.findAlias
|
||||
findEntry = xpathlite.findEntry
|
||||
findEntryInFile = xpathlite._findEntryInFile
|
||||
findTagsInFile = xpathlite.findTagsInFile
|
||||
unicode2hex = qlocalexml2cpp.unicode2hex
|
||||
wrap_list = qlocalexml2cpp.wrap_list
|
||||
import enumdata
|
||||
from localetools import unicode2hex, wrap_list, Error
|
||||
from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, \
|
||||
_findEntryInFile as findEntryInFile
|
||||
|
||||
class ByteArrayData:
|
||||
def __init__(self):
|
||||
@ -343,13 +337,13 @@ if mapTimezones:
|
||||
else:
|
||||
data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
|
||||
if data['countryId'] < 0:
|
||||
raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode'])
|
||||
raise Error('Unknown Country Code "{}"'.format(data['countryCode']))
|
||||
data['country'] = enumdata.country_list[data['countryId']][0]
|
||||
windowsIdDict[data['windowsKey'], data['countryId']] = data
|
||||
if badZones:
|
||||
sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones))
|
||||
+ "\nto the windowIdList in cldr2qtimezone.py\n\n")
|
||||
raise xpathlite.Error("Unknown Windows IDs")
|
||||
raise Error('Unknown Windows IDs')
|
||||
|
||||
print "Input file parsed, now writing data"
|
||||
|
||||
|
65
util/locale_database/localetools.py
Normal file
65
util/locale_database/localetools.py
Normal file
@ -0,0 +1,65 @@
|
||||
#############################################################################
|
||||
##
|
||||
## Copyright (C) 2020 The Qt Company Ltd.
|
||||
## Contact: https://www.qt.io/licensing/
|
||||
##
|
||||
## This file is part of the test suite of the Qt Toolkit.
|
||||
##
|
||||
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
|
||||
## Commercial License Usage
|
||||
## Licensees holding valid commercial Qt licenses may use this file in
|
||||
## accordance with the commercial license agreement provided with the
|
||||
## Software or, alternatively, in accordance with the terms contained in
|
||||
## a written agreement between you and The Qt Company. For licensing terms
|
||||
## and conditions see https://www.qt.io/terms-conditions. For further
|
||||
## information use the contact form at https://www.qt.io/contact-us.
|
||||
##
|
||||
## GNU General Public License Usage
|
||||
## Alternatively, this file may be used under the terms of the GNU
|
||||
## General Public License version 3 as published by the Free Software
|
||||
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
|
||||
## included in the packaging of this file. Please review the following
|
||||
## information to ensure the GNU General Public License requirements will
|
||||
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
|
||||
##
|
||||
## $QT_END_LICENSE$
|
||||
##
|
||||
#############################################################################
|
||||
"""Utilities shared among the CLDR extraction tools.
|
||||
Functions:
|
||||
unicode2hex() -- converts unicode text to UCS-2 in hex form.
|
||||
wrap_list() -- map list to comma-separated string, 20 entries per line.
|
||||
|
||||
Classes:
|
||||
Error -- A shared error class.
|
||||
"""
|
||||
|
||||
try:
    _ErrorBase = StandardError  # Python 2: keep the original base class
except NameError:
    _ErrorBase = Exception  # Python 3 has no StandardError


class Error (_ErrorBase):
    """Error class shared by the locale database scripts.

    The first constructor argument is the human-readable message; it
    is kept as the .message attribute and is what str() returns. Any
    further positional arguments are passed on to the base exception
    along with the message, so they show up in .args.
    """
    __upinit = _ErrorBase.__init__

    def __init__(self, msg, *args):
        self.__upinit(msg, *args)
        self.message = msg

    def __str__(self):
        return self.message
|
||||
|
||||
def unicode2hex(s):
    """Return the hex() strings of the 16-bit code units encoding s.

    Each character whose code point fits in 16 bits contributes one
    entry; any character above 0xFFFF is split into a surrogate pair
    (high then low), so contributes two entries.
    """
    units = []
    for char in s:
        code = ord(char)
        if code <= 0xFFFF:
            units.append(hex(code))
            continue
        # Needs a surrogate pair; arithmetic copied from qchar.h
        units.extend((hex((code >> 10) + 0xd7c0),
                      hex(code % 0x400 + 0xdc00)))
    return units
|
||||
|
||||
def wrap_list(lst):
    """Join a list of strings with commas, 20 entries to a line.

    Entries within a line are separated by ", "; successive lines are
    separated by ",\\n". An empty list gives an empty string.
    """
    rows = []
    rest = lst
    while rest:
        # Peel off the next (up to) 20 entries as one output line
        rows.append(", ".join(rest[:20]))
        rest = rest[20:]
    return ",\n".join(rows)
|
@ -39,7 +39,7 @@ Support:
|
||||
from __future__ import print_function
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from xpathlite import Error
|
||||
from localetools import Error
|
||||
|
||||
# Tools used by Locale:
|
||||
def camel(seq):
|
||||
|
@ -37,9 +37,10 @@ import os
|
||||
import sys
|
||||
import tempfile
|
||||
import datetime
|
||||
from enumdata import language_aliases, country_aliases, script_aliases
|
||||
|
||||
from qlocalexml import QLocaleXmlReader
|
||||
from enumdata import language_aliases, country_aliases, script_aliases
|
||||
from localetools import unicode2hex, wrap_list, Error
|
||||
|
||||
# TODO: Make calendars a command-line parameter
|
||||
# map { CLDR name: Qt file name }
|
||||
@ -59,19 +60,6 @@ generated_template = """
|
||||
|
||||
"""
|
||||
|
||||
class Error:
|
||||
def __init__(self, msg):
|
||||
self.msg = msg
|
||||
def __str__(self):
|
||||
return self.msg
|
||||
|
||||
def wrap_list(lst):
|
||||
def split(lst, size):
|
||||
while lst:
|
||||
head, lst = lst[:size], lst[size:]
|
||||
yield head
|
||||
return ",\n".join(", ".join(x) for x in split(lst, 20))
|
||||
|
||||
def fixedScriptName(name, dupes):
|
||||
# Don't .capitalize() as some names are already camel-case (see enumdata.py):
|
||||
name = ''.join(word[0].upper() + word[1:] for word in name.split())
|
||||
@ -127,21 +115,6 @@ def compareLocaleKeys(key1, key2):
|
||||
return key1[1] - key2[1]
|
||||
|
||||
|
||||
def unicode2hex(s):
|
||||
lst = []
|
||||
for x in s:
|
||||
v = ord(x)
|
||||
if v > 0xFFFF:
|
||||
# make a surrogate pair
|
||||
# copied from qchar.h
|
||||
high = (v >> 10) + 0xd7c0
|
||||
low = (v % 0x400 + 0xdc00)
|
||||
lst.append(hex(high))
|
||||
lst.append(hex(low))
|
||||
else:
|
||||
lst.append(hex(v))
|
||||
return lst
|
||||
|
||||
class StringDataToken:
|
||||
def __init__(self, index, length):
|
||||
if index > 0xFFFF or length > 0xFFFF:
|
||||
|
@ -31,6 +31,8 @@ import sys
|
||||
import os
|
||||
import xml.dom.minidom
|
||||
|
||||
from localetools import Error
|
||||
|
||||
class DraftResolution:
|
||||
# See http://www.unicode.org/cldr/process.html for description
|
||||
unconfirmed = 'unconfirmed'
|
||||
@ -43,12 +45,6 @@ class DraftResolution:
|
||||
def toInt(self):
|
||||
return DraftResolution._values[self.resolution]
|
||||
|
||||
class Error:
|
||||
def __init__(self, msg):
|
||||
self.msg = msg
|
||||
def __str__(self):
|
||||
return self.msg
|
||||
|
||||
doc_cache = {}
|
||||
def parseDoc(file):
|
||||
if not doc_cache.has_key(file):
|
||||
|
Loading…
Reference in New Issue
Block a user