qt5base-lts/util/local_database/cldr2qlocalexml.py
Iikka Eklund be15856f61 Change copyrights from Nokia to Digia
Change copyrights and license headers from Nokia to Digia

Change-Id: If1cc974286d29fd01ec6c19dd4719a67f4c3f00e
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Reviewed-by: Sergio Ahumada <sergio.ahumada@digia.com>
2012-09-22 19:20:11 +02:00

831 lines
42 KiB
Python
Executable File

#!/usr/bin/env python
#############################################################################
##
## Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies).
## Contact: http://www.qt-project.org/legal
##
## This file is part of the test suite of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:LGPL$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and Digia. For licensing terms and
## conditions see http://qt.digia.com/licensing. For further information
## use the contact form at http://qt.digia.com/contact-us.
##
## GNU Lesser General Public License Usage
## Alternatively, this file may be used under the terms of the GNU Lesser
## General Public License version 2.1 as published by the Free Software
## Foundation and appearing in the file LICENSE.LGPL included in the
## packaging of this file. Please review the following information to
## ensure the GNU Lesser General Public License version 2.1 requirements
## will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
##
## In addition, as a special exception, Digia gives you certain additional
## rights. These rights are described in the Digia Qt LGPL Exception
## version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3.0 as published by the Free Software
## Foundation and appearing in the file LICENSE.GPL included in the
## packaging of this file. Please review the following information to
## ensure the GNU General Public License version 3.0 requirements will be
## met: http://www.gnu.org/copyleft/gpl.html.
##
##
## $QT_END_LICENSE$
##
#############################################################################
import os
import sys
import enumdata
import xpathlite
from xpathlite import DraftResolution
from dateconverter import convert_date
import re
findEntry = xpathlite.findEntry
findEntryInFile = xpathlite._findEntryInFile
findTagsInFile = xpathlite.findTagsInFile
def parse_number_format(patterns, data):
# this is a very limited parsing of the number format for currency only.
def skip_repeating_pattern(x):
p = x.replace('0', '#').replace(',', '').replace('.', '')
seen = False
result = ''
for c in p:
if c == '#':
if seen:
continue
seen = True
else:
seen = False
result = result + c
return result
patterns = patterns.split(';')
result = []
for pattern in patterns:
pattern = skip_repeating_pattern(pattern)
pattern = pattern.replace('#', "%1")
# according to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
# there can be doubled or trippled currency sign, however none of the
# locales use that.
pattern = pattern.replace(u'\xa4', "%2")
pattern = pattern.replace("''", "###").replace("'", '').replace("###", "'")
pattern = pattern.replace('-', data['minus'])
pattern = pattern.replace('+', data['plus'])
result.append(pattern)
return result
def parse_list_pattern_part_format(pattern):
# this is a very limited parsing of the format for list pattern part only.
result = ""
result = pattern.replace("{0}", "%1")
result = result.replace("{1}", "%2")
result = result.replace("{2}", "%3")
return result
def ordStr(c):
if len(c) == 1:
return str(ord(c))
return "##########"
# the following functions are supposed to fix the problem with QLocale
# returning a character instead of strings for QLocale::exponential()
# and others. So we fallback to default values in these cases.
def fixOrdStrExp(c):
if len(c) == 1:
return str(ord(c))
return str(ord('e'))
def fixOrdStrPercent(c):
if len(c) == 1:
return str(ord(c))
return str(ord('%'))
def fixOrdStrList(c):
if len(c) == 1:
return str(ord(c))
return str(ord(';'))
def generateLocaleInfo(path):
(dir_name, file_name) = os.path.split(path)
if not path.endswith(".xml"):
return {}
language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
if language_code == 'root':
# just skip it
return {}
country_code = findEntryInFile(path, "identity/territory", attribute="type")[0]
script_code = findEntryInFile(path, "identity/script", attribute="type")[0]
variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0]
# we should handle fully qualified names with the territory
if not country_code:
return {}
# we do not support variants
# ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all?
if variant_code:
return {}
language_id = enumdata.languageCodeToId(language_code)
if language_id == -1:
sys.stderr.write("unnknown language code \"" + language_code + "\"\n")
return {}
language = enumdata.language_list[language_id][0]
script_id = enumdata.scriptCodeToId(script_code)
if script_code == -1:
sys.stderr.write("unnknown script code \"" + script_code + "\"\n")
return {}
script = "AnyScript"
if script_id != -1:
script = enumdata.script_list[script_id][0]
country_id = enumdata.countryCodeToId(country_code)
country = ""
if country_id != -1:
country = enumdata.country_list[country_id][0]
if country == "":
sys.stderr.write("unnknown country code \"" + country_code + "\"\n")
return {}
# So we say we accept only those values that have "contributed" or
# "approved" resolution. see http://www.unicode.org/cldr/process.html
# But we only respect the resolution for new datas for backward
# compatibility.
draft = DraftResolution.contributed
result = {}
result['language'] = language
result['script'] = script
result['country'] = country
result['language_code'] = language_code
result['country_code'] = country_code
result['script_code'] = script_code
result['variant_code'] = variant_code
result['language_id'] = language_id
result['script_id'] = script_id
result['country_id'] = country_id
supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code);
result['currencyIsoCode'] = ''
result['currencyDigits'] = 2
result['currencyRounding'] = 1
if currencies:
for e in currencies:
if e[0] == 'currency':
tender = True
t = filter(lambda x: x[0] == 'tender', e[1])
if t and t[0][1] == 'false':
tender = False;
if tender and not filter(lambda x: x[0] == 'to', e[1]):
result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1]
break
if result['currencyIsoCode']:
t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']);
if t and t[0][0] == 'info':
result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1])
result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1])
numbering_system = None
try:
numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
except:
pass
def findEntryDef(path, xpath, value=''):
try:
return findEntry(path, xpath)
except xpathlite.Error:
return value
def get_number_in_system(path, xpath, numbering_system):
if numbering_system:
try:
return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
except xpathlite.Error:
# in CLDR 1.9 number system was refactored for numbers (but not for currency)
# so if previous findEntry doesn't work we should try this:
try:
return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
except xpathlite.Error:
# fallback to default
pass
return findEntry(path, xpath)
result['decimal'] = get_number_in_system(path, "numbers/symbols/decimal", numbering_system)
result['group'] = get_number_in_system(path, "numbers/symbols/group", numbering_system)
result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system)
result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)
try:
numbering_systems = {}
for ns in findTagsInFile(cldr_dir + "/../supplemental/numberingSystems.xml", "numberingSystems"):
tmp = {}
id = ""
for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
tmp[data[0]] = data[1]
if data[0] == u"id":
id = data[1]
numbering_systems[id] = tmp
result['zero'] = numbering_systems[numbering_system][u"digits"][0]
except e:
sys.stderr.write("Native zero detection problem:\n" + str(e) + "\n")
result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system)
result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system)
result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system)
result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower()
result['quotationStart'] = findEntry(path, "delimiters/quotationStart")
result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd")
result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart")
result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd")
result['listPatternPartStart'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[start]"))
result['listPatternPartMiddle'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[middle]"))
result['listPatternPartEnd'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[end]"))
result['listPatternPartTwo'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[2]"))
result['am'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft)
result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft)
result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern"))
result['shortDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[short]/dateFormat/pattern"))
result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern"))
result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern"))
endonym = None
if country_code and script_code:
endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s_%s]" % (language_code, script_code, country_code))
if not endonym and script_code:
endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, script_code))
if not endonym and country_code:
endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, country_code))
if not endonym:
endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s]" % (language_code))
result['language_endonym'] = endonym
result['country_endonym'] = findEntryDef(path, "localeDisplayNames/territories/territory[type=%s]" % (country_code))
currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system)
currency_format = parse_number_format(currency_format, result)
result['currencyFormat'] = currency_format[0]
result['currencyNegativeFormat'] = ''
if len(currency_format) > 1:
result['currencyNegativeFormat'] = currency_format[1]
result['currencySymbol'] = ''
result['currencyDisplayName'] = ''
if result['currencyIsoCode']:
result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode']
result['currencyDisplayName'] \
= findEntryDef(path, display_name_path) + ";" \
+ findEntryDef(path, display_name_path + "[count=zero]") + ";" \
+ findEntryDef(path, display_name_path + "[count=one]") + ";" \
+ findEntryDef(path, display_name_path + "[count=two]") + ";" \
+ findEntryDef(path, display_name_path + "[count=few]") + ";" \
+ findEntryDef(path, display_name_path + "[count=many]") + ";" \
+ findEntryDef(path, display_name_path + "[count=other]") + ";"
standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month"
result['standaloneLongMonths'] \
= findEntry(path, standalone_long_month_path + "[1]") + ";" \
+ findEntry(path, standalone_long_month_path + "[2]") + ";" \
+ findEntry(path, standalone_long_month_path + "[3]") + ";" \
+ findEntry(path, standalone_long_month_path + "[4]") + ";" \
+ findEntry(path, standalone_long_month_path + "[5]") + ";" \
+ findEntry(path, standalone_long_month_path + "[6]") + ";" \
+ findEntry(path, standalone_long_month_path + "[7]") + ";" \
+ findEntry(path, standalone_long_month_path + "[8]") + ";" \
+ findEntry(path, standalone_long_month_path + "[9]") + ";" \
+ findEntry(path, standalone_long_month_path + "[10]") + ";" \
+ findEntry(path, standalone_long_month_path + "[11]") + ";" \
+ findEntry(path, standalone_long_month_path + "[12]") + ";"
standalone_short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[abbreviated]/month"
result['standaloneShortMonths'] \
= findEntry(path, standalone_short_month_path + "[1]") + ";" \
+ findEntry(path, standalone_short_month_path + "[2]") + ";" \
+ findEntry(path, standalone_short_month_path + "[3]") + ";" \
+ findEntry(path, standalone_short_month_path + "[4]") + ";" \
+ findEntry(path, standalone_short_month_path + "[5]") + ";" \
+ findEntry(path, standalone_short_month_path + "[6]") + ";" \
+ findEntry(path, standalone_short_month_path + "[7]") + ";" \
+ findEntry(path, standalone_short_month_path + "[8]") + ";" \
+ findEntry(path, standalone_short_month_path + "[9]") + ";" \
+ findEntry(path, standalone_short_month_path + "[10]") + ";" \
+ findEntry(path, standalone_short_month_path + "[11]") + ";" \
+ findEntry(path, standalone_short_month_path + "[12]") + ";"
standalone_narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[narrow]/month"
result['standaloneNarrowMonths'] \
= findEntry(path, standalone_narrow_month_path + "[1]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[2]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[3]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[4]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[5]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[6]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[7]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[8]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[9]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[10]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[11]") + ";" \
+ findEntry(path, standalone_narrow_month_path + "[12]") + ";"
long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[wide]/month"
result['longMonths'] \
= findEntry(path, long_month_path + "[1]") + ";" \
+ findEntry(path, long_month_path + "[2]") + ";" \
+ findEntry(path, long_month_path + "[3]") + ";" \
+ findEntry(path, long_month_path + "[4]") + ";" \
+ findEntry(path, long_month_path + "[5]") + ";" \
+ findEntry(path, long_month_path + "[6]") + ";" \
+ findEntry(path, long_month_path + "[7]") + ";" \
+ findEntry(path, long_month_path + "[8]") + ";" \
+ findEntry(path, long_month_path + "[9]") + ";" \
+ findEntry(path, long_month_path + "[10]") + ";" \
+ findEntry(path, long_month_path + "[11]") + ";" \
+ findEntry(path, long_month_path + "[12]") + ";"
short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[abbreviated]/month"
result['shortMonths'] \
= findEntry(path, short_month_path + "[1]") + ";" \
+ findEntry(path, short_month_path + "[2]") + ";" \
+ findEntry(path, short_month_path + "[3]") + ";" \
+ findEntry(path, short_month_path + "[4]") + ";" \
+ findEntry(path, short_month_path + "[5]") + ";" \
+ findEntry(path, short_month_path + "[6]") + ";" \
+ findEntry(path, short_month_path + "[7]") + ";" \
+ findEntry(path, short_month_path + "[8]") + ";" \
+ findEntry(path, short_month_path + "[9]") + ";" \
+ findEntry(path, short_month_path + "[10]") + ";" \
+ findEntry(path, short_month_path + "[11]") + ";" \
+ findEntry(path, short_month_path + "[12]") + ";"
narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[narrow]/month"
result['narrowMonths'] \
= findEntry(path, narrow_month_path + "[1]") + ";" \
+ findEntry(path, narrow_month_path + "[2]") + ";" \
+ findEntry(path, narrow_month_path + "[3]") + ";" \
+ findEntry(path, narrow_month_path + "[4]") + ";" \
+ findEntry(path, narrow_month_path + "[5]") + ";" \
+ findEntry(path, narrow_month_path + "[6]") + ";" \
+ findEntry(path, narrow_month_path + "[7]") + ";" \
+ findEntry(path, narrow_month_path + "[8]") + ";" \
+ findEntry(path, narrow_month_path + "[9]") + ";" \
+ findEntry(path, narrow_month_path + "[10]") + ";" \
+ findEntry(path, narrow_month_path + "[11]") + ";" \
+ findEntry(path, narrow_month_path + "[12]") + ";"
long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[wide]/day"
result['longDays'] \
= findEntry(path, long_day_path + "[sun]") + ";" \
+ findEntry(path, long_day_path + "[mon]") + ";" \
+ findEntry(path, long_day_path + "[tue]") + ";" \
+ findEntry(path, long_day_path + "[wed]") + ";" \
+ findEntry(path, long_day_path + "[thu]") + ";" \
+ findEntry(path, long_day_path + "[fri]") + ";" \
+ findEntry(path, long_day_path + "[sat]") + ";"
short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[abbreviated]/day"
result['shortDays'] \
= findEntry(path, short_day_path + "[sun]") + ";" \
+ findEntry(path, short_day_path + "[mon]") + ";" \
+ findEntry(path, short_day_path + "[tue]") + ";" \
+ findEntry(path, short_day_path + "[wed]") + ";" \
+ findEntry(path, short_day_path + "[thu]") + ";" \
+ findEntry(path, short_day_path + "[fri]") + ";" \
+ findEntry(path, short_day_path + "[sat]") + ";"
narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[narrow]/day"
result['narrowDays'] \
= findEntry(path, narrow_day_path + "[sun]") + ";" \
+ findEntry(path, narrow_day_path + "[mon]") + ";" \
+ findEntry(path, narrow_day_path + "[tue]") + ";" \
+ findEntry(path, narrow_day_path + "[wed]") + ";" \
+ findEntry(path, narrow_day_path + "[thu]") + ";" \
+ findEntry(path, narrow_day_path + "[fri]") + ";" \
+ findEntry(path, narrow_day_path + "[sat]") + ";"
standalone_long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[wide]/day"
result['standaloneLongDays'] \
= findEntry(path, standalone_long_day_path + "[sun]") + ";" \
+ findEntry(path, standalone_long_day_path + "[mon]") + ";" \
+ findEntry(path, standalone_long_day_path + "[tue]") + ";" \
+ findEntry(path, standalone_long_day_path + "[wed]") + ";" \
+ findEntry(path, standalone_long_day_path + "[thu]") + ";" \
+ findEntry(path, standalone_long_day_path + "[fri]") + ";" \
+ findEntry(path, standalone_long_day_path + "[sat]") + ";"
standalone_short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[abbreviated]/day"
result['standaloneShortDays'] \
= findEntry(path, standalone_short_day_path + "[sun]") + ";" \
+ findEntry(path, standalone_short_day_path + "[mon]") + ";" \
+ findEntry(path, standalone_short_day_path + "[tue]") + ";" \
+ findEntry(path, standalone_short_day_path + "[wed]") + ";" \
+ findEntry(path, standalone_short_day_path + "[thu]") + ";" \
+ findEntry(path, standalone_short_day_path + "[fri]") + ";" \
+ findEntry(path, standalone_short_day_path + "[sat]") + ";"
standalone_narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[narrow]/day"
result['standaloneNarrowDays'] \
= findEntry(path, standalone_narrow_day_path + "[sun]") + ";" \
+ findEntry(path, standalone_narrow_day_path + "[mon]") + ";" \
+ findEntry(path, standalone_narrow_day_path + "[tue]") + ";" \
+ findEntry(path, standalone_narrow_day_path + "[wed]") + ";" \
+ findEntry(path, standalone_narrow_day_path + "[thu]") + ";" \
+ findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \
+ findEntry(path, standalone_narrow_day_path + "[sat]") + ";"
return result
def addEscapes(s):
result = ''
for c in s:
n = ord(c)
if n < 128:
result += c
else:
result += "\\x"
result += "%02x" % (n)
return result
def unicodeStr(s):
utf8 = s.encode('utf-8')
return "<size>" + str(len(utf8)) + "</size><data>" + addEscapes(utf8) + "</data>"
def usage():
print "Usage: cldr2qlocalexml.py <path-to-cldr-main>"
sys.exit()
def integrateWeekData(filePath):
if not filePath.endswith(".xml"):
return {}
monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ")
tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ")
wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ")
thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ")
friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ")
satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ")
sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ")
monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ")
tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ")
wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ")
thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ")
friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ")
satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ")
sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ")
monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ")
tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ")
wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ")
thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ")
friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ")
satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ")
sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ")
firstDayByCountryCode = {}
for countryCode in monFirstDayIn:
firstDayByCountryCode[countryCode] = "mon"
for countryCode in tueFirstDayIn:
firstDayByCountryCode[countryCode] = "tue"
for countryCode in wedFirstDayIn:
firstDayByCountryCode[countryCode] = "wed"
for countryCode in thuFirstDayIn:
firstDayByCountryCode[countryCode] = "thu"
for countryCode in friFirstDayIn:
firstDayByCountryCode[countryCode] = "fri"
for countryCode in satFirstDayIn:
firstDayByCountryCode[countryCode] = "sat"
for countryCode in sunFirstDayIn:
firstDayByCountryCode[countryCode] = "sun"
weekendStartByCountryCode = {}
for countryCode in monWeekendStart:
weekendStartByCountryCode[countryCode] = "mon"
for countryCode in tueWeekendStart:
weekendStartByCountryCode[countryCode] = "tue"
for countryCode in wedWeekendStart:
weekendStartByCountryCode[countryCode] = "wed"
for countryCode in thuWeekendStart:
weekendStartByCountryCode[countryCode] = "thu"
for countryCode in friWeekendStart:
weekendStartByCountryCode[countryCode] = "fri"
for countryCode in satWeekendStart:
weekendStartByCountryCode[countryCode] = "sat"
for countryCode in sunWeekendStart:
weekendStartByCountryCode[countryCode] = "sun"
weekendEndByCountryCode = {}
for countryCode in monWeekendEnd:
weekendEndByCountryCode[countryCode] = "mon"
for countryCode in tueWeekendEnd:
weekendEndByCountryCode[countryCode] = "tue"
for countryCode in wedWeekendEnd:
weekendEndByCountryCode[countryCode] = "wed"
for countryCode in thuWeekendEnd:
weekendEndByCountryCode[countryCode] = "thu"
for countryCode in friWeekendEnd:
weekendEndByCountryCode[countryCode] = "fri"
for countryCode in satWeekendEnd:
weekendEndByCountryCode[countryCode] = "sat"
for countryCode in sunWeekendEnd:
weekendEndByCountryCode[countryCode] = "sun"
for (key,locale) in locale_database.iteritems():
countryCode = locale['country_code']
if countryCode in firstDayByCountryCode:
locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode]
else:
locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"]
if countryCode in weekendStartByCountryCode:
locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode]
else:
locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"]
if countryCode in weekendEndByCountryCode:
locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode]
else:
locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"]
if len(sys.argv) != 2:
usage()
cldr_dir = sys.argv[1]
if not os.path.isdir(cldr_dir):
usage()
cldr_files = os.listdir(cldr_dir)
locale_database = {}
for file in cldr_files:
l = generateLocaleInfo(cldr_dir + "/" + file)
if not l:
sys.stderr.write("skipping file \"" + file + "\"\n")
continue
locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml")
locale_keys = locale_database.keys()
locale_keys.sort()
cldr_version = 'unknown'
ldml = open(cldr_dir+"/../dtd/ldml.dtd", "r")
for line in ldml:
if 'version cldrVersion CDATA #FIXED' in line:
cldr_version = line.split('"')[1]
print "<localeDatabase>"
print " <version>" + cldr_version + "</version>"
print " <languageList>"
for id in enumdata.language_list:
l = enumdata.language_list[id]
print " <language>"
print " <name>" + l[0] + "</name>"
print " <id>" + str(id) + "</id>"
print " <code>" + l[1] + "</code>"
print " </language>"
print " </languageList>"
print " <scriptList>"
for id in enumdata.script_list:
l = enumdata.script_list[id]
print " <script>"
print " <name>" + l[0] + "</name>"
print " <id>" + str(id) + "</id>"
print " <code>" + l[1] + "</code>"
print " </script>"
print " </scriptList>"
print " <countryList>"
for id in enumdata.country_list:
l = enumdata.country_list[id]
print " <country>"
print " <name>" + l[0] + "</name>"
print " <id>" + str(id) + "</id>"
print " <code>" + l[1] + "</code>"
print " </country>"
print " </countryList>"
print \
" <defaultCountryList>\n\
<defaultCountry>\n\
<language>Afrikaans</language>\n\
<country>SouthAfrica</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Afan</language>\n\
<country>Ethiopia</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Afar</language>\n\
<country>Djibouti</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Arabic</language>\n\
<country>SaudiArabia</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Chinese</language>\n\
<country>China</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Dutch</language>\n\
<country>Netherlands</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>English</language>\n\
<country>UnitedStates</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>French</language>\n\
<country>France</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>German</language>\n\
<country>Germany</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Greek</language>\n\
<country>Greece</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Italian</language>\n\
<country>Italy</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Malay</language>\n\
<country>Malaysia</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Portuguese</language>\n\
<country>Portugal</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Russian</language>\n\
<country>RussianFederation</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Serbian</language>\n\
<country>SerbiaAndMontenegro</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>SerboCroatian</language>\n\
<country>SerbiaAndMontenegro</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Somali</language>\n\
<country>Somalia</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Spanish</language>\n\
<country>Spain</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Swahili</language>\n\
<country>Kenya</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Swedish</language>\n\
<country>Sweden</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Tigrinya</language>\n\
<country>Eritrea</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Uzbek</language>\n\
<country>Uzbekistan</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Persian</language>\n\
<country>Iran</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Mongolian</language>\n\
<country>Mongolia</country>\n\
</defaultCountry>\n\
<defaultCountry>\n\
<language>Nepali</language>\n\
<country>Nepal</country>\n\
</defaultCountry>\n\
</defaultCountryList>"
print " <localeList>"
print \
" <locale>\n\
<language>C</language>\n\
<languageEndonym></languageEndonym>\n\
<script>AnyScript</script>\n\
<country>AnyCountry</country>\n\
<countryEndonym></countryEndonym>\n\
<decimal>46</decimal>\n\
<group>44</group>\n\
<list>59</list>\n\
<percent>37</percent>\n\
<zero>48</zero>\n\
<minus>45</minus>\n\
<plus>43</plus>\n\
<exp>101</exp>\n\
<quotationStart>\"</quotationStart>\n\
<quotationEnd>\"</quotationEnd>\n\
<alternateQuotationStart>\'</alternateQuotationStart>\n\
<alternateQuotationEnd>\'</alternateQuotationEnd>\n\
<listPatternPartStart>%1, %2</listPatternPartStart>\n\
<listPatternPartMiddle>%1, %2</listPatternPartMiddle>\n\
<listPatternPartEnd>%1, %2</listPatternPartEnd>\n\
<listPatternPartTwo>%1, %2</listPatternPartTwo>\n\
<am>AM</am>\n\
<pm>PM</pm>\n\
<firstDayOfWeek>mon</firstDayOfWeek>\n\
<weekendStart>sat</weekendStart>\n\
<weekendEnd>sun</weekendEnd>\n\
<longDateFormat>EEEE, d MMMM yyyy</longDateFormat>\n\
<shortDateFormat>d MMM yyyy</shortDateFormat>\n\
<longTimeFormat>HH:mm:ss z</longTimeFormat>\n\
<shortTimeFormat>HH:mm:ss</shortTimeFormat>\n\
<standaloneLongMonths>January;February;March;April;May;June;July;August;September;October;November;December;</standaloneLongMonths>\n\
<standaloneShortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</standaloneShortMonths>\n\
<standaloneNarrowMonths>J;F;M;A;M;J;J;A;S;O;N;D;</standaloneNarrowMonths>\n\
<longMonths>January;February;March;April;May;June;July;August;September;October;November;December;</longMonths>\n\
<shortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</shortMonths>\n\
<narrowMonths>1;2;3;4;5;6;7;8;9;10;11;12;</narrowMonths>\n\
<longDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</longDays>\n\
<shortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</shortDays>\n\
<narrowDays>7;1;2;3;4;5;6;</narrowDays>\n\
<standaloneLongDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</standaloneLongDays>\n\
<standaloneShortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</standaloneShortDays>\n\
<standaloneNarrowDays>S;M;T;W;T;F;S;</standaloneNarrowDays>\n\
<currencyIsoCode></currencyIsoCode>\n\
<currencySymbol></currencySymbol>\n\
<currencyDisplayName>;;;;;;;</currencyDisplayName>\n\
<currencyDigits>2</currencyDigits>\n\
<currencyRounding>1</currencyRounding>\n\
<currencyFormat>%1%2</currencyFormat>\n\
<currencyNegativeFormat></currencyNegativeFormat>\n\
</locale>"
for key in locale_keys:
l = locale_database[key]
print " <locale>"
print " <language>" + l['language'] + "</language>"
print " <languageEndonym>" + l['language_endonym'].encode('utf-8') + "</languageEndonym>"
print " <script>" + l['script'] + "</script>"
print " <country>" + l['country'] + "</country>"
print " <countryEndonym>" + l['country_endonym'].encode('utf-8') + "</countryEndonym>"
print " <languagecode>" + l['language_code'] + "</languagecode>"
print " <scriptcode>" + l['script_code'] + "</scriptcode>"
print " <countrycode>" + l['country_code'] + "</countrycode>"
print " <decimal>" + ordStr(l['decimal']) + "</decimal>"
print " <group>" + ordStr(l['group']) + "</group>"
print " <list>" + fixOrdStrList(l['list']) + "</list>"
print " <percent>" + fixOrdStrPercent(l['percent']) + "</percent>"
print " <zero>" + ordStr(l['zero']) + "</zero>"
print " <minus>" + ordStr(l['minus']) + "</minus>"
print " <plus>" + ordStr(l['plus']) + "</plus>"
print " <exp>" + fixOrdStrExp(l['exp']) + "</exp>"
print " <quotationStart>" + l['quotationStart'].encode('utf-8') + "</quotationStart>"
print " <quotationEnd>" + l['quotationEnd'].encode('utf-8') + "</quotationEnd>"
print " <alternateQuotationStart>" + l['alternateQuotationStart'].encode('utf-8') + "</alternateQuotationStart>"
print " <alternateQuotationEnd>" + l['alternateQuotationEnd'].encode('utf-8') + "</alternateQuotationEnd>"
print " <listPatternPartStart>" + l['listPatternPartStart'].encode('utf-8') + "</listPatternPartStart>"
print " <listPatternPartMiddle>" + l['listPatternPartMiddle'].encode('utf-8') + "</listPatternPartMiddle>"
print " <listPatternPartEnd>" + l['listPatternPartEnd'].encode('utf-8') + "</listPatternPartEnd>"
print " <listPatternPartTwo>" + l['listPatternPartTwo'].encode('utf-8') + "</listPatternPartTwo>"
print " <am>" + l['am'].encode('utf-8') + "</am>"
print " <pm>" + l['pm'].encode('utf-8') + "</pm>"
print " <firstDayOfWeek>" + l['firstDayOfWeek'].encode('utf-8') + "</firstDayOfWeek>"
print " <weekendStart>" + l['weekendStart'].encode('utf-8') + "</weekendStart>"
print " <weekendEnd>" + l['weekendEnd'].encode('utf-8') + "</weekendEnd>"
print " <longDateFormat>" + l['longDateFormat'].encode('utf-8') + "</longDateFormat>"
print " <shortDateFormat>" + l['shortDateFormat'].encode('utf-8') + "</shortDateFormat>"
print " <longTimeFormat>" + l['longTimeFormat'].encode('utf-8') + "</longTimeFormat>"
print " <shortTimeFormat>" + l['shortTimeFormat'].encode('utf-8') + "</shortTimeFormat>"
print " <standaloneLongMonths>" + l['standaloneLongMonths'].encode('utf-8') + "</standaloneLongMonths>"
print " <standaloneShortMonths>"+ l['standaloneShortMonths'].encode('utf-8') + "</standaloneShortMonths>"
print " <standaloneNarrowMonths>"+ l['standaloneNarrowMonths'].encode('utf-8') + "</standaloneNarrowMonths>"
print " <longMonths>" + l['longMonths'].encode('utf-8') + "</longMonths>"
print " <shortMonths>" + l['shortMonths'].encode('utf-8') + "</shortMonths>"
print " <narrowMonths>" + l['narrowMonths'].encode('utf-8') + "</narrowMonths>"
print " <longDays>" + l['longDays'].encode('utf-8') + "</longDays>"
print " <shortDays>" + l['shortDays'].encode('utf-8') + "</shortDays>"
print " <narrowDays>" + l['narrowDays'].encode('utf-8') + "</narrowDays>"
print " <standaloneLongDays>" + l['standaloneLongDays'].encode('utf-8') + "</standaloneLongDays>"
print " <standaloneShortDays>" + l['standaloneShortDays'].encode('utf-8') + "</standaloneShortDays>"
print " <standaloneNarrowDays>" + l['standaloneNarrowDays'].encode('utf-8') + "</standaloneNarrowDays>"
print " <currencyIsoCode>" + l['currencyIsoCode'].encode('utf-8') + "</currencyIsoCode>"
print " <currencySymbol>" + l['currencySymbol'].encode('utf-8') + "</currencySymbol>"
print " <currencyDisplayName>" + l['currencyDisplayName'].encode('utf-8') + "</currencyDisplayName>"
print " <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>"
print " <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>"
print " <currencyFormat>" + l['currencyFormat'].encode('utf-8') + "</currencyFormat>"
print " <currencyNegativeFormat>" + l['currencyNegativeFormat'].encode('utf-8') + "</currencyNegativeFormat>"
print " </locale>"
print " </localeList>"
print "</localeDatabase>"