Separate offsets from sizes in QLocale's data

This enables us to make the sizes quint8 and benefit from the
resulting packing, making the locale data smaller. The sizes for long
month-name lists (which concatenate twelve names with semicolon as
separator) can overflow an 8-bit member, so use quint16 where needed.

Re-ordered the data in QLocaleData and QCalendarLocale. Now all
long-short(-narrow) families arise in that order; and any standalone
is grouped with the one of the same length. (This cost 20 bytes in the
date-format table, which optimises out more duplication if short is
before long, but the saving in the (smaller) time-format table more
than makes up for it; and 20 bytes isn't worth the confusion that being
inconsistent in ordering might cause.)

At the same time, drop trailing semicolons from list entries (which
join various names with semicolon) as they're not needed: we know
where the end of the list is, because we know the size of the string
that results from concatenation. The code that parses such lists can
even correctly handle empty entries at the end.

Saves 26 kB of data in the compiled binaries.

Task-number: QTBUG-81053
Change-Id: If6ccc96a6910828817aa605d10fd814f567ae1e8
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
Edward Welbourne 2020-01-09 14:48:21 +01:00
parent c0f041fcdf
commit c08a31634f
10 changed files with 7113 additions and 7220 deletions

View File

@ -2827,16 +2827,16 @@ static QString rawMonthName(const QCalendarLocale &localeData,
QLocaleData::DataRange range;
switch (type) {
case QLocale::LongFormat:
range = localeData.m_long;
break;
range = localeData.longMonth();
break;
case QLocale::ShortFormat:
range = localeData.m_short;
break;
range = localeData.shortMonth();
break;
case QLocale::NarrowFormat:
range = localeData.m_narrow;
break;
range = localeData.narrowMonth();
break;
default:
return QString();
return QString();
}
return range.getListEntry(monthsData, month - 1);
}
@ -2852,13 +2852,13 @@ static QString rawStandaloneMonthName(const QCalendarLocale &localeData,
QLocaleData::DataRange range;
switch (type) {
case QLocale::LongFormat:
range = localeData.m_standalone_long;
range = localeData.longMonthStandalone();
break;
case QLocale::ShortFormat:
range = localeData.m_standalone_short;
range = localeData.shortMonthStandalone();
break;
case QLocale::NarrowFormat:
range = localeData.m_standalone_narrow;
range = localeData.narrowMonthStandalone();
break;
default:
return QString();

File diff suppressed because it is too large Load Diff

View File

@ -326,35 +326,22 @@ public:
return { quint16(offset + i), quint16(end - i) };
}
};
#define rangeGetter(name, stem) \
DataRange name() const { return { m_ ## stem ## _idx, m_ ## stem ## _size }; }
rangeGetter(startListPattern, list_pattern_part_start)
rangeGetter(midListPattern, list_pattern_part_mid)
rangeGetter(endListPattern, list_pattern_part_end)
rangeGetter(pairListPattern, list_pattern_part_two)
rangeGetter(shortDateFormat, short_date_format)
rangeGetter(longDateFormat, long_date_format)
rangeGetter(shortTimeFormat, short_time_format)
rangeGetter(longTimeFormat, long_time_format)
rangeGetter(narrowDayNamesStandalone, standalone_narrow_day_names)
rangeGetter(shortDayNamesStandalone, standalone_short_day_names)
rangeGetter(longDayNamesStandalone, standalone_long_day_names)
rangeGetter(narrowDayNames, narrow_day_names)
rangeGetter(shortDayNames, short_day_names)
rangeGetter(longDayNames, long_day_names)
rangeGetter(anteMeridiem, am)
rangeGetter(postMeridiem, pm)
rangeGetter(byteCount, byte)
rangeGetter(byteAmountSI, byte_si_quantified)
rangeGetter(byteAmountIEC, byte_iec_quantified)
rangeGetter(currencySymbol, currency_symbol)
rangeGetter(currencyDisplayName, currency_display_name)
rangeGetter(currencyFormat, currency_format)
rangeGetter(currencyFormatNegative, currency_negative_format)
rangeGetter(endonymLanguage, language_endonym)
rangeGetter(endonymCountry, country_endonym)
#define ForEachQLocaleRange(X) \
X(startListPattern) X(midListPattern) X(endListPattern) X(pairListPattern) \
X(longDateFormat) X(shortDateFormat) X(longTimeFormat) X(shortTimeFormat) \
X(longDayNamesStandalone) X(longDayNames) \
X(shortDayNamesStandalone) X(shortDayNames) \
X(narrowDayNamesStandalone) X(narrowDayNames) \
X(anteMeridiem) X(postMeridiem) \
X(byteCount) X(byteAmountSI) X(byteAmountIEC) \
X(currencySymbol) X(currencyDisplayName) \
X(currencyFormat) X(currencyFormatNegative) \
X(endonymLanguage) X(endonymCountry)
#define rangeGetter(name) \
DataRange name() const { return { m_ ## name ## _idx, m_ ## name ## _size }; }
ForEachQLocaleRange(rangeGetter)
#undef rangeGetter
public:
@ -365,34 +352,20 @@ public:
char16_t m_quotation_start, m_quotation_end;
char16_t m_alternate_quotation_start, m_alternate_quotation_end;
quint16 m_list_pattern_part_start_idx, m_list_pattern_part_start_size;
quint16 m_list_pattern_part_mid_idx, m_list_pattern_part_mid_size;
quint16 m_list_pattern_part_end_idx, m_list_pattern_part_end_size;
quint16 m_list_pattern_part_two_idx, m_list_pattern_part_two_size;
quint16 m_short_date_format_idx, m_short_date_format_size;
quint16 m_long_date_format_idx, m_long_date_format_size;
quint16 m_short_time_format_idx, m_short_time_format_size;
quint16 m_long_time_format_idx, m_long_time_format_size;
quint16 m_standalone_short_day_names_idx, m_standalone_short_day_names_size;
quint16 m_standalone_long_day_names_idx, m_standalone_long_day_names_size;
quint16 m_standalone_narrow_day_names_idx, m_standalone_narrow_day_names_size;
quint16 m_short_day_names_idx, m_short_day_names_size;
quint16 m_long_day_names_idx, m_long_day_names_size;
quint16 m_narrow_day_names_idx, m_narrow_day_names_size;
quint16 m_am_idx, m_am_size;
quint16 m_pm_idx, m_pm_size;
quint16 m_byte_idx, m_byte_size;
quint16 m_byte_si_quantified_idx, m_byte_si_quantified_size;
quint16 m_byte_iec_quantified_idx, m_byte_iec_quantified_size;
// Offsets, then sizes, for each range:
#define rangeIndex(name) quint16 m_ ## name ## _idx;
ForEachQLocaleRange(rangeIndex)
#undef rangeIndex
#define Size(name) quint8 m_ ## name ## _size;
ForEachQLocaleRange(Size)
#undef Size
#undef ForEachQLocaleRange
// Strays:
char m_currency_iso_code[3];
quint16 m_currency_symbol_idx, m_currency_symbol_size;
quint16 m_currency_display_name_idx, m_currency_display_name_size;
quint8 m_currency_format_idx, m_currency_format_size;
quint8 m_currency_negative_format_idx, m_currency_negative_format_size;
quint16 m_language_endonym_idx, m_language_endonym_size;
quint16 m_country_endonym_idx, m_country_endonym_size;
quint16 m_currency_digits : 2;
quint16 m_currency_rounding : 3;
quint16 m_currency_rounding : 3; // (not yet used !)
quint16 m_first_day_of_week : 3;
quint16 m_weekend_start : 3;
quint16 m_weekend_end : 3;

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2019 The Qt Company Ltd.
** Copyright (C) 2020 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@ -64,13 +64,25 @@ QT_BEGIN_NAMESPACE
struct QCalendarLocale {
quint16 m_language_id, m_script_id, m_country_id;
#define rangeGetter(name) \
QLocaleData::DataRange name() const { return { m_ ## name ## _idx, m_ ## name ## _size }; }
rangeGetter(longMonthStandalone) rangeGetter(longMonth)
rangeGetter(shortMonthStandalone) rangeGetter(shortMonth)
rangeGetter(narrowMonthStandalone) rangeGetter(narrowMonth)
#undef rangeGetter
// Month name indexes:
QLocaleData::DataRange m_standalone_short;
QLocaleData::DataRange m_standalone_long;
QLocaleData::DataRange m_standalone_narrow;
QLocaleData::DataRange m_short;
QLocaleData::DataRange m_long;
QLocaleData::DataRange m_narrow;
quint16 m_longMonthStandalone_idx, m_longMonth_idx;
quint16 m_shortMonthStandalone_idx, m_shortMonth_idx;
quint16 m_narrowMonthStandalone_idx, m_narrowMonth_idx;
// Twelve long month names (separated by semicolons) can add up to more than
// 255 QChars (the most a quint8 size can hold) - e.g. kde_TZ gets to 264.
quint16 m_longMonthStandalone_size, m_longMonth_size;
quint8 m_shortMonthStandalone_size, m_shortMonth_size;
quint8 m_narrowMonthStandalone_size, m_narrowMonth_size;
};
// Partial implementation, of methods with common forms, in qcalendar.cpp

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -352,13 +352,15 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
result['currencySymbol'] = ''
result['currencyDisplayName'] = ''
if result['currencyIsoCode']:
result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
result['currencyDisplayName'] = ';'.join(
findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode']
+ ']/displayName' + tail)
for tail in ['',] + [
'[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other')
]) + ';'
stem = "numbers/currencies/currency[%s]/" % result['currencyIsoCode']
result['currencySymbol'] = findEntryDef(path, stem + 'symbol')
displays = tuple(findEntryDef(path, stem + 'displayName' + tail)
for tail in ('',) + tuple(
'[count=%s]' % x for x in ('zero', 'one', 'two',
'few', 'many', 'other')))
while displays and not displays[-1]:
displays = displays[:-1]
result['currencyDisplayName'] = ';'.join(displays)
def findUnitDef(path, stem, fallback=''):
# The displayName for a quantified unit in en.xml is kByte
@ -405,7 +407,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
result[key + 'Months_' + cal] = ';'.join(
findEntry(path, stem + prop + "month[%d]" % i)
for i in range(1, 13)) + ';'
for i in range(1, 13))
# Day data (for Gregorian, at least):
stem = 'dates/calendars/calendar[gregorian]/days/'
@ -414,7 +416,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
result[key + 'Days'] = ';'.join(
findEntry(path, stem + prop + '[' + day + ']')
for day in days) + ';'
for day in days)
return Locale(result)

View File

@ -1,7 +1,7 @@
# coding=utf8
#############################################################################
##
## Copyright (C) 2018 The Qt Company Ltd.
## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
@ -267,7 +267,7 @@ class Locale:
except KeyError: # Need to add an entry to known, above.
print 'Unsupported calendar:', cal
raise
names, get = data[0] + ('',), data[1:]
names, get = data[0], data[1:]
for n, size in enumerate(sizes):
yield ('_'.join((camelCase((size, 'months')), cal)),
';'.join(get[n][0](i, x) for i, x in enumerate(names)))
@ -279,7 +279,7 @@ class Locale:
def C(cls, calendars=('gregorian',),
# Empty entry at end to ensure final separator when join()ed:
days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
'Thursday', 'Friday', 'Saturday', ''),
'Thursday', 'Friday', 'Saturday'),
quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
"""Returns an object representing the C locale."""
return cls(dict(cls.__monthNames(calendars)),
@ -303,11 +303,11 @@ class Locale:
longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
longDays=';'.join(days),
shortDays=';'.join(d[:3] for d in days),
narrowDays='7;1;2;3;4;5;6;',
narrowDays='7;1;2;3;4;5;6',
standaloneLongDays=';'.join(days),
standaloneShortDays=';'.join(d[:3] for d in days),
standaloneNarrowDays=';'.join(d[:1] for d in days),
currencyIsoCode='', currencySymbol='',
currencyDisplayName=';' * 7,
currencyDisplayName='',
currencyDigits=2, currencyRounding=1,
currencyFormat='%1%2', currencyNegativeFormat='')

View File

@ -259,13 +259,16 @@ def unicode2hex(s):
return lst
class StringDataToken:
def __init__(self, index, length):
if index > 0xFFFF or length > 0xFFFF:
raise Error("Position exceeds ushort range: %d,%d " % (index, length))
def __init__(self, index, length, bits):
if index > 0xffff:
print "\n\n\n#error Data index is too big!", index
raise ValueError("Start-index (%d) exceeds the uint16 range!" % index)
if length >= (1 << bits):
print "\n\n\n#error Range length is too big!", length
raise ValueError("Data size (%d) exceeds the %d-bit range!" % (length, bits))
self.index = index
self.length = length
def __str__(self):
return " %d,%d " % (self.index, self.length)
class StringData:
def __init__(self, name):
@ -274,22 +277,22 @@ class StringData:
self.name = name
self.text = '' # Used in quick-search for matches in data
def append(self, s):
def append(self, s, bits=8):
try:
token = self.hash[s]
except KeyError:
token = self.__store(s)
token = self.__store(s, bits)
self.hash[s] = token
return token
def __store(self, s):
def __store(self, s, bits):
"""Add string s to known data.
Seeks to avoid duplication, where possible.
For example, short-forms may be prefixes of long-forms.
"""
if not s:
return StringDataToken(0, 0)
return StringDataToken(0, 0, bits)
ucs2 = unicode2hex(s)
try:
index = self.text.index(s) - 1
@ -307,12 +310,15 @@ class StringData:
assert index >= 0
try:
return StringDataToken(index, len(ucs2))
return StringDataToken(index, len(ucs2), bits)
except ValueError as e:
e.args += (self.name, s)
raise
def write(self, fd):
    """Write this string table to fd as a C array.

    Emits a 'static const ushort <name>[]' definition whose body is
    self.data as laid out by wrap_list().

    Raises ValueError, with the table's name appended to its args, if
    the table has more than 0xffff entries: offsets into it are checked
    against the uint16 range when tokens are created, so a longer table
    could not be indexed up to its end.
    """
    if len(self.data) > 0xffff:
        # The message needs a conversion specifier for the length;
        # without one the % operator itself raises TypeError and the
        # intended ValueError is never seen.
        raise ValueError("Data is too big (%d) for quint16 index to its end!"
                         % len(self.data),
                         self.name)
    fd.write("\nstatic const ushort %s[] = {\n" % self.name)
    fd.write(wrap_list(self.data))
    fd.write("\n};\n")
@ -498,39 +504,43 @@ def main():
+ ' quotEnd '
+ 'altQtOpn '
+ 'altQtEnd '
# Width 11 + comma:
+ ' lpStart ' # List pattern
+ ' lpMid '
+ ' lpEnd '
+ ' lpTwo '
+ ' sDtFmt ' # Date format
+ ' lDtFmt '
+ ' sTmFmt ' # Time format
+ ' lTmFmt '
+ ' ssDays ' # Days
+ ' slDays '
+ ' snDays '
+ ' sDays '
+ ' lDays '
+ ' nDays '
+ ' am ' # am/pm indicators
+ ' pm '
# Width 8 + comma
+ ' byte '
+ ' siQuant '
+ 'iecQuant '
# Range entries (all start-indices, then all sizes):
# Width 5 + comma:
+ 'lStrt ' # List pattern
+ 'lpMid '
+ 'lpEnd '
+ 'lPair '
+ 'lDFmt ' # Date format
+ 'sDFmt '
+ 'lTFmt ' # Time format
+ 'sTFmt '
+ 'slDay ' # Day names
+ 'lDays '
+ 'ssDys '
+ 'sDays '
+ 'snDay '
+ 'nDays '
+ ' am ' # am/pm indicators
+ ' pm '
+ ' byte '
+ 'siQnt '
+ 'iecQn '
+ 'crSym ' # Currency formatting:
+ 'crDsp '
+ 'crFmt '
+ 'crFNg '
+ 'ntLng ' # Name of language in itself, and of territory:
+ 'ntTer '
# Width 3 + comma for each size; no header
+ ' ' * 25
# Strays (char array, bit-fields):
# Width 8+4 + comma
+ ' currISO '
# Width 11 + comma:
+ ' currSym ' # Currency formatting:
+ ' currDsply '
+ ' currFmt '
+ ' currFmtNeg '
+ ' endoLang ' # Name of language in itself, and of country:
+ ' endoCntry '
# Width 6 + comma:
+ 'curDgt ' # Currency number representation:
+ 'curRnd '
+ 'curDgt ' # Currency digits
+ 'curRnd ' # Currency rounding (unused: QTBUG-81343)
+ 'dow1st ' # First day of week
+ ' wknd+ ' # Week-end start/end days:
+ ' wknd-'
@ -550,14 +560,16 @@ def main():
+ '%6d,' * 8
# Quotation marks:
+ '%8d,' * 4
# List patterns, date/time formats, month/day names, am/pm:
+ '%11s,' * 16
# SI/IEC byte-unit abbreviations:
+ '%8s,' * 3
# Currency and endonyms
+ '%5d,' * 25
# Sizes for the same:
+ '%3d,' * 25
# Currency ISO code:
+ ' %10s, '
# Currency and endonyms
+ '%11s,' * 6
# Currency formatting:
+ '%6d,%6d'
# Day of week and week-end:
@ -565,8 +577,32 @@ def main():
+ ' }')
for key in locale_keys:
l = locale_map[key]
# Sequence of StringDataToken:
ranges = (tuple(list_pattern_part_data.append(p) for p in # 4 entries:
(l.listPatternPartStart, l.listPatternPartMiddle,
l.listPatternPartEnd, l.listPatternPartTwo)) +
tuple (date_format_data.append(f) for f in # 2 entries:
(l.longDateFormat, l.shortDateFormat)) +
tuple(time_format_data.append(f) for f in # 2 entries:
(l.longTimeFormat, l.shortTimeFormat)) +
tuple(days_data.append(d) for d in # 6 entries:
(l.standaloneLongDays, l.longDays,
l.standaloneShortDays, l.shortDays,
l.standaloneNarrowDays, l.narrowDays)) +
(am_data.append(l.am), pm_data.append(l.pm)) + # 2 entries:
tuple(byte_unit_data.append(b) for b in # 3 entries:
(l.byte_unit, l.byte_si_quantified, l.byte_iec_quantified)) +
(currency_symbol_data.append(l.currencySymbol),
currency_display_name_data.append(l.currencyDisplayName),
currency_format_data.append(l.currencyFormat),
currency_format_data.append(l.currencyNegativeFormat),
endonyms_data.append(l.languageEndonym),
endonyms_data.append(l.countryEndonym)) # 6 entries
) # Total: 25 entries
assert len(ranges) == 25
data_temp_file.write(line_format
% (key[0], key[1], key[2],
% ((key[0], key[1], key[2],
l.decimal,
l.group,
l.listDelim,
@ -578,43 +614,21 @@ def main():
l.quotationStart,
l.quotationEnd,
l.alternateQuotationStart,
l.alternateQuotationEnd,
list_pattern_part_data.append(l.listPatternPartStart),
list_pattern_part_data.append(l.listPatternPartMiddle),
list_pattern_part_data.append(l.listPatternPartEnd),
list_pattern_part_data.append(l.listPatternPartTwo),
date_format_data.append(l.shortDateFormat),
date_format_data.append(l.longDateFormat),
time_format_data.append(l.shortTimeFormat),
time_format_data.append(l.longTimeFormat),
days_data.append(l.standaloneShortDays),
days_data.append(l.standaloneLongDays),
days_data.append(l.standaloneNarrowDays),
days_data.append(l.shortDays),
days_data.append(l.longDays),
days_data.append(l.narrowDays),
am_data.append(l.am),
pm_data.append(l.pm),
byte_unit_data.append(l.byte_unit),
byte_unit_data.append(l.byte_si_quantified),
byte_unit_data.append(l.byte_iec_quantified),
currencyIsoCodeData(l.currencyIsoCode),
currency_symbol_data.append(l.currencySymbol),
currency_display_name_data.append(l.currencyDisplayName),
currency_format_data.append(l.currencyFormat),
currency_format_data.append(l.currencyNegativeFormat),
endonyms_data.append(l.languageEndonym),
endonyms_data.append(l.countryEndonym),
l.alternateQuotationEnd) +
tuple(r.index for r in ranges) +
tuple(r.length for r in ranges) +
(currencyIsoCodeData(l.currencyIsoCode),
l.currencyDigits,
l.currencyRounding, # unused (QTBUG-81343)
l.firstDayOfWeek,
l.weekendStart,
l.weekendEnd)
l.weekendEnd))
+ ", // %s/%s/%s\n" % (l.language, l.script, l.country))
data_temp_file.write(line_format # All zeros, matching the format:
% ( (0,) * (3 + 8 + 4) + ("0,0",) * (16 + 3)
% ( (0,) * (3 + 8 + 4) + (0,) * 25 * 2
+ (currencyIsoCodeData(0),)
+ ("0,0",) * 6 + (0,) * (2 + 3))
+ (0,) * 2
+ (0,) * 3)
+ " // trailing zeros\n")
data_temp_file.write("};\n")
@ -750,7 +764,7 @@ def main():
os.rename(data_temp_file_path, qtsrcdir + "/src/corelib/text/qlocale_data_p.h")
# Generate calendar data
calendar_format = ' {%6d,%6d,%6d,{%5s},{%5s},{%5s},{%5s},{%5s},{%5s}}, '
calendar_format = ' {%6d,%6d,%6d' + ',%5d' * 6 + ',%3d' * 6 + ' },'
for calendar, stem in calendars.items():
months_data = StringData('months_data')
calendar_data_file = "q%scalendar_data_p.h" % stem
@ -770,30 +784,38 @@ def main():
+ ' lang '
+ ' script'
+ ' terr '
# Month-name start-end pairs, width 8 (5 plus '{},'):
+ ' sShort '
+ ' sLong '
+ ' sNarrow'
+ ' short '
+ ' long '
+ ' narrow'
# No trailing space on last; be sure
# to pad before adding later entries.
# Month-name start-indices, width 6 (5 + comma):
+ 'sLng '
+ 'long '
+ 'sSrt '
+ 'shrt '
+ 'sNrw '
+ 'naro '
# No individual headers for the sizes.
+ 'Sizes...'
+ '\n')
for key in locale_keys:
l = locale_map[key]
# Sequence of StringDataToken:
try:
# Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264)
ranges = (tuple(months_data.append(m[calendar], 16) for m in
(l.standaloneLongMonths, l.longMonths)) +
tuple(months_data.append(m[calendar]) for m in
(l.standaloneShortMonths, l.shortMonths,
l.standaloneNarrowMonths, l.narrowMonths)))
except ValueError as e:
e.args += (l.language, l.script, l.country, stem)
raise
calendar_temp_file.write(
calendar_format
% (key[0], key[1], key[2],
months_data.append(l.standaloneShortMonths[calendar]),
months_data.append(l.standaloneLongMonths[calendar]),
months_data.append(l.standaloneNarrowMonths[calendar]),
months_data.append(l.shortMonths[calendar]),
months_data.append(l.longMonths[calendar]),
months_data.append(l.narrowMonths[calendar]))
% ((key[0], key[1], key[2]) +
tuple(r.index for r in ranges) +
tuple(r.length for r in ranges))
+ "// %s/%s/%s\n" % (l.language, l.script, l.country))
calendar_temp_file.write(calendar_format % ( (0,) * 3 + ('0,0',) * 6 )
+ '// trailing zeros\n')
calendar_temp_file.write(calendar_format % ( (0,) * (3 + 6 * 2) )
+ '// trailing zeros\n')
calendar_temp_file.write("};\n")
months_data.write(calendar_temp_file)
s = calendar_template_file.readline()