Support digit-grouping correctly

Read three more values from CLDR and add a byte to the bit-fields at
the end of QLocaleData, indicating the three group sizes. This adds
three new parameters to various low-level formatting functions. At the
same time, rename ThousandsGroup to GroupDigits, more faithfully
expressing what this (internal) option means.

This replaces commit 27d1391280 with a
fuller implementation that handles digit-grouping in any of the ways
that CLDR supports. The formerly "Indian" formatting now also applies
to at least some locales for Bangladesh, Bhutan and Sri Lanka.

Fixed Costa Rica currency formatting test that wrongly put a separator
after the first digit; the locale (in common with several Spanish
locales) requires at least two digits before the first separator.

[ChangeLog][QtCore][Important Behavior Changes] Some locales require
more than one digit before the first grouping separator; others use
group sizes other than three. The latter was partially supported (only
for India) in Qt 5.15 but is now systematically supported; the former is
now also supported.

Task-number: QTBUG-24301
Fixes: QTBUG-81050
Change-Id: I4ea4e331f3254d1f34801cddf51f3c65d3815573
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Edward Welbourne 2020-01-17 11:00:24 +01:00
parent 19ed60d703
commit bb6a73260e
10 changed files with 703 additions and 663 deletions

View File

@ -2269,7 +2269,7 @@ void QTextStreamPrivate::putNumber(qulonglong number, bool negative)
// add digit-grouping separators. For backward compatibility we
// don't add a group separator for C locale.
if (locale != QLocale::c() && !locale.numberOptions().testFlag(QLocale::OmitGroupSeparator))
flags |= QLocaleData::ThousandsGroup;
flags |= QLocaleData::GroupDigits;
const QLocaleData *dd = locale.d->m_data;
int base = params.integerBase ? params.integerBase : 10;
@ -2485,7 +2485,7 @@ QTextStream &QTextStream::operator<<(double f)
flags |= QLocaleData::AddTrailingZeroes | QLocaleData::ShowBase;
}
if (locale() != QLocale::c() && !(numberOptions & QLocale::OmitGroupSeparator))
flags |= QLocaleData::ThousandsGroup;
flags |= QLocaleData::GroupDigits;
if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
flags |= QLocaleData::ZeroPadExponent;
if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)

View File

@ -1807,10 +1807,8 @@ double QLocale::toDouble(QStringView s, bool *ok) const
QString QLocale::toString(qlonglong i) const
{
int flags = d->m_numberOptions & OmitGroupSeparator
? 0
: (d->m_data->m_country_id == Country::India)
? QLocaleData::IndianNumberGrouping : QLocaleData::ThousandsGroup;
int flags = (d->m_numberOptions & OmitGroupSeparator
? 0 : QLocaleData::GroupDigits);
return d->m_data->longLongToString(i, -1, 10, -1, flags);
}
@ -1823,10 +1821,8 @@ QString QLocale::toString(qlonglong i) const
QString QLocale::toString(qulonglong i) const
{
int flags = d->m_numberOptions & OmitGroupSeparator
? 0
: (d->m_data->m_country_id == Country::India)
? QLocaleData::IndianNumberGrouping : QLocaleData::ThousandsGroup;
int flags = (d->m_numberOptions & OmitGroupSeparator
? 0 : QLocaleData::GroupDigits);
return d->m_data->unsLongLongToString(i, -1, 10, -1, flags);
}
@ -2497,7 +2493,7 @@ QString QLocale::toString(double i, char f, int prec) const
}
if (!(d->m_numberOptions & OmitGroupSeparator))
flags |= QLocaleData::ThousandsGroup;
flags |= QLocaleData::GroupDigits;
if (!(d->m_numberOptions & OmitLeadingZeroInExponent))
flags |= QLocaleData::ZeroPadExponent;
if (d->m_numberOptions & IncludeTrailingZeroesAfterDot)
@ -3380,7 +3376,7 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form,
}
const bool mustMarkDecimal = flags & ForcePoint;
const bool groupDigits = flags & ThousandsGroup;
const bool groupDigits = flags & GroupDigits;
const int minExponentDigits = flags & ZeroPadExponent ? 2 : 1;
switch (form) {
case DFExponent:
@ -3415,8 +3411,8 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form,
// Exponent adds separator, sign and digits:
int bias = 2 + minExponentDigits;
// Decimal form may get grouping separators inserted:
if (groupDigits && decpt > 3)
bias -= (decpt - 1) / 3;
if (groupDigits && decpt >= m_grouping_top + m_grouping_least)
bias -= (decpt - m_grouping_top - m_grouping_least) / m_grouping_higher + 1;
// X = decpt - 1 needs two digits if decpt > 10:
if (decpt > 10 && minExponentDigits == 1)
++bias;
@ -3501,12 +3497,14 @@ QString QLocaleData::decimalForm(QString &&digits, int decpt, int precision,
if (mustMarkDecimal || decpt < digits.length() / digitWidth)
digits.insert(decpt * digitWidth, decimalPoint());
// FIXME: they're not simply thousands separators !
// Need to mirror IndianNumberGrouping code in longLongToString()
if (groupDigits) {
const QString group = groupSeparator();
for (int i = decpt - 3; i > 0; i -= 3)
int i = decpt - m_grouping_least;
if (i >= m_grouping_top) {
digits.insert(i * digitWidth, group);
while ((i -= m_grouping_higher) >= m_grouping_top)
digits.insert(i * digitWidth, group);
}
}
if (decpt == 0)
@ -3615,22 +3613,18 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int
// Count how much of width we've used up. Each digit counts as one
int usedWidth = digitCount + prefix.size();
if (base == 10) {
if (base == 10 && flags & GroupDigits) {
const QString group = groupSeparator();
if (flags & ThousandsGroup) {
for (int i = numStr.length() / digitWidth - 3; i > 0; i -= 3) {
int i = digitCount - m_grouping_least;
if (i >= m_grouping_top) {
numStr.insert(i * digitWidth, group);
++usedWidth;
}
} else if (flags & IndianNumberGrouping) {
const int size = numStr.length();
if (size > 3 * digitWidth)
numStr.insert(size - 3 * digitWidth , group);
for (int i = size / digitWidth - 5; i > 0; i -= 2) {
while ((i -= m_grouping_higher) >= m_grouping_top) {
numStr.insert(i * digitWidth, group);
++usedWidth;
}
}
// TODO: should we group any zero-padding we add later ?
}
const bool noPrecision = precision == -1;
@ -3671,7 +3665,6 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
auto length = s.size();
decltype(length) idx = 0;
const int leadingGroupWidth = (m_country_id == QLocale::India ? 2 : 3);
int digitsInGroup = 0;
int group_cnt = 0; // counts number of group chars
int decpt_idx = -1;
@ -3731,13 +3724,14 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
return false;
if (last_separator_idx == -1) {
if (start_of_digits_idx == -1 || digitsInGroup > leadingGroupWidth)
// Check distance from the beginning of the digits:
if (start_of_digits_idx == -1 || m_grouping_top > digitsInGroup
|| digitsInGroup >= m_grouping_higher + m_grouping_top) {
return false;
}
} else {
// check distance from the last separator or from the beginning of the digits
// ### FIXME: Some locales allow other groupings!
// See https://en.wikipedia.org/wiki/Thousands_separator
if (digitsInGroup != leadingGroupWidth)
// Check distance from the last separator:
if (digitsInGroup != m_grouping_higher)
return false;
}
@ -3749,11 +3743,11 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
idx += in.size();
continue;
} else if (out == '.' || idx == exponent_idx) {
// check distance from the last separator
// ### FIXME: Some locales allow other groupings!
// See https://en.wikipedia.org/wiki/Thousands_separator
if (last_separator_idx != -1 && digitsInGroup != 3)
// Were there enough digits since the last separator?
if (last_separator_idx != -1 && digitsInGroup != m_grouping_least)
return false;
// If we saw no separator, should we fail if
// digitsInGroup > m_grouping_top + m_grouping_least ?
// stop processing separators
last_separator_idx = -1;
@ -3771,9 +3765,11 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
// did we end in a separator?
if (last_separator_idx + 1 == idx)
return false;
// were there enough digits since the last separator?
if (last_separator_idx != -1 && digitsInGroup != 3)
// Were there enough digits since the last separator?
if (last_separator_idx != -1 && digitsInGroup != m_grouping_least)
return false;
// If we saw no separator, and no decimal point, should we fail if
// digitsInGroup > m_grouping_top + m_grouping_least ?
}
if (number_options & QLocale::RejectTrailingZeroesAfterDot) {

File diff suppressed because it is too large Load Diff

View File

@ -196,14 +196,13 @@ public:
LeftAdjusted = 0x04,
BlankBeforePositive = 0x08,
AlwaysShowSign = 0x10,
ThousandsGroup = 0x20,
GroupDigits = 0x20,
CapitalEorX = 0x40,
ShowBase = 0x80,
UppercaseBase = 0x100,
ZeroPadExponent = 0x200,
ForcePoint = 0x400,
IndianNumberGrouping= 0x800
ForcePoint = 0x400
};
enum NumberMode { IntegerMode, DoubleStandardMode, DoubleScientificMode };
@ -370,11 +369,14 @@ public:
// Strays:
char m_currency_iso_code[3];
quint16 m_currency_digits : 2;
quint16 m_currency_rounding : 3; // (not yet used !)
quint16 m_first_day_of_week : 3;
quint16 m_weekend_start : 3;
quint16 m_weekend_end : 3;
quint8 m_currency_digits : 2;
quint8 m_currency_rounding : 3; // (not yet used !)
quint8 m_first_day_of_week : 3;
quint8 m_weekend_start : 3;
quint8 m_weekend_end : 3;
quint8 m_grouping_top : 2; // Must have this many before the first grouping separator
quint8 m_grouping_higher : 3; // Number of digits between grouping separators
quint8 m_grouping_least : 3; // Number of digits after last grouping separator (before decimal).
};
class Q_CORE_EXPORT QLocalePrivate // A POD type

View File

@ -86,9 +86,10 @@ void qt_doubleToAscii(double d, QLocaleData::DoubleForm form, int precision, cha
}
// Detect special numbers (nan, +/-inf)
// We cannot use the high-level API of libdouble-conversion as we need to apply locale-specific
// formatting, such as decimal points, thousands-separators, etc. Because of this, we have to
// check for infinity and NaN before calling DoubleToAscii.
// We cannot use the high-level API of libdouble-conversion as we need to
// apply locale-specific formatting, such as decimal points, grouping
// separators, etc. Because of this, we have to check for infinity and NaN
// before calling DoubleToAscii.
if (qt_is_inf(d)) {
sign = d < 0;
if (bufSize >= 3) {

View File

@ -6288,7 +6288,7 @@ static uint parse_flag_characters(const char * &c) noexcept
case '-': flags |= QLocaleData::LeftAdjusted; break;
case ' ': flags |= QLocaleData::BlankBeforePositive; break;
case '+': flags |= QLocaleData::AlwaysShowSign; break;
case '\'': flags |= QLocaleData::ThousandsGroup; break;
case '\'': flags |= QLocaleData::GroupDigits; break;
default: return flags;
}
++c;
@ -8010,7 +8010,7 @@ QString QString::arg(qlonglong a, int fieldWidth, int base, QChar fillChar) cons
if (d.locale_occurrences > 0) {
QLocale locale;
if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
flags |= QLocaleData::ThousandsGroup;
flags |= QLocaleData::GroupDigits;
locale_arg = locale.d->m_data->longLongToString(a, -1, base, fieldWidth, flags);
}
@ -8054,7 +8054,7 @@ QString QString::arg(qulonglong a, int fieldWidth, int base, QChar fillChar) con
if (d.locale_occurrences > 0) {
QLocale locale;
if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
flags |= QLocaleData::ThousandsGroup;
flags |= QLocaleData::GroupDigits;
locale_arg = locale.d->m_data->unsLongLongToString(a, -1, base, fieldWidth, flags);
}
@ -8185,7 +8185,7 @@ QString QString::arg(double a, int fieldWidth, char fmt, int prec, QChar fillCha
const QLocale::NumberOptions numberOptions = locale.numberOptions();
if (!(numberOptions & QLocale::OmitGroupSeparator))
flags |= QLocaleData::ThousandsGroup;
flags |= QLocaleData::GroupDigits;
if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
flags |= QLocaleData::ZeroPadExponent;
if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)

View File

@ -2682,7 +2682,9 @@ void tst_QLocale::currency()
const QLocale es_CR(QLocale::Spanish, QLocale::CostaRica);
QCOMPARE(es_CR.toCurrencyString(double(1565.25)),
QString::fromUtf8("\xE2\x82\xA1" "1\xC2\xA0" "565,25"));
QString::fromUtf8("\xE2\x82\xA1" "1565,25"));
QCOMPARE(es_CR.toCurrencyString(double(12565.25)),
QString::fromUtf8("\xE2\x82\xA1" "12\xC2\xA0" "565,25"));
const QLocale system = QLocale::system();
QVERIFY(system.toCurrencyString(1, QLatin1String("FOO")).contains(QLatin1String("FOO")));
@ -3135,9 +3137,7 @@ void tst_QLocale::numberGroupingIndia()
void tst_QLocale::numberFormatChakma()
{
// Initially India's flavour, since the number formatting is currently only
// done right for India. Should change to Bangladesh once that's fixed.
const QLocale chakma(QLocale::Chakma, QLocale::ChakmaScript, QLocale::India);
const QLocale chakma(QLocale::Chakma, QLocale::ChakmaScript, QLocale::Bangladesh);
const uint zeroVal = 0x11136; // Unicode's representation of Chakma zero
const QChar data[] = {
QChar::highSurrogate(zeroVal), QChar::lowSurrogate(zeroVal),

View File

@ -294,6 +294,7 @@ class LocaleScanner (object):
yield 'percent', self.find(stem + 'percentSign')
yield 'list', self.find(stem + 'list')
yield 'exp', self.find(stem + 'exponential')
yield 'groupSizes', self.__numberGrouping(system)
digits = lookup(system)['digits']
assert len(digits) == 10
@ -527,6 +528,36 @@ class LocaleScanner (object):
cache.append(rest)
yield it
def __numberGrouping(self, system):
    """Digit-group sizes used when formatting numbers.

    Produces the triple (least, higher, top), in which:
      * least counts the digits that follow the final grouping
        separator;
      * higher counts the digits lying between two adjacent
        grouping separators;
      * top is the minimum number of digits that must precede the
        first grouping separator.

    For example, (4, 3, 2) formats 1e7 as 1000,0000 and 1e8 as
    10,000,0000.

    Note: CLDR also allows for grouping within the fractional part,
    and for the sign character to go somewhere other than the start
    of the number (at the end, for some formats, possibly
    elsewhere). Neither of these is attempted at present."""
    minimumLead = int(self.find('numbers/minimumGroupingDigits'))
    # This value is stored in a 2-bit field:
    assert minimumLead < 4, minimumLead

    pattern = self.find('numbers/decimalFormats[numberSystem=' + system
                        + ']/decimalFormatLength/decimalFormat/pattern')
    # Only the text before the first '.' is examined, and only its
    # last three comma-separated chunks:
    integral = pattern.split('.')[0]
    chunks = integral.split(',')[-3:]
    # These sizes are stored in 3-bit fields:
    assert all(len(chunk) < 8 for chunk in chunks[-2:]), pattern

    if len(chunks) > 2:
        # The last group and the ones before it may differ in size.
        return len(chunks[-1]), len(chunks[-2]), minimumLead

    # With a single separator its group size is used throughout;
    # with none at all, fall back to groups of three.
    uniform = len(chunks[-1]) if len(chunks) == 2 else 3
    return uniform, uniform, minimumLead
@staticmethod
def __currencyFormats(patterns, plus, minus):
for p in patterns.split(';'):

View File

@ -476,6 +476,11 @@ class Locale (object):
for k in cls.propsMonthDay('months'):
data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars)
grouping = lookup('groupSizes').split(';')
data.update(groupLeast = int(grouping[0]),
groupHigher = int(grouping[1]),
groupTop = int(grouping[2]))
return cls(data)
def toXml(self, write, calendars=('gregorian',)):
@ -515,6 +520,7 @@ class Locale (object):
for cal in calendars):
write(key, escape(get(key)).encode('utf-8'))
write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
for key in ('currencyDigits', 'currencyRounding'):
write(key, get(key))
@ -586,6 +592,7 @@ class Locale (object):
language='C', language_code='0', languageEndonym='',
script='AnyScript', script_code='0',
country='AnyCountry', country_code='0', countryEndonym='',
groupSizes=(3, 3, 1),
decimal='.', group=',', list=';', percent='%',
zero='0', minus='-', plus='+', exp='e',
quotationStart='"', quotationEnd='"',

View File

@ -254,6 +254,9 @@ class LocaleDataWriter (LocaleSourceEditor):
'dow1st ' # First day of week
' wknd+ ' # Week-end start/end days
' wknd- '
'grpTop '
'grpMid '
'grpEnd'
# No trailing space on last entry (be sure to
# pad before adding anything after it).
'\n')
@ -276,6 +279,8 @@ class LocaleDataWriter (LocaleSourceEditor):
'{:6d},{:6d}',
# Day of week and week-end
',{:6d}' * 3,
# Number group sizes
',{:6d}' * 3,
' }}')).format
for key in names:
locale = locales[key]
@ -318,16 +323,14 @@ class LocaleDataWriter (LocaleSourceEditor):
(currencyIsoCodeData(locale.currencyIsoCode),
locale.currencyDigits,
locale.currencyRounding, # unused (QTBUG-81343)
locale.firstDayOfWeek,
locale.weekendStart,
locale.weekendEnd) ))
locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd,
locale.groupTop, locale.groupHigher, locale.groupLeast) ))
+ ', // {}/{}/{}\n'.format(
locale.language, locale.script, locale.country))
self.writer.write(formatLine(*( # All zeros, matching the format:
(0,) * 3 + (0,) * 37 * 2
+ (currencyIsoCodeData(0),)
+ (0,) * 2
+ (0,) * 3 ))
+ (0,) * 8 ))
+ ' // trailing zeros\n')
self.writer.write('};\n')