Handle QCollator with locale C by delegating to QString

Previously, the C locale was treated as English because each back-end
takes the locale's bcp47Name(), which maps C to en. However, the C
locale has its own rules; which QString helpfully implements; so we
can delegate to it in this case. Extended this to sort keys, where
possible. Clean up existing implementations in the process.

Extended tst_QCollator::compare() with some cases to check this. That
required wrapping the test's calls to collator.compare() in a sign
canonicalizer, since it can return any -ve for < or +ve for >, not
just -1 and +1 for these cases (and it'd be rash to hard-code specific
negative and positive values, as they may vary between backends).

[ChangeLog][QtCore][QCollator] Added support for collation in the C
locale, albeit this is only well-defined for ASCII. Collation sort
keys remain unsupported on Darwin.

Fixes: QTBUG-58621
Change-Id: I327010d90f09bd1b1816f5590cb124e3d423e61d
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Edward Welbourne 2018-10-02 19:02:29 +02:00
parent 63b0eb3a89
commit ab448f731e
7 changed files with 70 additions and 24 deletions

View File

@ -79,7 +79,6 @@ QT_BEGIN_NAMESPACE
QCollator::QCollator(const QLocale &locale) QCollator::QCollator(const QLocale &locale)
: d(new QCollatorPrivate(locale)) : d(new QCollatorPrivate(locale))
{ {
d->init();
} }
/*! /*!
@ -323,6 +322,8 @@ bool QCollator::ignorePunctuation() const
methods directly. But if the string is compared repeatedly (e.g. when sorting methods directly. But if the string is compared repeatedly (e.g. when sorting
a whole list of strings), it's usually faster to create the sort keys for each a whole list of strings), it's usually faster to create the sort keys for each
string and then sort using the keys. string and then sort using the keys.
\note Not supported with the C (a.k.a. POSIX) locale on Darwin.
*/ */
/*! /*!

View File

@ -55,6 +55,8 @@ QT_BEGIN_NAMESPACE
void QCollatorPrivate::init() void QCollatorPrivate::init()
{ {
cleanup(); cleanup();
if (isC())
return;
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
QByteArray name = QLocalePrivate::get(locale)->bcp47Name('_'); QByteArray name = QLocalePrivate::get(locale)->bcp47Name('_');
@ -140,6 +142,8 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{ {
if (d->dirty) if (d->dirty)
d->init(); d->init();
if (d->isC())
return QCollatorSortKey(new QCollatorSortKeyPrivate(string.toUtf8()));
if (d->collator) { if (d->collator) {
QByteArray result(16 + string.size() + (string.size() >> 2), Qt::Uninitialized); QByteArray result(16 + string.size() + (string.size() >> 2), Qt::Uninitialized);

View File

@ -55,6 +55,15 @@ QT_BEGIN_NAMESPACE
void QCollatorPrivate::init() void QCollatorPrivate::init()
{ {
cleanup(); cleanup();
/*
LocaleRefFromLocaleString() will accept "POSIX" as the locale name, but
the locale it produces (named "pos") doesn't implement the [A-Z] < [a-z]
behavior we expect of the C locale. We can use QStringView to get round
that for collation, but this leaves no way to do a sort key.
*/
if (isC())
return;
LocaleRef localeRef; LocaleRef localeRef;
int rc = LocaleRefFromLocaleString(QLocalePrivate::get(locale)->bcp47Name().constData(), &localeRef); int rc = LocaleRefFromLocaleString(QLocalePrivate::get(locale)->bcp47Name().constData(), &localeRef);
if (rc != 0) if (rc != 0)
@ -92,6 +101,8 @@ int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) con
{ {
if (d->dirty) if (d->dirty)
d->init(); d->init();
if (!d->collator)
return QStringView(s1, len1).compare(QStringView(s2, len2), caseSensitivity());
SInt32 result; SInt32 result;
Boolean equivalent; Boolean equivalent;
@ -104,6 +115,7 @@ int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) con
return 0; return 0;
return result < 0 ? -1 : 1; return result < 0 ? -1 : 1;
} }
int QCollator::compare(const QString &str1, const QString &str2) const int QCollator::compare(const QString &str1, const QString &str2) const
{ {
return compare(str1.constData(), str1.size(), str2.constData(), str2.size()); return compare(str1.constData(), str1.size(), str2.constData(), str2.size());
@ -118,6 +130,11 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{ {
if (d->dirty) if (d->dirty)
d->init(); d->init();
if (!d->collator) {
// What should (or even *can*) we do here ? (See init()'s comment.)
qWarning("QCollator doesn't support sort keys for the C locale on Darwin");
return QCollatorSortKey(nullptr);
}
//Documentation recommends having it 5 times as big as the input //Documentation recommends having it 5 times as big as the input
QVector<UCCollationValue> ret(string.size() * 5); QVector<UCCollationValue> ret(string.size() * 5);
@ -136,6 +153,9 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
int QCollatorSortKey::compare(const QCollatorSortKey &key) const int QCollatorSortKey::compare(const QCollatorSortKey &key) const
{ {
if (!d.data())
return 0;
SInt32 order; SInt32 order;
UCCompareCollationKeys(d->m_key.data(), d->m_key.size(), UCCompareCollationKeys(d->m_key.data(), d->m_key.size(),
key.d->m_key.data(), key.d->m_key.size(), key.d->m_key.data(), key.d->m_key.size(),

View File

@ -110,6 +110,7 @@ public:
QCollatorPrivate(const QLocale &locale) : locale(locale) {} QCollatorPrivate(const QLocale &locale) : locale(locale) {}
~QCollatorPrivate() { cleanup(); } ~QCollatorPrivate() { cleanup(); }
bool isC() { return locale.language() == QLocale::C; }
void clear() { void clear() {
cleanup(); cleanup();

View File

@ -48,10 +48,12 @@ QT_BEGIN_NAMESPACE
void QCollatorPrivate::init() void QCollatorPrivate::init()
{ {
if (locale != QLocale()) if (!isC()) {
qWarning("Only default locale supported with the posix collation implementation"); if (locale != QLocale())
if (caseSensitivity != Qt::CaseSensitive) qWarning("Only C and default locale supported with the posix collation implementation");
qWarning("Case insensitive sorting unsupported in the posix collation implementation"); if (caseSensitivity != Qt::CaseSensitive)
qWarning("Case insensitive sorting unsupported in the posix collation implementation");
}
if (numericMode) if (numericMode)
qWarning("Numeric mode unsupported in the posix collation implementation"); qWarning("Numeric mode unsupported in the posix collation implementation");
if (ignorePunctuation) if (ignorePunctuation)
@ -73,14 +75,16 @@ static void stringToWCharArray(QVarLengthArray<wchar_t> &ret, const QString &str
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
{ {
QVarLengthArray<wchar_t> array1, array2; return compare(QString::fromRawData(s1, len1), QString::fromRawData(s2, len2));
stringToWCharArray(array1, QString(s1, len1));
stringToWCharArray(array2, QString(s2, len2));
return std::wcscoll(array1.constData(), array2.constData());
} }
int QCollator::compare(const QString &s1, const QString &s2) const int QCollator::compare(const QString &s1, const QString &s2) const
{ {
if (d->isC())
return s1.compare(s2, caseSensitivity());
if (d->dirty)
d->init();
QVarLengthArray<wchar_t> array1, array2; QVarLengthArray<wchar_t> array1, array2;
stringToWCharArray(array1, s1); stringToWCharArray(array1, s1);
stringToWCharArray(array2, s2); stringToWCharArray(array2, s2);
@ -89,10 +93,7 @@ int QCollator::compare(const QString &s1, const QString &s2) const
int QCollator::compare(const QStringRef &s1, const QStringRef &s2) const int QCollator::compare(const QStringRef &s1, const QStringRef &s2) const
{ {
if (d->dirty) return compare(s1.toString(), s2.toString());
d->init();
return compare(s1.constData(), s1.size(), s2.constData(), s2.size());
} }
QCollatorSortKey QCollator::sortKey(const QString &string) const QCollatorSortKey QCollator::sortKey(const QString &string) const
@ -102,14 +103,18 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
QVarLengthArray<wchar_t> original; QVarLengthArray<wchar_t> original;
stringToWCharArray(original, string); stringToWCharArray(original, string);
QVector<wchar_t> result(string.size()); QVector<wchar_t> result(original.size());
size_t size = std::wcsxfrm(result.data(), original.constData(), string.size()); if (d->isC()) {
if (size > uint(result.size())) { std::copy(original.cbegin(), original.cend(), result.begin());
} else {
size_t size = std::wcsxfrm(result.data(), original.constData(), string.size());
if (size > uint(result.size())) {
result.resize(size+1);
size = std::wcsxfrm(result.data(), original.constData(), string.size());
}
result.resize(size+1); result.resize(size+1);
size = std::wcsxfrm(result.data(), original.constData(), string.size()); result[size] = 0;
} }
result.resize(size+1);
result[size] = 0;
return QCollatorSortKey(new QCollatorSortKeyPrivate(std::move(result))); return QCollatorSortKey(new QCollatorSortKeyPrivate(std::move(result)));
} }

View File

@ -60,6 +60,8 @@ extern LCID qt_inIsoNametoLCID(const char *name);
void QCollatorPrivate::init() void QCollatorPrivate::init()
{ {
collator = 0; collator = 0;
if (isC())
return;
#ifndef USE_COMPARESTRINGEX #ifndef USE_COMPARESTRINGEX
localeID = qt_inIsoNametoLCID(QLocalePrivate::get(locale)->bcp47Name().constData()); localeID = qt_inIsoNametoLCID(QLocalePrivate::get(locale)->bcp47Name().constData());
@ -86,6 +88,9 @@ void QCollatorPrivate::cleanup()
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
{ {
if (d->isC())
return QString::compare_helper(s1, len1, s2, len2, d->caseSensitivity);
if (d->dirty) if (d->dirty)
d->init(); d->init();
@ -119,6 +124,8 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{ {
if (d->dirty) if (d->dirty)
d->init(); d->init();
if (d->isC())
return QCollatorSortKey(new QCollatorSortKeyPrivate(string));
#ifndef USE_COMPARESTRINGEX #ifndef USE_COMPARESTRINGEX
int size = LCMapStringW(d->localeID, LCMAP_SORTKEY | d->collator, int size = LCMapStringW(d->localeID, LCMAP_SORTKEY | d->collator,

View File

@ -93,7 +93,7 @@ void tst_QCollator::compare_data()
QTest::addColumn<int>("caseInsensitiveResult"); QTest::addColumn<int>("caseInsensitiveResult");
QTest::addColumn<bool>("numericMode"); QTest::addColumn<bool>("numericMode");
QTest::addColumn<bool>("ignorePunctuation"); QTest::addColumn<bool>("ignorePunctuation");
QTest::addColumn<int>("punctuationResult"); QTest::addColumn<int>("punctuationResult"); // Test ignores punctuation *and case*
/* /*
It's hard to test English, because it's treated differently It's hard to test English, because it's treated differently
@ -169,8 +169,12 @@ void tst_QCollator::compare_data()
QTest::newRow("french6") << QString("fr_FR") << QString("Test 9") << QString("Test_19") << -1 << -1 << true << true << -1; QTest::newRow("french6") << QString("fr_FR") << QString("Test 9") << QString("Test_19") << -1 << -1 << true << true << -1;
QTest::newRow("french7") << QString("fr_FR") << QString("test_19") << QString("test 19") << 1 << 1 << true << false << 1; QTest::newRow("french7") << QString("fr_FR") << QString("test_19") << QString("test 19") << 1 << 1 << true << false << 1;
QTest::newRow("french8") << QString("fr_FR") << QString("test.19") << QString("test,19") << 1 << 1 << true << true << 0; QTest::newRow("french8") << QString("fr_FR") << QString("test.19") << QString("test,19") << 1 << 1 << true << true << 0;
}
// C locale: case sensitive [A-Z] < [a-z] but case insensitive [Aa] < [Bb] <...< [Zz]
const QString C = QStringLiteral("C");
QTest::newRow("C:ABBA:AaaA") << C << QStringLiteral("ABBA") << QStringLiteral("AaaA") << -1 << 1 << false << false << 1;
QTest::newRow("C:AZa:aAZ") << C << QStringLiteral("AZa") << QStringLiteral("aAZ") << -1 << 1 << false << false << 1;
}
void tst_QCollator::compare() void tst_QCollator::compare()
{ {
@ -184,6 +188,10 @@ void tst_QCollator::compare()
QFETCH(int, punctuationResult); QFETCH(int, punctuationResult);
QCollator collator(locale); QCollator collator(locale);
// Need to canonicalize sign to -1, 0 or 1, as .compare() can produce any -ve for <, any +ve for >.
auto asSign = [](int compared) {
return compared < 0 ? -1 : compared > 0 ? 1 : 0;
};
#if defined(Q_OS_ANDROID) && !defined(Q_OS_ANDROID_EMBEDDED) #if defined(Q_OS_ANDROID) && !defined(Q_OS_ANDROID_EMBEDDED)
if (collator.locale() != QLocale()) if (collator.locale() != QLocale())
@ -193,12 +201,12 @@ void tst_QCollator::compare()
if (numericMode) if (numericMode)
collator.setNumericMode(true); collator.setNumericMode(true);
QCOMPARE(collator.compare(s1, s2), result); QCOMPARE(asSign(collator.compare(s1, s2)), result);
collator.setCaseSensitivity(Qt::CaseInsensitive); collator.setCaseSensitivity(Qt::CaseInsensitive);
QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult); QCOMPARE(asSign(collator.compare(s1, s2)), caseInsensitiveResult);
#if !QT_CONFIG(iconv) #if !QT_CONFIG(iconv)
collator.setIgnorePunctuation(ignorePunctuation); collator.setIgnorePunctuation(ignorePunctuation);
QCOMPARE(collator.compare(s1, s2), punctuationResult); QCOMPARE(asSign(collator.compare(s1, s2)), punctuationResult);
#endif #endif
} }