Handle QCollator with locale C by delegating to QString

Previously, the C locale was treated as English because each back-end
takes the locale's bcp47Name(), which maps C to en. However, the C
locale has its own rules; which QString helpfully implements; so we
can delegate to it in this case. Extended this to sort keys, where
possible. Clean up existing implementations in the process.

Extended tst_QCollator::compare() with some cases to check this. That
required wrapping the test's calls to collator.compare() in a sign
canonicalizer, since it can return any -ve for < or +ve for >, not
just -1 and +1 for these cases (and it'd be rash to hard-code specific
negative and positive values, as they may vary between backends).

[ChangeLog][QtCore][QCollator] Added support for collation in the C
locale, albeit this is only well-defined for ASCII. Collation sort
keys remain unsupported on Darwin.

Fixes: QTBUG-58621
Change-Id: I327010d90f09bd1b1816f5590cb124e3d423e61d
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Edward Welbourne 2018-10-02 19:02:29 +02:00
parent 63b0eb3a89
commit ab448f731e
7 changed files with 70 additions and 24 deletions

View File

@ -79,7 +79,6 @@ QT_BEGIN_NAMESPACE
QCollator::QCollator(const QLocale &locale)
: d(new QCollatorPrivate(locale))
{
d->init();
}
/*!
@ -323,6 +322,8 @@ bool QCollator::ignorePunctuation() const
methods directly. But if the string is compared repeatedly (e.g. when sorting
a whole list of strings), it's usually faster to create the sort keys for each
string and then sort using the keys.
\note Not supported with the C (a.k.a. POSIX) locale on Darwin.
*/
/*!

View File

@ -55,6 +55,8 @@ QT_BEGIN_NAMESPACE
void QCollatorPrivate::init()
{
cleanup();
if (isC())
return;
UErrorCode status = U_ZERO_ERROR;
QByteArray name = QLocalePrivate::get(locale)->bcp47Name('_');
@ -140,6 +142,8 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{
if (d->dirty)
d->init();
if (d->isC())
return QCollatorSortKey(new QCollatorSortKeyPrivate(string.toUtf8()));
if (d->collator) {
QByteArray result(16 + string.size() + (string.size() >> 2), Qt::Uninitialized);

View File

@ -55,6 +55,15 @@ QT_BEGIN_NAMESPACE
void QCollatorPrivate::init()
{
cleanup();
/*
LocaleRefFromLocaleString() will accept "POSIX" as the locale name, but
the locale it produces (named "pos") doesn't implement the [A-Z] < [a-z]
behavior we expect of the C locale. We can use QStringView to get round
that for collation, but this leaves no way to do a sort key.
*/
if (isC())
return;
LocaleRef localeRef;
int rc = LocaleRefFromLocaleString(QLocalePrivate::get(locale)->bcp47Name().constData(), &localeRef);
if (rc != 0)
@ -92,6 +101,8 @@ int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) con
{
if (d->dirty)
d->init();
if (!d->collator)
return QStringView(s1, len1).compare(QStringView(s2, len2), caseSensitivity());
SInt32 result;
Boolean equivalent;
@ -104,6 +115,7 @@ int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) con
return 0;
return result < 0 ? -1 : 1;
}
int QCollator::compare(const QString &str1, const QString &str2) const
{
return compare(str1.constData(), str1.size(), str2.constData(), str2.size());
@ -118,6 +130,11 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{
if (d->dirty)
d->init();
if (!d->collator) {
// What should (or even *can*) we do here ? (See init()'s comment.)
qWarning("QCollator doesn't support sort keys for the C locale on Darwin");
return QCollatorSortKey(nullptr);
}
//Documentation recommends having it 5 times as big as the input
QVector<UCCollationValue> ret(string.size() * 5);
@ -136,6 +153,9 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
int QCollatorSortKey::compare(const QCollatorSortKey &key) const
{
if (!d.data())
return 0;
SInt32 order;
UCCompareCollationKeys(d->m_key.data(), d->m_key.size(),
key.d->m_key.data(), key.d->m_key.size(),

View File

@ -110,6 +110,7 @@ public:
QCollatorPrivate(const QLocale &locale) : locale(locale) {}
~QCollatorPrivate() { cleanup(); }
bool isC() { return locale.language() == QLocale::C; }
void clear() {
cleanup();

View File

@ -48,10 +48,12 @@ QT_BEGIN_NAMESPACE
void QCollatorPrivate::init()
{
if (locale != QLocale())
qWarning("Only default locale supported with the posix collation implementation");
if (caseSensitivity != Qt::CaseSensitive)
qWarning("Case insensitive sorting unsupported in the posix collation implementation");
if (!isC()) {
if (locale != QLocale())
qWarning("Only C and default locale supported with the posix collation implementation");
if (caseSensitivity != Qt::CaseSensitive)
qWarning("Case insensitive sorting unsupported in the posix collation implementation");
}
if (numericMode)
qWarning("Numeric mode unsupported in the posix collation implementation");
if (ignorePunctuation)
@ -73,14 +75,16 @@ static void stringToWCharArray(QVarLengthArray<wchar_t> &ret, const QString &str
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
{
QVarLengthArray<wchar_t> array1, array2;
stringToWCharArray(array1, QString(s1, len1));
stringToWCharArray(array2, QString(s2, len2));
return std::wcscoll(array1.constData(), array2.constData());
return compare(QString::fromRawData(s1, len1), QString::fromRawData(s2, len2));
}
int QCollator::compare(const QString &s1, const QString &s2) const
{
if (d->isC())
return s1.compare(s2, caseSensitivity());
if (d->dirty)
d->init();
QVarLengthArray<wchar_t> array1, array2;
stringToWCharArray(array1, s1);
stringToWCharArray(array2, s2);
@ -89,10 +93,7 @@ int QCollator::compare(const QString &s1, const QString &s2) const
int QCollator::compare(const QStringRef &s1, const QStringRef &s2) const
{
if (d->dirty)
d->init();
return compare(s1.constData(), s1.size(), s2.constData(), s2.size());
return compare(s1.toString(), s2.toString());
}
QCollatorSortKey QCollator::sortKey(const QString &string) const
@ -102,14 +103,18 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
QVarLengthArray<wchar_t> original;
stringToWCharArray(original, string);
QVector<wchar_t> result(string.size());
size_t size = std::wcsxfrm(result.data(), original.constData(), string.size());
if (size > uint(result.size())) {
QVector<wchar_t> result(original.size());
if (d->isC()) {
std::copy(original.cbegin(), original.cend(), result.begin());
} else {
size_t size = std::wcsxfrm(result.data(), original.constData(), string.size());
if (size > uint(result.size())) {
result.resize(size+1);
size = std::wcsxfrm(result.data(), original.constData(), string.size());
}
result.resize(size+1);
size = std::wcsxfrm(result.data(), original.constData(), string.size());
result[size] = 0;
}
result.resize(size+1);
result[size] = 0;
return QCollatorSortKey(new QCollatorSortKeyPrivate(std::move(result)));
}

View File

@ -60,6 +60,8 @@ extern LCID qt_inIsoNametoLCID(const char *name);
void QCollatorPrivate::init()
{
collator = 0;
if (isC())
return;
#ifndef USE_COMPARESTRINGEX
localeID = qt_inIsoNametoLCID(QLocalePrivate::get(locale)->bcp47Name().constData());
@ -86,6 +88,9 @@ void QCollatorPrivate::cleanup()
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
{
if (d->isC())
return QString::compare_helper(s1, len1, s2, len2, d->caseSensitivity);
if (d->dirty)
d->init();
@ -119,6 +124,8 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{
if (d->dirty)
d->init();
if (d->isC())
return QCollatorSortKey(new QCollatorSortKeyPrivate(string));
#ifndef USE_COMPARESTRINGEX
int size = LCMapStringW(d->localeID, LCMAP_SORTKEY | d->collator,

View File

@ -93,7 +93,7 @@ void tst_QCollator::compare_data()
QTest::addColumn<int>("caseInsensitiveResult");
QTest::addColumn<bool>("numericMode");
QTest::addColumn<bool>("ignorePunctuation");
QTest::addColumn<int>("punctuationResult");
QTest::addColumn<int>("punctuationResult"); // Test ignores punctuation *and case*
/*
It's hard to test English, because it's treated differently
@ -169,8 +169,12 @@ void tst_QCollator::compare_data()
QTest::newRow("french6") << QString("fr_FR") << QString("Test 9") << QString("Test_19") << -1 << -1 << true << true << -1;
QTest::newRow("french7") << QString("fr_FR") << QString("test_19") << QString("test 19") << 1 << 1 << true << false << 1;
QTest::newRow("french8") << QString("fr_FR") << QString("test.19") << QString("test,19") << 1 << 1 << true << true << 0;
}
// C locale: case sensitive [A-Z] < [a-z] but case insensitive [Aa] < [Bb] <...< [Zz]
const QString C = QStringLiteral("C");
QTest::newRow("C:ABBA:AaaA") << C << QStringLiteral("ABBA") << QStringLiteral("AaaA") << -1 << 1 << false << false << 1;
QTest::newRow("C:AZa:aAZ") << C << QStringLiteral("AZa") << QStringLiteral("aAZ") << -1 << 1 << false << false << 1;
}
void tst_QCollator::compare()
{
@ -184,6 +188,10 @@ void tst_QCollator::compare()
QFETCH(int, punctuationResult);
QCollator collator(locale);
// Need to canonicalize sign to -1, 0 or 1, as .compare() can produce any -ve for <, any +ve for >.
auto asSign = [](int compared) {
return compared < 0 ? -1 : compared > 0 ? 1 : 0;
};
#if defined(Q_OS_ANDROID) && !defined(Q_OS_ANDROID_EMBEDDED)
if (collator.locale() != QLocale())
@ -193,12 +201,12 @@ void tst_QCollator::compare()
if (numericMode)
collator.setNumericMode(true);
QCOMPARE(collator.compare(s1, s2), result);
QCOMPARE(asSign(collator.compare(s1, s2)), result);
collator.setCaseSensitivity(Qt::CaseInsensitive);
QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult);
QCOMPARE(asSign(collator.compare(s1, s2)), caseInsensitiveResult);
#if !QT_CONFIG(iconv)
collator.setIgnorePunctuation(ignorePunctuation);
QCOMPARE(collator.compare(s1, s2), punctuationResult);
QCOMPARE(asSign(collator.compare(s1, s2)), punctuationResult);
#endif
}