Handle QCollator with locale C by delegating to QString
Previously, the C locale was treated as English because each back-end takes the locale's bcp47Name(), which maps C to en. However, the C locale has its own rules, which QString helpfully implements, so we can delegate to it in this case. Extended this to sort keys, where possible. Cleaned up existing implementations in the process. Extended tst_QCollator::compare() with some cases to check this. That required wrapping the test's calls to collator.compare() in a sign canonicalizer, since it can return any negative value for < or any positive value for >, not just -1 and +1 for these cases (and it'd be rash to hard-code specific negative and positive values, as they may vary between backends). [ChangeLog][QtCore][QCollator] Added support for collation in the C locale, albeit this is only well-defined for ASCII. Collation sort keys remain unsupported on Darwin. Fixes: QTBUG-58621 Change-Id: I327010d90f09bd1b1816f5590cb124e3d423e61d Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
63b0eb3a89
commit
ab448f731e
@ -79,7 +79,6 @@ QT_BEGIN_NAMESPACE
|
||||
QCollator::QCollator(const QLocale &locale)
|
||||
: d(new QCollatorPrivate(locale))
|
||||
{
|
||||
d->init();
|
||||
}
|
||||
|
||||
/*!
|
||||
@ -323,6 +322,8 @@ bool QCollator::ignorePunctuation() const
|
||||
methods directly. But if the string is compared repeatedly (e.g. when sorting
|
||||
a whole list of strings), it's usually faster to create the sort keys for each
|
||||
string and then sort using the keys.
|
||||
|
||||
\note Not supported with the C (a.k.a. POSIX) locale on Darwin.
|
||||
*/
|
||||
|
||||
/*!
|
||||
|
@ -55,6 +55,8 @@ QT_BEGIN_NAMESPACE
|
||||
void QCollatorPrivate::init()
|
||||
{
|
||||
cleanup();
|
||||
if (isC())
|
||||
return;
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
QByteArray name = QLocalePrivate::get(locale)->bcp47Name('_');
|
||||
@ -140,6 +142,8 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
|
||||
{
|
||||
if (d->dirty)
|
||||
d->init();
|
||||
if (d->isC())
|
||||
return QCollatorSortKey(new QCollatorSortKeyPrivate(string.toUtf8()));
|
||||
|
||||
if (d->collator) {
|
||||
QByteArray result(16 + string.size() + (string.size() >> 2), Qt::Uninitialized);
|
||||
|
@ -55,6 +55,15 @@ QT_BEGIN_NAMESPACE
|
||||
void QCollatorPrivate::init()
|
||||
{
|
||||
cleanup();
|
||||
/*
|
||||
LocaleRefFromLocaleString() will accept "POSIX" as the locale name, but
|
||||
the locale it produces (named "pos") doesn't implement the [A-Z] < [a-z]
|
||||
behavior we expect of the C locale. We can use QStringView to get round
|
||||
that for collation, but this leaves no way to do a sort key.
|
||||
*/
|
||||
if (isC())
|
||||
return;
|
||||
|
||||
LocaleRef localeRef;
|
||||
int rc = LocaleRefFromLocaleString(QLocalePrivate::get(locale)->bcp47Name().constData(), &localeRef);
|
||||
if (rc != 0)
|
||||
@ -92,6 +101,8 @@ int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) con
|
||||
{
|
||||
if (d->dirty)
|
||||
d->init();
|
||||
if (!d->collator)
|
||||
return QStringView(s1, len1).compare(QStringView(s2, len2), caseSensitivity());
|
||||
|
||||
SInt32 result;
|
||||
Boolean equivalent;
|
||||
@ -104,6 +115,7 @@ int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) con
|
||||
return 0;
|
||||
return result < 0 ? -1 : 1;
|
||||
}
|
||||
|
||||
int QCollator::compare(const QString &str1, const QString &str2) const
|
||||
{
|
||||
return compare(str1.constData(), str1.size(), str2.constData(), str2.size());
|
||||
@ -118,6 +130,11 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
|
||||
{
|
||||
if (d->dirty)
|
||||
d->init();
|
||||
if (!d->collator) {
|
||||
// What should (or even *can*) we do here ? (See init()'s comment.)
|
||||
qWarning("QCollator doesn't support sort keys for the C locale on Darwin");
|
||||
return QCollatorSortKey(nullptr);
|
||||
}
|
||||
|
||||
//Documentation recommends having it 5 times as big as the input
|
||||
QVector<UCCollationValue> ret(string.size() * 5);
|
||||
@ -136,6 +153,9 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
|
||||
|
||||
int QCollatorSortKey::compare(const QCollatorSortKey &key) const
|
||||
{
|
||||
if (!d.data())
|
||||
return 0;
|
||||
|
||||
SInt32 order;
|
||||
UCCompareCollationKeys(d->m_key.data(), d->m_key.size(),
|
||||
key.d->m_key.data(), key.d->m_key.size(),
|
||||
|
@ -110,6 +110,7 @@ public:
|
||||
|
||||
QCollatorPrivate(const QLocale &locale) : locale(locale) {}
|
||||
~QCollatorPrivate() { cleanup(); }
|
||||
// Reports whether this collator's locale has QLocale::C as its language.
bool isC() { return locale.language() == QLocale::C; }
|
||||
|
||||
void clear() {
|
||||
cleanup();
|
||||
|
@ -48,10 +48,12 @@ QT_BEGIN_NAMESPACE
|
||||
|
||||
void QCollatorPrivate::init()
|
||||
{
|
||||
if (locale != QLocale())
|
||||
qWarning("Only default locale supported with the posix collation implementation");
|
||||
if (caseSensitivity != Qt::CaseSensitive)
|
||||
qWarning("Case insensitive sorting unsupported in the posix collation implementation");
|
||||
if (!isC()) {
|
||||
if (locale != QLocale())
|
||||
qWarning("Only C and default locale supported with the posix collation implementation");
|
||||
if (caseSensitivity != Qt::CaseSensitive)
|
||||
qWarning("Case insensitive sorting unsupported in the posix collation implementation");
|
||||
}
|
||||
if (numericMode)
|
||||
qWarning("Numeric mode unsupported in the posix collation implementation");
|
||||
if (ignorePunctuation)
|
||||
@ -73,14 +75,16 @@ static void stringToWCharArray(QVarLengthArray<wchar_t> &ret, const QString &str
|
||||
|
||||
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
|
||||
{
|
||||
QVarLengthArray<wchar_t> array1, array2;
|
||||
stringToWCharArray(array1, QString(s1, len1));
|
||||
stringToWCharArray(array2, QString(s2, len2));
|
||||
return std::wcscoll(array1.constData(), array2.constData());
|
||||
return compare(QString::fromRawData(s1, len1), QString::fromRawData(s2, len2));
|
||||
}
|
||||
|
||||
int QCollator::compare(const QString &s1, const QString &s2) const
|
||||
{
|
||||
if (d->isC())
|
||||
return s1.compare(s2, caseSensitivity());
|
||||
if (d->dirty)
|
||||
d->init();
|
||||
|
||||
QVarLengthArray<wchar_t> array1, array2;
|
||||
stringToWCharArray(array1, s1);
|
||||
stringToWCharArray(array2, s2);
|
||||
@ -89,10 +93,7 @@ int QCollator::compare(const QString &s1, const QString &s2) const
|
||||
|
||||
int QCollator::compare(const QStringRef &s1, const QStringRef &s2) const
|
||||
{
|
||||
if (d->dirty)
|
||||
d->init();
|
||||
|
||||
return compare(s1.constData(), s1.size(), s2.constData(), s2.size());
|
||||
return compare(s1.toString(), s2.toString());
|
||||
}
|
||||
|
||||
QCollatorSortKey QCollator::sortKey(const QString &string) const
|
||||
@ -102,14 +103,18 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
|
||||
|
||||
QVarLengthArray<wchar_t> original;
|
||||
stringToWCharArray(original, string);
|
||||
QVector<wchar_t> result(string.size());
|
||||
size_t size = std::wcsxfrm(result.data(), original.constData(), string.size());
|
||||
if (size > uint(result.size())) {
|
||||
QVector<wchar_t> result(original.size());
|
||||
if (d->isC()) {
|
||||
std::copy(original.cbegin(), original.cend(), result.begin());
|
||||
} else {
|
||||
size_t size = std::wcsxfrm(result.data(), original.constData(), string.size());
|
||||
if (size > uint(result.size())) {
|
||||
result.resize(size+1);
|
||||
size = std::wcsxfrm(result.data(), original.constData(), string.size());
|
||||
}
|
||||
result.resize(size+1);
|
||||
size = std::wcsxfrm(result.data(), original.constData(), string.size());
|
||||
result[size] = 0;
|
||||
}
|
||||
result.resize(size+1);
|
||||
result[size] = 0;
|
||||
return QCollatorSortKey(new QCollatorSortKeyPrivate(std::move(result)));
|
||||
}
|
||||
|
||||
|
@ -60,6 +60,8 @@ extern LCID qt_inIsoNametoLCID(const char *name);
|
||||
void QCollatorPrivate::init()
|
||||
{
|
||||
collator = 0;
|
||||
if (isC())
|
||||
return;
|
||||
|
||||
#ifndef USE_COMPARESTRINGEX
|
||||
localeID = qt_inIsoNametoLCID(QLocalePrivate::get(locale)->bcp47Name().constData());
|
||||
@ -86,6 +88,9 @@ void QCollatorPrivate::cleanup()
|
||||
|
||||
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
|
||||
{
|
||||
if (d->isC())
|
||||
return QString::compare_helper(s1, len1, s2, len2, d->caseSensitivity);
|
||||
|
||||
if (d->dirty)
|
||||
d->init();
|
||||
|
||||
@ -119,6 +124,8 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
|
||||
{
|
||||
if (d->dirty)
|
||||
d->init();
|
||||
if (d->isC())
|
||||
return QCollatorSortKey(new QCollatorSortKeyPrivate(string));
|
||||
|
||||
#ifndef USE_COMPARESTRINGEX
|
||||
int size = LCMapStringW(d->localeID, LCMAP_SORTKEY | d->collator,
|
||||
|
@ -93,7 +93,7 @@ void tst_QCollator::compare_data()
|
||||
QTest::addColumn<int>("caseInsensitiveResult");
|
||||
QTest::addColumn<bool>("numericMode");
|
||||
QTest::addColumn<bool>("ignorePunctuation");
|
||||
QTest::addColumn<int>("punctuationResult");
|
||||
QTest::addColumn<int>("punctuationResult"); // Test ignores punctuation *and case*
|
||||
|
||||
/*
|
||||
It's hard to test English, because it's treated differently
|
||||
@ -169,8 +169,12 @@ void tst_QCollator::compare_data()
|
||||
QTest::newRow("french6") << QString("fr_FR") << QString("Test 9") << QString("Test_19") << -1 << -1 << true << true << -1;
|
||||
QTest::newRow("french7") << QString("fr_FR") << QString("test_19") << QString("test 19") << 1 << 1 << true << false << 1;
|
||||
QTest::newRow("french8") << QString("fr_FR") << QString("test.19") << QString("test,19") << 1 << 1 << true << true << 0;
|
||||
}
|
||||
|
||||
// C locale: case sensitive [A-Z] < [a-z] but case insensitive [Aa] < [Bb] <...< [Zz]
|
||||
const QString C = QStringLiteral("C");
|
||||
QTest::newRow("C:ABBA:AaaA") << C << QStringLiteral("ABBA") << QStringLiteral("AaaA") << -1 << 1 << false << false << 1;
|
||||
QTest::newRow("C:AZa:aAZ") << C << QStringLiteral("AZa") << QStringLiteral("aAZ") << -1 << 1 << false << false << 1;
|
||||
}
|
||||
|
||||
void tst_QCollator::compare()
|
||||
{
|
||||
@ -184,6 +188,10 @@ void tst_QCollator::compare()
|
||||
QFETCH(int, punctuationResult);
|
||||
|
||||
QCollator collator(locale);
|
||||
// Need to canonicalize sign to -1, 0 or 1, as .compare() can produce any -ve for <, any +ve for >.
|
||||
auto asSign = [](int compared) {
|
||||
return compared < 0 ? -1 : compared > 0 ? 1 : 0;
|
||||
};
|
||||
|
||||
#if defined(Q_OS_ANDROID) && !defined(Q_OS_ANDROID_EMBEDDED)
|
||||
if (collator.locale() != QLocale())
|
||||
@ -193,12 +201,12 @@ void tst_QCollator::compare()
|
||||
if (numericMode)
|
||||
collator.setNumericMode(true);
|
||||
|
||||
QCOMPARE(collator.compare(s1, s2), result);
|
||||
QCOMPARE(asSign(collator.compare(s1, s2)), result);
|
||||
collator.setCaseSensitivity(Qt::CaseInsensitive);
|
||||
QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult);
|
||||
QCOMPARE(asSign(collator.compare(s1, s2)), caseInsensitiveResult);
|
||||
#if !QT_CONFIG(iconv)
|
||||
collator.setIgnorePunctuation(ignorePunctuation);
|
||||
QCOMPARE(collator.compare(s1, s2), punctuationResult);
|
||||
QCOMPARE(asSign(collator.compare(s1, s2)), punctuationResult);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user