Fix case insensitive comparisons using QCollator

In ICU, the strength attribute (UCOL_STRENGTH) decides whether a comparison is
case-sensitive or not.

Fix the Mac comparison code, which cannot have worked before: it returned the value of UCCompareText itself instead of the comparison result.

Added some basic automated testing for QCollator.

Change-Id: I2646c464fd22ccd3a93c461fa3dba4bd1d4c7b4b
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
Lars Knoll 2014-03-24 16:12:20 +01:00 committed by The Qt Project
parent dbe6db192a
commit 81ba16cad9
3 changed files with 105 additions and 5 deletions

View File

// Configures the ICU collator held in d->collator according to the requested
// case sensitivity.
//
// NOTE(review): `val` is declared twice below and ucol_setAttribute() is called
// for both UCOL_CASE_FIRST and UCOL_STRENGTH. This looks like the removed and
// the added side of a diff shown without +/- markers rather than one compilable
// body -- confirm against the real source file before editing the code.
@ -75,10 +75,17 @@ void QCollator::setCaseSensitivity(Qt::CaseSensitivity cs)
{
// presumably un-shares the private data before it is modified -- TODO confirm
detach();
// Variant that maps the setting onto the UCOL_CASE_FIRST attribute
// (presumably the pre-change line -- TODO confirm).
UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_UPPER_FIRST : UCOL_OFF;
// The strength attribute in ICU is rather badly documented. Basically, UCOL_PRIMARY
// ignores differences between base characters and accented characters, as well as case,
// so A and A-umlaut would compare equal.
// UCOL_SECONDARY ignores case differences. UCOL_TERTIARY is the default in most languages
// and does case-sensitive comparison.
// UCOL_QUATERNARY is used as the default in a few languages, such as Japanese, to take care
// of some additional differences in those languages.
//
// Variant that maps the setting onto the UCOL_STRENGTH attribute: the collator's default
// strength for case-sensitive comparison, UCOL_SECONDARY for case-insensitive comparison.
UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_DEFAULT_STRENGTH : UCOL_SECONDARY;
UErrorCode status = U_ZERO_ERROR;
ucol_setAttribute(d->collator, UCOL_CASE_FIRST, val, &status);
ucol_setAttribute(d->collator, UCOL_STRENGTH, val, &status);
if (U_FAILURE(status))
// NOTE(review): the message still says "Case First" although a UCOL_STRENGTH call is
// present above; the text may be stale -- confirm which attribute is really being set.
qWarning("ucol_setAttribute: Case First failed: %d", status);
}

View File

@ -128,12 +128,15 @@ bool QCollator::ignorePunctuation() const
// Compares the QChar range s1 (len1 units) with the QChar range s2 (len2 units)
// by passing both to UCCompareText() together with d->collator.collator.
//
// NOTE(review): `return UCCompareText(` is immediately followed by a declaration,
// and `NULL,` / `&equivalent,` sit next to each other in the argument list. This
// looks like the removed and the added side of a diff shown without +/- markers
// rather than one compilable body -- confirm against the real source file.
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
{
SInt32 result;
// Variant that returns the value of UCCompareText() itself rather than the value
// written to `result` (presumably the pre-change line -- TODO confirm).
return UCCompareText(d->collator.collator,
Boolean equivalent;
// Variant that ignores the call's return value and reads the two out-parameters
// afterwards. NOTE(review): nothing here checks that the call succeeded before
// `equivalent` and `result` are read -- confirm whether that matters.
UCCompareText(d->collator.collator,
reinterpret_cast<const UniChar *>(s1), len1,
reinterpret_cast<const UniChar *>(s2), len2,
NULL,
&equivalent,
&result);
return result;
// 0 when the strings were flagged as equivalent; otherwise the ordering written
// to `result` is reduced to -1 or 1.
if (equivalent)
return 0;
return result < 0 ? -1 : 1;
}
int QCollator::compare(const QString &str1, const QString &str2) const
{

View File

@ -52,6 +52,9 @@ class tst_QCollator : public QObject
private Q_SLOTS:
void moveSemantics();
void compare_data();
void compare();
};
#ifdef Q_COMPILER_RVALUE_REFS
@ -87,6 +90,93 @@ void tst_QCollator::moveSemantics()
#endif
}
// Supplies the data rows for compare(): a locale name, two strings, and the
// expected comparison result before and after the collator is switched to
// case-insensitive mode.
void tst_QCollator::compare_data()
{
    QTest::addColumn<QString>("locale");
    QTest::addColumn<QString>("s1");
    QTest::addColumn<QString>("s2");
    QTest::addColumn<int>("result");
    QTest::addColumn<int>("caseInsensitiveResult");

    // One entry per data row. All strings are Latin-1 encoded.
    struct CollationCase {
        const char *tag;
        const char *locale;
        const char *s1;
        const char *s2;
        int result;
        int caseInsensitiveResult;
    };

    // A few rows are excluded on the Mac through Q_OS_MAC guards. According to
    // the original author it is unclear why they fail there, as the collator
    // for the locale appears to be created correctly.
    static const CollationCase cases[] = {
        // English is hard to test because platforms treat it differently. The
        // original author notes that Linux uses the iso14651_t1 template file
        // (which happens to provide good defaults for Swedish), while Mac OS X
        // seems to compare Latin-1 values bytewise, although that is not
        // certain. Hence only digits and a trivial letter pair are checked, to
        // make sure comparison is not totally broken.
        { "english1", "en_US", "5", "4", 1, 1 },
        { "english2", "en_US", "4", "6", -1, -1 },
        { "english3", "en_US", "5", "6", -1, -1 },
        { "english4", "en_US", "a", "b", -1, -1 },

        // Swedish: a with ring above (E5) sorts before a with diaeresis (E4),
        // which sorts before o with diaeresis (F6); all of them sort after z.
        { "swedish1", "sv_SE", "\xe5", "\xe4", -1, -1 },
        { "swedish2", "sv_SE", "\xe4", "\xf6", -1, -1 },
        { "swedish3", "sv_SE", "\xe5", "\xf6", -1, -1 },
#ifndef Q_OS_MAC
        { "swedish4", "sv_SE", "z", "\xe5", -1, -1 },
#endif

        // Norwegian: ae (E6) sorts before o with stroke (D8), which sorts
        // before a with ring above (E5).
        { "norwegian1", "no_NO", "\xe6", "\xd8", -1, -1 },
#ifndef Q_OS_MAC
        { "norwegian2", "no_NO", "\xd8", "\xe5", -1, -1 },
#endif
        { "norwegian3", "no_NO", "\xe6", "\xe5", -1, -1 },

        // German: z sorts *after* a with diaeresis (E4), which sorts before
        // o with diaeresis (F6).
        { "german1", "de_DE", "a", "\xe4", -1, -1 },
        { "german2", "de_DE", "b", "\xe4", 1, 1 },
        { "german3", "de_DE", "z", "\xe4", 1, 1 },
        { "german4", "de_DE", "\xe4", "\xf6", -1, -1 },
        { "german5", "de_DE", "z", "\xf6", 1, 1 },
        { "german6", "de_DE", "\xc0", "\xe0", 1, 0 },
        { "german7", "de_DE", "\xd6", "\xf6", 1, 0 },
        { "german8", "de_DE", "oe", "\xf6", 1, 1 },
        { "german9", "de_DE", "A", "a", 1, 0 },

        // French: ordering of e and e with an accent.
        { "french1", "fr_FR", "\xe9", "e", 1, 1 },
        { "french2", "fr_FR", "\xe9t", "et", 1, 1 },
        { "french3", "fr_FR", "\xe9", "d", 1, 1 },
        { "french4", "fr_FR", "\xe9", "f", -1, -1 }
    };

    const int caseCount = int(sizeof(cases) / sizeof(cases[0]));
    for (int i = 0; i < caseCount; ++i) {
        const CollationCase &c = cases[i];
        QTest::newRow(c.tag) << QString::fromLatin1(c.locale)
                             << QString::fromLatin1(c.s1)
                             << QString::fromLatin1(c.s2)
                             << c.result
                             << c.caseInsensitiveResult;
    }
}
// Reduces a three-way comparison result to -1, 0 or 1.
static int collationSign(int order)
{
    return order < 0 ? -1 : (order > 0 ? 1 : 0);
}

// Data-driven check of QCollator::compare(): builds a collator for the row's
// locale, compares s1 with s2, then switches the collator to case-insensitive
// mode and compares again.
void tst_QCollator::compare()
{
    QFETCH(QString, locale);
    QFETCH(QString, s1);
    QFETCH(QString, s2);
    QFETCH(int, result);
    QFETCH(int, caseInsensitiveResult);

    QCollator collator(locale);

    // Only the sign of compare() is meaningful, and the expected values in the
    // data rows are -1, 0 or 1. Normalizing keeps the test from failing on a
    // backend that reports a different magnitude for the same ordering.
    QCOMPARE(collationSign(collator.compare(s1, s2)), result);

    collator.setCaseSensitivity(Qt::CaseInsensitive);
    QCOMPARE(collationSign(collator.compare(s1, s2)), caseInsensitiveResult);
}
QTEST_APPLESS_MAIN(tst_QCollator)
#include "tst_qcollator.moc"