Fix case insensitive comparisons using QCollator
In ICU the strength parameter decides whether a comparison is case sensitive or not. Fix mac comparison code. It can't have worked before. Added some basic automated testing for QCollator. Change-Id: I2646c464fd22ccd3a93c461fa3dba4bd1d4c7b4b Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
parent
dbe6db192a
commit
81ba16cad9
@ -75,10 +75,17 @@ void QCollator::setCaseSensitivity(Qt::CaseSensitivity cs)
|
||||
{
|
||||
detach();
|
||||
|
||||
UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_UPPER_FIRST : UCOL_OFF;
|
||||
// The strength attribute in ICU is rather badly documented. Basically UCOL_PRIMARY
|
||||
// ignores differences between base characters and accented characters as well as case.
|
||||
// So A and A-umlaut would compare equal.
|
||||
// UCOL_SECONDARY ignores case differences. UCOL_TERTIARY is the default in most languages
|
||||
// and does case sensitive comparison.
|
||||
// UCOL_QUATERNARY is used as default in a few languages such as Japanese to take care of some
|
||||
// additional differences in those languages.
|
||||
UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_DEFAULT_STRENGTH : UCOL_SECONDARY;
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ucol_setAttribute(d->collator, UCOL_CASE_FIRST, val, &status);
|
||||
ucol_setAttribute(d->collator, UCOL_STRENGTH, val, &status);
|
||||
if (U_FAILURE(status))
|
||||
qWarning("ucol_setAttribute: Case First failed: %d", status);
|
||||
}
|
||||
|
@ -128,12 +128,15 @@ bool QCollator::ignorePunctuation() const
|
||||
int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
|
||||
{
|
||||
SInt32 result;
|
||||
return UCCompareText(d->collator.collator,
|
||||
Boolean equivalent;
|
||||
UCCompareText(d->collator.collator,
|
||||
reinterpret_cast<const UniChar *>(s1), len1,
|
||||
reinterpret_cast<const UniChar *>(s2), len2,
|
||||
NULL,
|
||||
&equivalent,
|
||||
&result);
|
||||
return result;
|
||||
if (equivalent)
|
||||
return 0;
|
||||
return result < 0 ? -1 : 1;
|
||||
}
|
||||
int QCollator::compare(const QString &str1, const QString &str2) const
|
||||
{
|
||||
|
@ -52,6 +52,9 @@ class tst_QCollator : public QObject
|
||||
|
||||
private Q_SLOTS:
|
||||
void moveSemantics();
|
||||
|
||||
void compare_data();
|
||||
void compare();
|
||||
};
|
||||
|
||||
#ifdef Q_COMPILER_RVALUE_REFS
|
||||
@ -87,6 +90,93 @@ void tst_QCollator::moveSemantics()
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void tst_QCollator::compare_data()
|
||||
{
|
||||
QTest::addColumn<QString>("locale");
|
||||
QTest::addColumn<QString>("s1");
|
||||
QTest::addColumn<QString>("s2");
|
||||
QTest::addColumn<int>("result");
|
||||
QTest::addColumn<int>("caseInsensitiveResult");
|
||||
|
||||
/*
|
||||
A few tests below are commented out on the mac. It's unclear why they fail,
|
||||
as it looks like the collator for the locale is created correctly.
|
||||
*/
|
||||
|
||||
/*
|
||||
It's hard to test English, because it's treated differently
|
||||
on different platforms. For example, on Linux, it uses the
|
||||
iso14651_t1 template file, which happens to provide good
|
||||
defaults for Swedish. Mac OS X seems to do a pure bytewise
|
||||
comparison of Latin-1 values, although I'm not sure. So I
|
||||
just test digits to make sure that it's not totally broken.
|
||||
*/
|
||||
QTest::newRow("english1") << QString("en_US") << QString("5") << QString("4") << 1 << 1;
|
||||
QTest::newRow("english2") << QString("en_US") << QString("4") << QString("6") << -1 << -1;
|
||||
QTest::newRow("english3") << QString("en_US") << QString("5") << QString("6") << -1 << -1;
|
||||
QTest::newRow("english4") << QString("en_US") << QString("a") << QString("b") << -1 << -1;
|
||||
/*
|
||||
In Swedish, a with ring above (E5) comes before a with
|
||||
diaresis (E4), which comes before o diaresis (F6), which
|
||||
all come after z.
|
||||
*/
|
||||
QTest::newRow("swedish1") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xe4") << -1 << -1;
|
||||
QTest::newRow("swedish2") << QString("sv_SE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1;
|
||||
QTest::newRow("swedish3") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xf6") << -1 << -1;
|
||||
#ifndef Q_OS_MAC
|
||||
QTest::newRow("swedish4") << QString("sv_SE") << QString::fromLatin1("z") << QString::fromLatin1("\xe5") << -1 << -1;
|
||||
#endif
|
||||
|
||||
/*
|
||||
In Norwegian, ae (E6) comes before o with stroke (D8), which
|
||||
comes before a with ring above (E5).
|
||||
*/
|
||||
QTest::newRow("norwegian1") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xd8") << -1 << -1;
|
||||
#ifndef Q_OS_MAC
|
||||
QTest::newRow("norwegian2") << QString("no_NO") << QString::fromLatin1("\xd8") << QString::fromLatin1("\xe5") << -1 << -1;
|
||||
#endif
|
||||
QTest::newRow("norwegian3") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xe5") << -1 << -1;
|
||||
|
||||
/*
|
||||
In German, z comes *after* a with diaresis (E4),
|
||||
which comes before o diaresis (F6).
|
||||
*/
|
||||
QTest::newRow("german1") << QString("de_DE") << QString::fromLatin1("a") << QString::fromLatin1("\xe4") << -1 << -1;
|
||||
QTest::newRow("german2") << QString("de_DE") << QString::fromLatin1("b") << QString::fromLatin1("\xe4") << 1 << 1;
|
||||
QTest::newRow("german3") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xe4") << 1 << 1;
|
||||
QTest::newRow("german4") << QString("de_DE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1;
|
||||
QTest::newRow("german5") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xf6") << 1 << 1;
|
||||
QTest::newRow("german6") << QString("de_DE") << QString::fromLatin1("\xc0") << QString::fromLatin1("\xe0") << 1 << 0;
|
||||
QTest::newRow("german7") << QString("de_DE") << QString::fromLatin1("\xd6") << QString::fromLatin1("\xf6") << 1 << 0;
|
||||
QTest::newRow("german8") << QString("de_DE") << QString::fromLatin1("oe") << QString::fromLatin1("\xf6") << 1 << 1;
|
||||
QTest::newRow("german9") << QString("de_DE") << QString("A") << QString("a") << 1 << 0;
|
||||
|
||||
/*
|
||||
French sorting of e and e with accent
|
||||
*/
|
||||
QTest::newRow("french1") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("e") << 1 << 1;
|
||||
QTest::newRow("french2") << QString("fr_FR") << QString::fromLatin1("\xe9t") << QString::fromLatin1("et") << 1 << 1;
|
||||
QTest::newRow("french3") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("d") << 1 << 1;
|
||||
QTest::newRow("french4") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("f") << -1 << -1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void tst_QCollator::compare()
|
||||
{
|
||||
QFETCH(QString, locale);
|
||||
QFETCH(QString, s1);
|
||||
QFETCH(QString, s2);
|
||||
QFETCH(int, result);
|
||||
QFETCH(int, caseInsensitiveResult);
|
||||
|
||||
QCollator collator(locale);
|
||||
QCOMPARE(collator.compare(s1, s2), result);
|
||||
collator.setCaseSensitivity(Qt::CaseInsensitive);
|
||||
QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult);
|
||||
}
|
||||
|
||||
QTEST_APPLESS_MAIN(tst_QCollator)
|
||||
|
||||
#include "tst_qcollator.moc"
|
||||
|
Loading…
Reference in New Issue
Block a user