Fix QString::localeAwareCompare with composed/decomposed strings on Windows

With ICU and on macOS it appears that the comparison is done on a
canonical form, while CompareString(Ex) does not do that, as the added
test verifies. Explicit normalization fixes that.

As a bonus, this also unifies the code path between regular Windows
and UWP by unconditionally using CompareStringEx (which requires
Vista or later).

This issue surfaced while running the ECMASCript 6 Conformance Test
Suite in QtQml.

This re-uses the existing test for localeAwareCompare, which was
disabled on Windows, macOS and Linux with ICU (the common case).

Change-Id: I52440fce60b54745ead1eff005ec51e98e2a79ec
Reviewed-by: Friedemann Kleint <Friedemann.Kleint@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
Simon Hausmann 2018-06-25 15:45:32 +02:00
parent 00d9ade6e6
commit cd64a96b31
2 changed files with 18 additions and 14 deletions

View File

@ -6393,11 +6393,10 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1,
Qt::CaseSensitive); Qt::CaseSensitive);
#if defined(Q_OS_WIN) #if defined(Q_OS_WIN)
# ifndef Q_OS_WINRT QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
int res = CompareString(GetUserDefaultLCID(), 0, (wchar_t*)data1, length1, (wchar_t*)data2, length2); QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
# else
int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPCWSTR)data1, length1, (LPCWSTR)data2, length2, NULL, NULL, 0); int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
# endif
switch (res) { switch (res) {
case CSTR_LESS_THAN: case CSTR_LESS_THAN:

View File

@ -530,10 +530,8 @@ private slots:
void integer_conversion(); void integer_conversion();
void tortureSprintfDouble(); void tortureSprintfDouble();
void toNum(); void toNum();
#if !defined(Q_OS_WIN)
void localeAwareCompare_data(); void localeAwareCompare_data();
void localeAwareCompare(); void localeAwareCompare();
#endif
void reverseIterators(); void reverseIterators();
void split_data(); void split_data();
void split(); void split();
@ -5487,8 +5485,6 @@ void tst_QString::tortureSprintfDouble()
#include <locale.h> #include <locale.h>
#if !defined(Q_OS_WIN)
// On Q_OS_WIN, we cannot set the system or user locale
void tst_QString::localeAwareCompare_data() void tst_QString::localeAwareCompare_data()
{ {
QTest::addColumn<QString>("locale"); QTest::addColumn<QString>("locale");
@ -5496,6 +5492,15 @@ void tst_QString::localeAwareCompare_data()
QTest::addColumn<QString>("s2"); QTest::addColumn<QString>("s2");
QTest::addColumn<int>("result"); QTest::addColumn<int>("result");
// Compare decomposed and composed form
{
// From ES6 test262 test suite (built-ins/String/prototype/localeCompare/15.5.4.9_CE.js), addressing from Unicode 5.0, chapter 3.12. Boils
// down to this one-liner: console.log("\u1111\u1171\u11B6".localeCompare("\ud4db")
QTest::newRow("normalize") << QString() << QString::fromUtf8("\xED\x93\x9B") << QString::fromUtf8("\xE1\x84\x91\xE1\x85\xB1\xE1\x86\xB6") << 0;
}
#if !defined(Q_OS_WIN)
// On Q_OS_WIN, we cannot set the system or user locale
/* /*
The C locale performs pure byte comparisons for The C locale performs pure byte comparisons for
Latin-1-specific characters (I think). Compare with Swedish Latin-1-specific characters (I think). Compare with Swedish
@ -5556,6 +5561,7 @@ void tst_QString::localeAwareCompare_data()
QTest::newRow("german2") << QString("de_DE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1; QTest::newRow("german2") << QString("de_DE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1;
QTest::newRow("german3") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xf6") << 1; QTest::newRow("german3") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xf6") << 1;
#endif #endif
#endif //!defined(Q_OS_WIN)
} }
void tst_QString::localeAwareCompare() void tst_QString::localeAwareCompare()
@ -5568,17 +5574,17 @@ void tst_QString::localeAwareCompare()
QStringRef r1(&s1, 0, s1.length()); QStringRef r1(&s1, 0, s1.length());
QStringRef r2(&s2, 0, s2.length()); QStringRef r2(&s2, 0, s2.length());
#if defined (Q_OS_DARWIN) || defined(QT_USE_ICU)
QSKIP("Setting the locale is not supported on OS X or ICU (you can set the C locale, but that won't affect localeAwareCompare)");
#else
if (!locale.isEmpty()) { if (!locale.isEmpty()) {
#if defined (Q_OS_DARWIN) || defined(QT_USE_ICU)
QSKIP("Setting the locale is not supported on OS X or ICU (you can set the C locale, but that won't affect localeAwareCompare)");
#else
const char *newLocale = setlocale(LC_ALL, locale.toLatin1()); const char *newLocale = setlocale(LC_ALL, locale.toLatin1());
if (!newLocale) { if (!newLocale) {
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
QSKIP("Please install the proper locale on this machine to test properly"); QSKIP("Please install the proper locale on this machine to test properly");
} }
}
#endif #endif
}
#ifdef QT_USE_ICU #ifdef QT_USE_ICU
// ### for c1, ICU disagrees with libc on how to compare // ### for c1, ICU disagrees with libc on how to compare
@ -5633,7 +5639,6 @@ void tst_QString::localeAwareCompare()
if (!locale.isEmpty()) if (!locale.isEmpty())
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
} }
#endif //!defined(Q_OS_WIN)
void tst_QString::reverseIterators() void tst_QString::reverseIterators()
{ {