Fix QString::localeAwareCompare with composed/decomposed strings on macOS

Similar to commit cd64a96b31 we also need
to normalize the strings before comparison in order to be compliant with
the ECMAScript test suite.

This patch also adds the remaining test cases from
built-ins/String/prototype/localeCompare/15.5.4.9_CE.

Since the same tests are also failing with strcoll/qt_compare_strings,
this simplifies the code to always normalize except when using ICU
(which gets it right by default).

Change-Id: I16b32da7fc70dc7e6725c49f66fe9941d0bf3a47
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
Simon Hausmann 2018-06-29 16:09:16 +02:00
parent 13337096f9
commit 42cc42acae
2 changed files with 44 additions and 14 deletions

View File

@ -6392,10 +6392,11 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1,
return qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2),
Qt::CaseSensitive);
#if !QT_CONFIG(icu)
const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
#endif
#if defined(Q_OS_WIN)
QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
switch (res) {
@ -6406,17 +6407,17 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1,
default:
return 0;
}
#elif defined (Q_OS_MAC)
#elif defined (Q_OS_DARWIN)
// Use CFStringCompare for comparing strings on Mac. This makes Qt order
// strings the same way as native applications do, and also respects
// the "Order for sorted lists" setting in the International preferences
// panel.
const CFStringRef thisString =
CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
reinterpret_cast<const UniChar *>(data1), length1, kCFAllocatorNull);
reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
const CFStringRef otherString =
CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
reinterpret_cast<const UniChar *>(data2), length2, kCFAllocatorNull);
reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
CFRelease(thisString);
@ -6428,14 +6429,12 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1,
return defaultCollator()->localData().compare(data1, length1, data2, length2);
#elif defined(Q_OS_UNIX)
// declared in <string.h>
int delta = strcoll(toLocal8Bit_helper(data1, length1).constData(), toLocal8Bit_helper(data2, length2).constData());
int delta = strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
if (delta == 0)
delta = qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2),
Qt::CaseSensitive);
delta = qt_compare_strings(lhs, rhs, Qt::CaseSensitive);
return delta;
#else
return qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2),
Qt::CaseSensitive);
return qt_compare_strings(lhs, rhs, Qt::CaseSensitive);
#endif
}

View File

@ -5517,9 +5517,40 @@ void tst_QString::localeAwareCompare_data()
// Compare decomposed and composed form
{
// From ES6 test262 test suite (built-ins/String/prototype/localeCompare/15.5.4.9_CE.js), addressing from Unicode 5.0, chapter 3.12. Boils
// down to this one-liner: console.log("\u1111\u1171\u11B6".localeCompare("\ud4db")
QTest::newRow("normalize") << QString() << QString::fromUtf8("\xED\x93\x9B") << QString::fromUtf8("\xE1\x84\x91\xE1\x85\xB1\xE1\x86\xB6") << 0;
// From ES6 test262 test suite (built-ins/String/prototype/localeCompare/15.5.4.9_CE.js). The test cases boil down to code like this:
// console.log("\u1111\u1171\u11B6".localeCompare("\ud4db")
// example from Unicode 5.0, section 3.7, definition D70
QTest::newRow("normalize1") << QString() << QString::fromUtf8("o\xCC\x88") << QString::fromUtf8("\xC3\xB6") << 0;
// examples from Unicode 5.0, chapter 3.11
QTest::newRow("normalize2") << QString() << QString::fromUtf8("\xC3\xA4\xCC\xA3") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0;
QTest::newRow("normalize3") << QString() << QString::fromUtf8("a\xCC\x88\xCC\xA3") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0;
QTest::newRow("normalize4") << QString() << QString::fromUtf8("\xE1\xBA\xA1\xCC\x88") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0;
QTest::newRow("normalize5") << QString() << QString::fromUtf8("\xC3\xA4\xCC\x86") << QString::fromUtf8("a\xCC\x88\xCC\x86") << 0;
QTest::newRow("normalize6") << QString() << QString::fromUtf8("\xC4\x83\xCC\x88") << QString::fromUtf8("a\xCC\x86\xCC\x88") << 0;
// example from Unicode 5.0, chapter 3.12
QTest::newRow("normalize7") << QString() << QString::fromUtf8("\xE1\x84\x91\xE1\x85\xB1\xE1\x86\xB6") << QString::fromUtf8("\xED\x93\x9B") << 0;
// examples from UTS 10, Unicode Collation Algorithm
QTest::newRow("normalize8") << QString() << QString::fromUtf8("\xE2\x84\xAB") << QString::fromUtf8("\xC3\x85") << 0;
QTest::newRow("normalize9") << QString() << QString::fromUtf8("\xE2\x84\xAB") << QString::fromUtf8("A\xCC\x8A") << 0;
QTest::newRow("normalize10") << QString() << QString::fromUtf8("x\xCC\x9B\xCC\xA3") << QString::fromUtf8("x\xCC\xA3\xCC\x9B") << 0;
QTest::newRow("normalize11") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("\xE1\xBB\xA5\xCC\x9B") << 0;
QTest::newRow("normalize12") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("u\xCC\x9B\xCC\xA3") << 0;
QTest::newRow("normalize13") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("\xC6\xB0\xCC\xA3") << 0;
QTest::newRow("normalize14") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("u\xCC\xA3\xCC\x9B") << 0;
// examples from UAX 15, Unicode Normalization Forms
QTest::newRow("normalize15") << QString() << QString::fromUtf8("\xC3\x87") << QString::fromUtf8("C\xCC\xA7") << 0;
QTest::newRow("normalize16") << QString() << QString::fromUtf8("q\xCC\x87\xCC\xA3") << QString::fromUtf8("q\xCC\xA3\xCC\x87") << 0;
QTest::newRow("normalize17") << QString() << QString::fromUtf8("\xEA\xB0\x80") << QString::fromUtf8("\xE1\x84\x80\xE1\x85\xA1") << 0;
QTest::newRow("normalize18") << QString() << QString::fromUtf8("\xE2\x84\xAB") << QString::fromUtf8("A\xCC\x8A") << 0;
QTest::newRow("normalize19") << QString() << QString::fromUtf8("\xE2\x84\xA6") << QString::fromUtf8("\xCE\xA9") << 0;
QTest::newRow("normalize20") << QString() << QString::fromUtf8("\xC3\x85") << QString::fromUtf8("A\xCC\x8A") << 0;
QTest::newRow("normalize21") << QString() << QString::fromUtf8("\xC3\xB4") << QString::fromUtf8("o\xCC\x82") << 0;
QTest::newRow("normalize22") << QString() << QString::fromUtf8("\xE1\xB9\xA9") << QString::fromUtf8("s\xCC\xA3\xCC\x87") << 0;
QTest::newRow("normalize23") << QString() << QString::fromUtf8("\xE1\xB8\x8B\xCC\xA3") << QString::fromUtf8("d\xCC\xA3\xCC\x87") << 0;
QTest::newRow("normalize24") << QString() << QString::fromUtf8("\xE1\xB8\x8B\xCC\xA3") << QString::fromUtf8("\xE1\xB8\x8D\xCC\x87") << 0;
QTest::newRow("normalize25") << QString() << QString::fromUtf8("q\xCC\x87\xCC\xA3") << QString::fromUtf8("q\xCC\xA3\xCC\x87") << 0;
}
#if !defined(Q_OS_WIN)