From 42cc42acae8b4b5d8aa79e0d4f079b4322588ce7 Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Fri, 29 Jun 2018 16:09:16 +0200 Subject: [PATCH] Fix QString::localeAwareCompare with composed/decomposed strings on macOS Similar to commit cd64a96b31f57e522ab8d29c8357acf384012ebe we also need to normalize the strings before comparison in order to be compliant with the ECMAScript test suite. This patch also adds the remaining test cases from built-ins/String/prototype/localeCompare/15.5.4.9_CE. Since the same tests are also failing with strcoll/qt_compare_strings, this simplifies the code to always normalize except when using ICU (which gets it right by default). Change-Id: I16b32da7fc70dc7e6725c49f66fe9941d0bf3a47 Reviewed-by: Thiago Macieira Reviewed-by: Konstantin Ritt --- src/corelib/tools/qstring.cpp | 21 +++++------ .../corelib/tools/qstring/tst_qstring.cpp | 37 +++++++++++++++++-- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 650c3bdb32..da4066a1e3 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -6392,10 +6392,11 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1, return qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2), Qt::CaseSensitive); +#if !QT_CONFIG(icu) + const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C); + const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C); +#endif #if defined(Q_OS_WIN) - QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C); - QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C); - int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0); switch (res) { @@ -6406,17 +6407,17 @@ int 
QString::localeAwareCompare_helper(const QChar *data1, int length1, default: return 0; } -#elif defined (Q_OS_MAC) +#elif defined (Q_OS_DARWIN) // Use CFStringCompare for comparing strings on Mac. This makes Qt order // strings the same way as native applications do, and also respects // the "Order for sorted lists" setting in the International preferences // panel. const CFStringRef thisString = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, - reinterpret_cast<const UniChar *>(data1), length1, kCFAllocatorNull); + reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull); const CFStringRef otherString = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, - reinterpret_cast<const UniChar *>(data2), length2, kCFAllocatorNull); + reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull); const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized); CFRelease(thisString); @@ -6428,14 +6429,12 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1, return defaultCollator()->localData().compare(data1, length1, data2, length2); #elif defined(Q_OS_UNIX) // declared in <string.h> - int delta = strcoll(toLocal8Bit_helper(data1, length1).constData(), toLocal8Bit_helper(data2, length2).constData()); + int delta = strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData()); if (delta == 0) - delta = qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2), - Qt::CaseSensitive); + delta = qt_compare_strings(lhs, rhs, Qt::CaseSensitive); return delta; #else - return qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2), - Qt::CaseSensitive); + return qt_compare_strings(lhs, rhs, Qt::CaseSensitive); #endif } diff --git a/tests/auto/corelib/tools/qstring/tst_qstring.cpp b/tests/auto/corelib/tools/qstring/tst_qstring.cpp index cdabd51d43..e6dfe81ca9 100644 --- a/tests/auto/corelib/tools/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/tools/qstring/tst_qstring.cpp @@ -5517,9 +5517,40 @@ void 
tst_QString::localeAwareCompare_data() // Compare decomposed and composed form { - // From ES6 test262 test suite (built-ins/String/prototype/localeCompare/15.5.4.9_CE.js), addressing from Unicode 5.0, chapter 3.12. Boils - // down to this one-liner: console.log("\u1111\u1171\u11B6".localeCompare("\ud4db") - QTest::newRow("normalize") << QString() << QString::fromUtf8("\xED\x93\x9B") << QString::fromUtf8("\xE1\x84\x91\xE1\x85\xB1\xE1\x86\xB6") << 0; + // From ES6 test262 test suite (built-ins/String/prototype/localeCompare/15.5.4.9_CE.js). The test cases boil down to code like this: + // console.log("\u1111\u1171\u11B6".localeCompare("\ud4db") + + // example from Unicode 5.0, section 3.7, definition D70 + QTest::newRow("normalize1") << QString() << QString::fromUtf8("o\xCC\x88") << QString::fromUtf8("\xC3\xB6") << 0; + // examples from Unicode 5.0, chapter 3.11 + QTest::newRow("normalize2") << QString() << QString::fromUtf8("\xC3\xA4\xCC\xA3") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0; + QTest::newRow("normalize3") << QString() << QString::fromUtf8("a\xCC\x88\xCC\xA3") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0; + QTest::newRow("normalize4") << QString() << QString::fromUtf8("\xE1\xBA\xA1\xCC\x88") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0; + QTest::newRow("normalize5") << QString() << QString::fromUtf8("\xC3\xA4\xCC\x86") << QString::fromUtf8("a\xCC\x88\xCC\x86") << 0; + QTest::newRow("normalize6") << QString() << QString::fromUtf8("\xC4\x83\xCC\x88") << QString::fromUtf8("a\xCC\x86\xCC\x88") << 0; + // example from Unicode 5.0, chapter 3.12 + QTest::newRow("normalize7") << QString() << QString::fromUtf8("\xE1\x84\x91\xE1\x85\xB1\xE1\x86\xB6") << QString::fromUtf8("\xED\x93\x9B") << 0; + // examples from UTS 10, Unicode Collation Algorithm + QTest::newRow("normalize8") << QString() << QString::fromUtf8("\xE2\x84\xAB") << QString::fromUtf8("\xC3\x85") << 0; + QTest::newRow("normalize9") << QString() << QString::fromUtf8("\xE2\x84\xAB") << 
QString::fromUtf8("A\xCC\x8A") << 0; + QTest::newRow("normalize10") << QString() << QString::fromUtf8("x\xCC\x9B\xCC\xA3") << QString::fromUtf8("x\xCC\xA3\xCC\x9B") << 0; + QTest::newRow("normalize11") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("\xE1\xBB\xA5\xCC\x9B") << 0; + QTest::newRow("normalize12") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("u\xCC\x9B\xCC\xA3") << 0; + QTest::newRow("normalize13") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("\xC6\xB0\xCC\xA3") << 0; + QTest::newRow("normalize14") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("u\xCC\xA3\xCC\x9B") << 0; + // examples from UAX 15, Unicode Normalization Forms + QTest::newRow("normalize15") << QString() << QString::fromUtf8("\xC3\x87") << QString::fromUtf8("C\xCC\xA7") << 0; + QTest::newRow("normalize16") << QString() << QString::fromUtf8("q\xCC\x87\xCC\xA3") << QString::fromUtf8("q\xCC\xA3\xCC\x87") << 0; + QTest::newRow("normalize17") << QString() << QString::fromUtf8("\xEA\xB0\x80") << QString::fromUtf8("\xE1\x84\x80\xE1\x85\xA1") << 0; + QTest::newRow("normalize18") << QString() << QString::fromUtf8("\xE2\x84\xAB") << QString::fromUtf8("A\xCC\x8A") << 0; + QTest::newRow("normalize19") << QString() << QString::fromUtf8("\xE2\x84\xA6") << QString::fromUtf8("\xCE\xA9") << 0; + QTest::newRow("normalize20") << QString() << QString::fromUtf8("\xC3\x85") << QString::fromUtf8("A\xCC\x8A") << 0; + QTest::newRow("normalize21") << QString() << QString::fromUtf8("\xC3\xB4") << QString::fromUtf8("o\xCC\x82") << 0; + QTest::newRow("normalize22") << QString() << QString::fromUtf8("\xE1\xB9\xA9") << QString::fromUtf8("s\xCC\xA3\xCC\x87") << 0; + QTest::newRow("normalize23") << QString() << QString::fromUtf8("\xE1\xB8\x8B\xCC\xA3") << QString::fromUtf8("d\xCC\xA3\xCC\x87") << 0; + QTest::newRow("normalize24") << QString() << QString::fromUtf8("\xE1\xB8\x8B\xCC\xA3") << 
QString::fromUtf8("\xE1\xB8\x8D\xCC\x87") << 0; + QTest::newRow("normalize25") << QString() << QString::fromUtf8("q\xCC\x87\xCC\xA3") << QString::fromUtf8("q\xCC\xA3\xCC\x87") << 0; + } #if !defined(Q_OS_WIN)