diff --git a/src/corelib/.prev_configure.cmake b/src/corelib/.prev_configure.cmake index 893acda953..18d9120aae 100644 --- a/src/corelib/.prev_configure.cmake +++ b/src/corelib/.prev_configure.cmake @@ -150,6 +150,28 @@ std::mt19937 mt(0); } ") +# cxx17_bm_searcher +qt_config_compile_test(cxx17_bm_searcher + LABEL "C++17 boyer_moore_searcher" + CODE +"#include +#include + +int main(void) +{ + /* BEGIN TEST: */ +const char haystack[] = \"hello\"; +const char needle[] = \"e\"; +const auto it = +std::search(haystack + 0, haystack + std::size(haystack), +std::boyer_moore_searcher(needle + 0, needle + std::size(needle))); +(void)it; + /* END TEST: */ + return 0; +} +"# FIXME: qmake: CONFIG += c++17 +) + # cxx17_filesystem qt_config_compile_test(cxx17_filesystem LABEL "C++17 " @@ -513,6 +535,10 @@ qt_feature("cxx11_future" PUBLIC LABEL "C++11 " CONDITION TEST_cxx11_future ) +qt_feature("cxx17_bm_searcher" PRIVATE + LABEL "C++17 boyer_moore_searcher" + CONDITION TEST_cxx17_bm_searcher +) qt_feature("cxx17_filesystem" PUBLIC LABEL "C++17 " CONDITION TEST_cxx17_filesystem diff --git a/src/corelib/configure.cmake b/src/corelib/configure.cmake index e82ac8bf93..e363c44b58 100644 --- a/src/corelib/configure.cmake +++ b/src/corelib/configure.cmake @@ -156,6 +156,28 @@ std::mt19937 mt(0); } ") +# cxx17_bm_searcher +qt_config_compile_test(cxx17_bm_searcher + LABEL "C++17 boyer_moore_searcher" + CODE +"#include +#include + +int main(void) +{ + /* BEGIN TEST: */ +const char haystack[] = \"hello\"; +const char needle[] = \"e\"; +const auto it = +std::search(haystack + 0, haystack + std::size(haystack), +std::boyer_moore_searcher(needle + 0, needle + std::size(needle))); +(void)it; + /* END TEST: */ + return 0; +} +"# FIXME: qmake: CONFIG += c++17 +) + # cxx17_filesystem qt_config_compile_test(cxx17_filesystem LABEL "C++17 " @@ -519,6 +541,10 @@ qt_feature("cxx11_future" PUBLIC LABEL "C++11 " CONDITION TEST_cxx11_future ) +qt_feature("cxx17_bm_searcher" PRIVATE + LABEL "C++17 boyer_moore_searcher" + CONDITION TEST_cxx17_bm_searcher +) qt_feature("cxx17_filesystem" PUBLIC LABEL "C++17 " CONDITION TEST_cxx17_filesystem diff --git a/src/corelib/configure.json b/src/corelib/configure.json index 4643531135..c68c0cf4bf 100644 --- a/src/corelib/configure.json +++ b/src/corelib/configure.json @@ -282,6 +282,22 @@ "main": "std::mt19937 mt(0);" } }, + "cxx17_bm_searcher": { + "label": "C++17 boyer_moore_searcher", + "type": "compile", + "test": { + "include": [ "algorithm", "functional" ], + "main": [ + "const char haystack[] = \"hello\";", + "const char needle[] = \"e\";", + "const auto it =", + "std::search(haystack + 0, haystack + std::size(haystack),", + "std::boyer_moore_searcher(needle + 0, needle + std::size(needle)));", + "(void)it;" + ], + "qmake": "CONFIG += c++17" + } + }, "cxx17_filesystem": { "label": "C++17 ", "type": "compile", @@ -543,6 +559,13 @@ "condition": "tests.cxx11_future", "output": [ "publicFeature" ] }, + "cxx17_bm_searcher": { + "label": "C++17 boyer_moore_searcher", + "condition": "tests.cxx17_bm_searcher", + "output": [ + "privateFeature" + ] + }, "cxx17_filesystem": { "label": "C++17 ", "condition": "tests.cxx17_filesystem", diff --git a/src/corelib/global/qconfig-bootstrapped.h b/src/corelib/global/qconfig-bootstrapped.h index 42d38d0d04..4d3f88e071 100644 --- a/src/corelib/global/qconfig-bootstrapped.h +++ b/src/corelib/global/qconfig-bootstrapped.h @@ -78,6 +78,7 @@ #define QT_FEATURE_cborstreamwriter 1 #define QT_CRYPTOGRAPHICHASH_ONLY_SHA1 #define QT_FEATURE_cxx11_random (__has_include() ? 1 : -1) +#define QT_FEATURE_cxx17_bm_searcher -1 #define QT_FEATURE_cxx17_filesystem -1 #define QT_NO_DATASTREAM #define QT_FEATURE_datestring 1 diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 8f3bde843e..89abbcbff3 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -78,6 +78,11 @@ #include "qstringalgorithms_p.h" #include "qthreadstorage.h" +#include "qbytearraymatcher.h" // Helper for comparison of QLatin1String + +#include +#include + #ifdef Q_OS_WIN # include #endif @@ -1204,27 +1209,27 @@ static int ucstrcmp(const QChar *a, size_t alen, const char *b, size_t blen) return cmp ? cmp : lencmp(alen, blen); } +static constexpr uchar latin1Lower[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, + 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, + 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, + 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, + // 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf, + 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, + 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff +}; static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize) { - constexpr uchar latin1Lower[256] = { - 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, - 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, - 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, - 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, - 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, - 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f, - 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, - 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, - 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, - 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, - 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, - 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, - // 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life - 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, - 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf, - 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, - 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff - }; // We're called with QLatin1String's .data() and .size(): Q_ASSERT(lSize >= 0 && rSize >= 0); if (!lSize) @@ -10362,15 +10367,73 @@ qsizetype QtPrivate::findString(QLatin1String haystack, qsizetype from, QStringV qsizetype QtPrivate::findString(QLatin1String haystack, qsizetype from, QLatin1String needle, Qt::CaseSensitivity cs) noexcept { - if (haystack.size() < needle.size()) + if (from < 0) + from += haystack.size(); + if (from < 0) return -1; + qsizetype adjustedSize = haystack.size() - from; + if (adjustedSize < needle.size()) + return -1; + if (needle.size() == 0) + return from; - QVarLengthArray h(haystack.size()); - qt_from_latin1(h.data(), haystack.latin1(), haystack.size()); + if (cs == Qt::CaseSensitive) { + const QByteArrayMatcher matcher(needle.data(), needle.size()); + return matcher.indexIn(haystack.data(), haystack.size(), from); + } + + // If the needle is sufficiently small we simply iteratively search through + // the haystack. When the needle is too long we use a boyer-moore searcher + // from the standard library, if available. If it is not available then the + // QLatin1Strings are converted to QString and compared as such. Though + // initialization is slower the boyer-moore search it employs still makes up + // for it when haystack and needle are sufficiently long. + // The needle size was chosen by testing various lengths using the + // qstringtokenizer benchmark with the + // "tokenize_qlatin1string_qlatin1string" test. +#ifdef Q_CC_MSVC + const qsizetype threshold = 1; +#else + const qsizetype threshold = 13; +#endif + if (needle.size() <= threshold) { + const auto begin = haystack.begin(); + const auto end = haystack.end() - needle.size() + 1; + const uchar needle1 = latin1Lower[uchar(needle[0].toLatin1())]; + auto ciMatch = [needle1](const char ch) { + return latin1Lower[uchar(ch)] == needle1; + }; + const qsizetype nlen1 = needle.size() - 1; + for (auto it = std::find_if(begin + from, end, ciMatch); it < end; + it = std::find_if(it + 1, end, ciMatch)) { + // In this comparison we skip the first character because we know it's a match + if (!nlen1 || QLatin1String(it + 1, nlen1).compare(needle.sliced(1), cs) == 0) + return std::distance(begin, it); + } + return -1; + } + +#if QT_CONFIG(cxx17_bm_searcher) + const auto ciHasher = [](char a) { return latin1Lower[uchar(a)]; }; + const auto ciEqual = [](char a, char b) { + return latin1Lower[uchar(a)] == latin1Lower[uchar(b)]; + }; + const auto it = + std::search(haystack.begin() + from, haystack.end(), + std::boyer_moore_searcher(needle.begin(), needle.end(), ciHasher, ciEqual)); + return it == haystack.end() ? -1 : std::distance(haystack.begin(), it); +#else + QVarLengthArray h(adjustedSize); + const auto begin = haystack.end() - adjustedSize; + qt_from_latin1(h.data(), begin, adjustedSize); QVarLengthArray n(needle.size()); qt_from_latin1(n.data(), needle.latin1(), needle.size()); - return QtPrivate::findString(QStringView(reinterpret_cast(h.constData()), h.size()), from, - QStringView(reinterpret_cast(n.constData()), n.size()), cs); + qsizetype res = QtPrivate::findString(QStringView(h.constData(), h.size()), 0, + QStringView(n.constData(), n.size()), cs); + if (res == -1) + return -1; + return res + std::distance(haystack.begin(), begin); +#endif } qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept