QLatin1String: optimize indexOf
Some time ago I wanted to use QStringTokenizer with a QL1S haystack and needle to optimize a particular bottleneck. Only to realize the new bottleneck was more expensive (continuously converting both the strings to QString). Change-Id: Ica86db0043c839c2336a3c3886ffbe3875659b5b Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
de2c3ccd49
commit
b24e689cb5
@ -150,6 +150,28 @@ std::mt19937 mt(0);
|
||||
}
|
||||
")
|
||||
|
||||
# cxx17_bm_searcher
|
||||
qt_config_compile_test(cxx17_bm_searcher
|
||||
LABEL "C++17 boyer_moore_searcher"
|
||||
CODE
|
||||
"#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
/* BEGIN TEST: */
|
||||
const char haystack[] = \"hello\";
|
||||
const char needle[] = \"e\";
|
||||
const auto it =
|
||||
std::search(haystack + 0, haystack + std::size(haystack),
|
||||
std::boyer_moore_searcher(needle + 0, needle + std::size(needle)));
|
||||
(void)it;
|
||||
/* END TEST: */
|
||||
return 0;
|
||||
}
|
||||
"# FIXME: qmake: CONFIG += c++17
|
||||
)
|
||||
|
||||
# cxx17_filesystem
|
||||
qt_config_compile_test(cxx17_filesystem
|
||||
LABEL "C++17 <filesystem>"
|
||||
@ -513,6 +535,10 @@ qt_feature("cxx11_future" PUBLIC
|
||||
LABEL "C++11 <future>"
|
||||
CONDITION TEST_cxx11_future
|
||||
)
|
||||
qt_feature("cxx17_bm_searcher" PRIVATE
|
||||
LABEL "C++17 boyer_moore_searcher"
|
||||
CONDITION TEST_cxx17_bm_searcher
|
||||
)
|
||||
qt_feature("cxx17_filesystem" PUBLIC
|
||||
LABEL "C++17 <filesystem>"
|
||||
CONDITION TEST_cxx17_filesystem
|
||||
|
@ -156,6 +156,28 @@ std::mt19937 mt(0);
|
||||
}
|
||||
")
|
||||
|
||||
# cxx17_bm_searcher
|
||||
qt_config_compile_test(cxx17_bm_searcher
|
||||
LABEL "C++17 boyer_moore_searcher"
|
||||
CODE
|
||||
"#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
/* BEGIN TEST: */
|
||||
const char haystack[] = \"hello\";
|
||||
const char needle[] = \"e\";
|
||||
const auto it =
|
||||
std::search(haystack + 0, haystack + std::size(haystack),
|
||||
std::boyer_moore_searcher(needle + 0, needle + std::size(needle)));
|
||||
(void)it;
|
||||
/* END TEST: */
|
||||
return 0;
|
||||
}
|
||||
"# FIXME: qmake: CONFIG += c++17
|
||||
)
|
||||
|
||||
# cxx17_filesystem
|
||||
qt_config_compile_test(cxx17_filesystem
|
||||
LABEL "C++17 <filesystem>"
|
||||
@ -519,6 +541,10 @@ qt_feature("cxx11_future" PUBLIC
|
||||
LABEL "C++11 <future>"
|
||||
CONDITION TEST_cxx11_future
|
||||
)
|
||||
qt_feature("cxx17_bm_searcher" PRIVATE
|
||||
LABEL "C++17 boyer_moore_searcher"
|
||||
CONDITION TEST_cxx17_bm_searcher
|
||||
)
|
||||
qt_feature("cxx17_filesystem" PUBLIC
|
||||
LABEL "C++17 <filesystem>"
|
||||
CONDITION TEST_cxx17_filesystem
|
||||
|
@ -282,6 +282,22 @@
|
||||
"main": "std::mt19937 mt(0);"
|
||||
}
|
||||
},
|
||||
"cxx17_bm_searcher": {
|
||||
"label": "C++17 boyer_moore_searcher",
|
||||
"type": "compile",
|
||||
"test": {
|
||||
"include": [ "algorithm", "functional" ],
|
||||
"main": [
|
||||
"const char haystack[] = \"hello\";",
|
||||
"const char needle[] = \"e\";",
|
||||
"const auto it =",
|
||||
"std::search(haystack + 0, haystack + std::size(haystack),",
|
||||
"std::boyer_moore_searcher(needle + 0, needle + std::size(needle)));",
|
||||
"(void)it;"
|
||||
],
|
||||
"qmake": "CONFIG += c++17"
|
||||
}
|
||||
},
|
||||
"cxx17_filesystem": {
|
||||
"label": "C++17 <filesystem>",
|
||||
"type": "compile",
|
||||
@ -543,6 +559,13 @@
|
||||
"condition": "tests.cxx11_future",
|
||||
"output": [ "publicFeature" ]
|
||||
},
|
||||
"cxx17_bm_searcher": {
|
||||
"label": "C++17 boyer_moore_searcher",
|
||||
"condition": "tests.cxx17_bm_searcher",
|
||||
"output": [
|
||||
"privateFeature"
|
||||
]
|
||||
},
|
||||
"cxx17_filesystem": {
|
||||
"label": "C++17 <filesystem>",
|
||||
"condition": "tests.cxx17_filesystem",
|
||||
|
@ -78,6 +78,7 @@
|
||||
#define QT_FEATURE_cborstreamwriter 1
|
||||
#define QT_CRYPTOGRAPHICHASH_ONLY_SHA1
|
||||
#define QT_FEATURE_cxx11_random (__has_include(<random>) ? 1 : -1)
|
||||
#define QT_FEATURE_cxx17_bm_searcher -1
|
||||
#define QT_FEATURE_cxx17_filesystem -1
|
||||
#define QT_NO_DATASTREAM
|
||||
#define QT_FEATURE_datestring 1
|
||||
|
@ -78,6 +78,11 @@
|
||||
#include "qstringalgorithms_p.h"
|
||||
#include "qthreadstorage.h"
|
||||
|
||||
#include "qbytearraymatcher.h" // Helper for comparison of QLatin1String
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#ifdef Q_OS_WIN
|
||||
# include <qt_windows.h>
|
||||
#endif
|
||||
@ -1204,27 +1209,27 @@ static int ucstrcmp(const QChar *a, size_t alen, const char *b, size_t blen)
|
||||
return cmp ? cmp : lencmp(alen, blen);
|
||||
}
|
||||
|
||||
static constexpr uchar latin1Lower[256] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
|
||||
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
|
||||
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
|
||||
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
// 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
|
||||
};
|
||||
static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
|
||||
{
|
||||
constexpr uchar latin1Lower[256] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
|
||||
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
|
||||
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
|
||||
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
|
||||
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
||||
// 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
|
||||
};
|
||||
// We're called with QLatin1String's .data() and .size():
|
||||
Q_ASSERT(lSize >= 0 && rSize >= 0);
|
||||
if (!lSize)
|
||||
@ -10362,15 +10367,73 @@ qsizetype QtPrivate::findString(QLatin1String haystack, qsizetype from, QStringV
|
||||
|
||||
qsizetype QtPrivate::findString(QLatin1String haystack, qsizetype from, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
|
||||
{
|
||||
if (haystack.size() < needle.size())
|
||||
if (from < 0)
|
||||
from += haystack.size();
|
||||
if (from < 0)
|
||||
return -1;
|
||||
qsizetype adjustedSize = haystack.size() - from;
|
||||
if (adjustedSize < needle.size())
|
||||
return -1;
|
||||
if (needle.size() == 0)
|
||||
return from;
|
||||
|
||||
QVarLengthArray<char16_t> h(haystack.size());
|
||||
qt_from_latin1(h.data(), haystack.latin1(), haystack.size());
|
||||
if (cs == Qt::CaseSensitive) {
|
||||
const QByteArrayMatcher matcher(needle.data(), needle.size());
|
||||
return matcher.indexIn(haystack.data(), haystack.size(), from);
|
||||
}
|
||||
|
||||
// If the needle is sufficiently small we simply iteratively search through
|
||||
// the haystack. When the needle is too long we use a boyer-moore searcher
|
||||
// from the standard library, if available. If it is not available then the
|
||||
// QLatin1Strings are converted to QString and compared as such. Though
|
||||
// initialization is slower the boyer-moore search it employs still makes up
|
||||
// for it when haystack and needle are sufficiently long.
|
||||
// The needle size was chosen by testing various lengths using the
|
||||
// qstringtokenizer benchmark with the
|
||||
// "tokenize_qlatin1string_qlatin1string" test.
|
||||
#ifdef Q_CC_MSVC
|
||||
const qsizetype threshold = 1;
|
||||
#else
|
||||
const qsizetype threshold = 13;
|
||||
#endif
|
||||
if (needle.size() <= threshold) {
|
||||
const auto begin = haystack.begin();
|
||||
const auto end = haystack.end() - needle.size() + 1;
|
||||
const uchar needle1 = latin1Lower[uchar(needle[0].toLatin1())];
|
||||
auto ciMatch = [needle1](const char ch) {
|
||||
return latin1Lower[uchar(ch)] == needle1;
|
||||
};
|
||||
const qsizetype nlen1 = needle.size() - 1;
|
||||
for (auto it = std::find_if(begin + from, end, ciMatch); it < end;
|
||||
it = std::find_if(it + 1, end, ciMatch)) {
|
||||
// In this comparison we skip the first character because we know it's a match
|
||||
if (!nlen1 || QLatin1String(it + 1, nlen1).compare(needle.sliced(1), cs) == 0)
|
||||
return std::distance(begin, it);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if QT_CONFIG(cxx17_bm_searcher)
|
||||
const auto ciHasher = [](char a) { return latin1Lower[uchar(a)]; };
|
||||
const auto ciEqual = [](char a, char b) {
|
||||
return latin1Lower[uchar(a)] == latin1Lower[uchar(b)];
|
||||
};
|
||||
const auto it =
|
||||
std::search(haystack.begin() + from, haystack.end(),
|
||||
std::boyer_moore_searcher(needle.begin(), needle.end(), ciHasher, ciEqual));
|
||||
return it == haystack.end() ? -1 : std::distance(haystack.begin(), it);
|
||||
#else
|
||||
QVarLengthArray<char16_t> h(adjustedSize);
|
||||
const auto begin = haystack.end() - adjustedSize;
|
||||
qt_from_latin1(h.data(), begin, adjustedSize);
|
||||
QVarLengthArray<char16_t> n(needle.size());
|
||||
qt_from_latin1(n.data(), needle.latin1(), needle.size());
|
||||
return QtPrivate::findString(QStringView(reinterpret_cast<const QChar*>(h.constData()), h.size()), from,
|
||||
QStringView(reinterpret_cast<const QChar*>(n.constData()), n.size()), cs);
|
||||
qsizetype res = QtPrivate::findString(QStringView(h.constData(), h.size()), 0,
|
||||
QStringView(n.constData(), n.size()), cs);
|
||||
if (res == -1)
|
||||
return -1;
|
||||
return res + std::distance(haystack.begin(), begin);
|
||||
#endif
|
||||
}
|
||||
|
||||
qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
|
||||
|
Loading…
Reference in New Issue
Block a user