From 3af5cab0547fc7c6a82d94e8a2104f1b052ccd7d Mon Sep 17 00:00:00 2001 From: Marc Mutz Date: Thu, 25 Feb 2016 02:32:27 +0100 Subject: [PATCH] Long live QStaticByteArrayMatcher! This is a version of QByteArrayMatcher that calculates the Boyer-Moore skip table at compile-time instead of at run-time, making this class more generally applicable than QByteArray- Matcher itself, at least for statically-known strings. The compile-time part requires C++14 constexpr support, but the class should compile and work even in C++98 mode, just with runtime initialization of the skip-table. While touching tst_qbytearraymatcher, clean up the static global QByteArrayMatchers there and add tests with needles longer than 255 characters for QByteArrayMatcher, too. [ChangeLog][QtCore] Added QStaticByteArrayMatcher. Change-Id: I0662f262ab19b79ae4096f3ab384d5b3ada72347 Reviewed-by: David Faure --- src/corelib/tools/qbytearraymatcher.cpp | 108 +++++++++++++++++ src/corelib/tools/qbytearraymatcher.h | 74 ++++++++++++ .../qbytearraymatcher/qbytearraymatcher.pro | 1 + .../tst_qbytearraymatcher.cpp | 109 +++++++++++++++++- 4 files changed, 288 insertions(+), 4 deletions(-) diff --git a/src/corelib/tools/qbytearraymatcher.cpp b/src/corelib/tools/qbytearraymatcher.cpp index d06ca1292a..fc9d12aba5 100644 --- a/src/corelib/tools/qbytearraymatcher.cpp +++ b/src/corelib/tools/qbytearraymatcher.cpp @@ -323,4 +323,112 @@ int qFindByteArray( return -1; } +/*! + \class QStaticByteArrayMatcherBase + \since 5.9 + \internal + \brief Non-template base class of QStaticByteArrayMatcher. +*/ + +/*! + \class QStaticByteArrayMatcher + \since 5.9 + \inmodule QtCore + \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher + + \ingroup tools + \ingroup string-processing + + This class is useful when you have a sequence of bytes that you + want to repeatedly match against some byte arrays (perhaps in a + loop), or when you want to search for the same sequence of bytes + multiple times in the same byte array. Using a matcher object and + indexIn() is faster than matching a plain QByteArray with + QByteArray::indexOf(), in particular if repeated matching takes place. + + Unlike QByteArrayMatcher, this class calculates the internal + representation at \e{compile-time}, if your compiler supports + C++14-level \c{constexpr} (C++11 is not sufficient), so it can + even benefit if you are doing one-off byte array matches. + + Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(), + passing it the C string literal you want to search for. Store the return + value of that function in a \c{static const auto} variable, so you don't need + to pass the \c{N} template parameter explicitly: + + \code + static const auto matcher = qMakeStaticByteArrayMatcher("needle"); + \endcode + + Then call indexIn() on the QByteArray in which you want to search, just like + with QByteArrayMatcher. + + Since this class is designed to do all the up-front calculations at compile-time, + it does not offer a setPattern() method. + + \sa QByteArrayMatcher, QStringMatcher +*/ + +/*! + \fn QStaticByteArrayMatcher::indexIn(const char *haystack, int hlen, int from) + + Searches the char string \a haystack, which has length \a hlen, from + byte position \a from (default 0, i.e. from the first byte), for + the byte array pattern() that was set in the constructor. + + Returns the position where the pattern() matched in \a haystack, or -1 if no match was found. +*/ + +/*! + \fn QStaticByteArrayMatcher::indexIn(const QByteArray &haystack, int from) + + Searches the char string \a haystack, from byte position \a from + (default 0, i.e. from the first byte), for the byte array pattern() + that was set in the constructor. + + Returns the position where the pattern() matched in \a haystack, or -1 if no match was found. +*/ + +/*! + \fn QByteArray QStaticByteArrayMatcher::pattern() const + + Returns the byte array pattern that this byte array matcher will + search for. + + \sa setPattern() +*/ + +/*! + \internal +*/ +int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const Q_DECL_NOTHROW +{ + if (from < 0) + from = 0; + return bm_find(reinterpret_cast(haystack), hlen, from, + reinterpret_cast(needle), nlen, m_skiptable.data); +} + +/*! + \fn QStaticByteArrayMatcher::QStaticByteArrayMatcher(const char (&pattern)[N]) + \internal +*/ + +/*! + \fn qMakeStaticByteArrayMatcher(const char (&pattern)[N]) + \since 5.9 + \relates QStaticByteArrayMatcher + + Return a QStaticByteArrayMatcher with the correct \c{N} determined + automatically from the \a pattern passed. + + To take full advantage of this function, assign the result to an + \c{auto} variable: + + \code + static const auto matcher = qMakeStaticByteArrayMatcher("needle"); + \endcode +*/ + + QT_END_NAMESPACE diff --git a/src/corelib/tools/qbytearraymatcher.h b/src/corelib/tools/qbytearraymatcher.h index aac62715a1..51e08ba4bf 100644 --- a/src/corelib/tools/qbytearraymatcher.h +++ b/src/corelib/tools/qbytearraymatcher.h @@ -83,6 +83,80 @@ private: }; }; +class QStaticByteArrayMatcherBase { + struct Skiptable { + uchar data[256]; + } m_skiptable; +protected: + explicit Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcherBase(const char *pattern, uint n) Q_DECL_NOTHROW + : m_skiptable(generate(pattern, n)) {} + // compiler-generated copy/more ctors/assignment operators are ok! + // compiler-generated dtor is ok! + + Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const Q_DECL_NOTHROW; + +private: + static Q_DECL_RELAXED_CONSTEXPR Skiptable generate(const char *pattern, uint n) Q_DECL_NOTHROW + { + const auto uchar_max = (std::numeric_limits::max)(); + uchar max = n > uchar_max ? uchar_max : n; + Skiptable table = { + // this verbose initialization code aims to avoid some opaque error messages + // even on powerful compilers such as GCC 5.3. Even though for GCC a loop + // format can be found that v5.3 groks, it's probably better to go with this + // for the time being: + { + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, + } + }; + pattern += n - max; + while (max--) + table.data[uchar(*pattern++)] = max; + return table; + } +}; + +template +class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase +{ + char m_pattern[N]; + Q_STATIC_ASSERT_X(N > 2, "QStaticByteArrayMatcher makes no sense for finding a single-char pattern"); +public: + explicit Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcher(const char (&patternToMatch)[N]) Q_DECL_NOTHROW + : QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern() + { + for (uint i = 0; i < N; ++i) + m_pattern[i] = patternToMatch[i]; + } + + int indexIn(const QByteArray &haystack, int from = 0) const Q_DECL_NOTHROW + { return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); } + int indexIn(const char *haystack, int hlen, int from = 0) const Q_DECL_NOTHROW + { return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); } + + QByteArray pattern() const { return QByteArray(m_pattern, int(N - 1)); } +}; + +template +Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N]) Q_DECL_NOTHROW +{ return QStaticByteArrayMatcher(pattern); } + QT_END_NAMESPACE #endif // QBYTEARRAYMATCHER_H diff --git a/tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro b/tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro index 0624e1886c..9d4d5964c9 100644 --- a/tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro +++ b/tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro @@ -2,3 +2,4 @@ CONFIG += testcase TARGET = tst_qbytearraymatcher QT = core testlib SOURCES = tst_qbytearraymatcher.cpp +contains(QT_CONFIG, c++14):CONFIG += c++14 diff --git a/tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp b/tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp index 82cc432d55..647a3ae379 100644 --- a/tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp +++ b/tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp @@ -43,10 +43,9 @@ class tst_QByteArrayMatcher : public QObject private slots: void interface(); void indexIn(); + void staticByteArrayMatcher(); }; -static QByteArrayMatcher matcher1; - void tst_QByteArrayMatcher::interface() { const char needle[] = "abc123"; @@ -56,6 +55,8 @@ void tst_QByteArrayMatcher::interface() haystack.insert(42, "abc123"); haystack.insert(84, "abc123"); + QByteArrayMatcher matcher1; + matcher1 = QByteArrayMatcher(QByteArray(needle)); QByteArrayMatcher matcher2; matcher2.setPattern(QByteArray(needle)); @@ -90,8 +91,10 @@ void tst_QByteArrayMatcher::interface() QCOMPARE(matcher7.indexIn(haystack), 42); } - -static QByteArrayMatcher matcher; +#define LONG_STRING__32 "abcdefghijklmnopqrstuvwxyz012345" +#define LONG_STRING__64 LONG_STRING__32 LONG_STRING__32 +#define LONG_STRING_128 LONG_STRING__64 LONG_STRING__64 +#define LONG_STRING_256 LONG_STRING_128 LONG_STRING_128 void tst_QByteArrayMatcher::indexIn() { @@ -101,6 +104,8 @@ void tst_QByteArrayMatcher::indexIn() QByteArray haystack(8, '\0'); haystack[7] = 0x1; + QByteArrayMatcher matcher; + matcher = QByteArrayMatcher(pattern); QCOMPARE(matcher.indexIn(haystack, 0), 5); QCOMPARE(matcher.indexIn(haystack, 1), 5); @@ -110,7 +115,103 @@ void tst_QByteArrayMatcher::indexIn() QCOMPARE(matcher.indexIn(haystack, 0), 5); QCOMPARE(matcher.indexIn(haystack, 1), 5); QCOMPARE(matcher.indexIn(haystack, 2), 5); + + QByteArray allChars(256, Qt::Uninitialized); + for (int i = 0; i < 256; ++i) + allChars[i] = char(i); + + matcher = QByteArrayMatcher(allChars); + haystack = LONG_STRING__32 "x" + matcher.pattern(); + QCOMPARE(matcher.indexIn(haystack, 0), 33); + QCOMPARE(matcher.indexIn(haystack, 1), 33); + QCOMPARE(matcher.indexIn(haystack, 2), 33); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + QCOMPARE(matcher.indexIn(haystack, 34), -1); + + matcher = QByteArrayMatcher(LONG_STRING_256); + haystack = LONG_STRING__32 "x" + matcher.pattern(); + QCOMPARE(matcher.indexIn(haystack, 0), 33); + QCOMPARE(matcher.indexIn(haystack, 1), 33); + QCOMPARE(matcher.indexIn(haystack, 2), 33); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + QCOMPARE(matcher.indexIn(haystack, 34), -1); } +void tst_QByteArrayMatcher::staticByteArrayMatcher() +{ + { + static Q_RELAXED_CONSTEXPR auto smatcher = qMakeStaticByteArrayMatcher("Hello"); + QCOMPARE(smatcher.pattern(), QByteArrayLiteral("Hello")); + + QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!")), 0); + QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!"), 0), 0); + QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!"), 1), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aHello, World!")), 1); + QCOMPARE(smatcher.indexIn(QByteArray("aaHello, World!")), 2); + QCOMPARE(smatcher.indexIn(QByteArray("aaaHello, World!")), 3); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaHello, World!")), 4); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHello, World!")), 5); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHello, World!")), 6); + QCOMPARE(smatcher.indexIn(QByteArray("HHello, World!")), 1); + QCOMPARE(smatcher.indexIn(QByteArray("HeHello, World!")), 2); + QCOMPARE(smatcher.indexIn(QByteArray("HelHello, World!")), 3); + QCOMPARE(smatcher.indexIn(QByteArray("HellHello, World!")), 4); + QCOMPARE(smatcher.indexIn(QByteArray("HellaHello, World!")), 5); + QCOMPARE(smatcher.indexIn(QByteArray("HellauHello, World!")), 6); + QCOMPARE(smatcher.indexIn(QByteArray("aHella, World!")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaHella, World!")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaaHella, World!")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaHella, World!")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHella, World!")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHella, World!")), -1); + + QCOMPARE(smatcher.indexIn(QByteArray("aHello")), 1); + QCOMPARE(smatcher.indexIn(QByteArray("aaHello")), 2); + QCOMPARE(smatcher.indexIn(QByteArray("aaaHello")), 3); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaHello")), 4); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHello")), 5); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHello")), 6); + QCOMPARE(smatcher.indexIn(QByteArray("HHello")), 1); + QCOMPARE(smatcher.indexIn(QByteArray("HeHello")), 2); + QCOMPARE(smatcher.indexIn(QByteArray("HelHello")), 3); + QCOMPARE(smatcher.indexIn(QByteArray("HellHello")), 4); + QCOMPARE(smatcher.indexIn(QByteArray("HellaHello")), 5); + QCOMPARE(smatcher.indexIn(QByteArray("HellauHello")), 6); + QCOMPARE(smatcher.indexIn(QByteArray("aHella")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaHella")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaaHella")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaHella")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHella")), -1); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHella")), -1); + } + + { + static Q_RELAXED_CONSTEXPR auto smatcher = qMakeStaticByteArrayMatcher(LONG_STRING_256); + QCOMPARE(smatcher.pattern(), QByteArrayLiteral(LONG_STRING_256)); + + QCOMPARE(smatcher.indexIn(QByteArray("a" LONG_STRING_256)), 1); + QCOMPARE(smatcher.indexIn(QByteArray("aa" LONG_STRING_256)), 2); + QCOMPARE(smatcher.indexIn(QByteArray("aaa" LONG_STRING_256)), 3); + QCOMPARE(smatcher.indexIn(QByteArray("aaaa" LONG_STRING_256)), 4); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaa" LONG_STRING_256)), 5); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaa" LONG_STRING_256)), 6); + QCOMPARE(smatcher.indexIn(QByteArray("a" LONG_STRING_256 "a")), 1); + QCOMPARE(smatcher.indexIn(QByteArray("aa" LONG_STRING_256 "a")), 2); + QCOMPARE(smatcher.indexIn(QByteArray("aaa" LONG_STRING_256 "a")), 3); + QCOMPARE(smatcher.indexIn(QByteArray("aaaa" LONG_STRING_256 "a")), 4); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaa" LONG_STRING_256 "a")), 5); + QCOMPARE(smatcher.indexIn(QByteArray("aaaaaa" LONG_STRING_256 "a")), 6); + QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING__32 "x" LONG_STRING_256)), 33); + QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING__64 "x" LONG_STRING_256)), 65); + QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING_128 "x" LONG_STRING_256)), 129); + } + +} + +#undef LONG_STRING_256 +#undef LONG_STRING_128 +#undef LONG_STRING__64 +#undef LONG_STRING__32 + QTEST_APPLESS_MAIN(tst_QByteArrayMatcher) #include "tst_qbytearraymatcher.moc"