Long live QStaticByteArrayMatcher!
This is a version of QByteArrayMatcher that calculates the Boyer-Moore skip table at compile-time instead of at run-time, making this class more generally applicable than QByteArray- Matcher itself, at least for statically-known strings. The compile-time part requires C++14 constexpr support, but the class should compile and work even in C++98 mode, just with runtime initialization of the skip-table. While touching tst_qbytearraymatcher, clean up the static global QByteArrayMatchers there and add tests with needles longer than 255 characters for QByteArrayMatcher, too. [ChangeLog][QtCore] Added QStaticByteArrayMatcher. Change-Id: I0662f262ab19b79ae4096f3ab384d5b3ada72347 Reviewed-by: David Faure <david.faure@kdab.com>
This commit is contained in:
parent
a6cdfacf8d
commit
3af5cab054
@ -323,4 +323,112 @@ int qFindByteArray(
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*!
|
||||
\class QStaticByteArrayMatcherBase
|
||||
\since 5.9
|
||||
\internal
|
||||
\brief Non-template base class of QStaticByteArrayMatcher.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\class QStaticByteArrayMatcher
|
||||
\since 5.9
|
||||
\inmodule QtCore
|
||||
\brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher
|
||||
|
||||
\ingroup tools
|
||||
\ingroup string-processing
|
||||
|
||||
This class is useful when you have a sequence of bytes that you
|
||||
want to repeatedly match against some byte arrays (perhaps in a
|
||||
loop), or when you want to search for the same sequence of bytes
|
||||
multiple times in the same byte array. Using a matcher object and
|
||||
indexIn() is faster than matching a plain QByteArray with
|
||||
QByteArray::indexOf(), in particular if repeated matching takes place.
|
||||
|
||||
Unlike QByteArrayMatcher, this class calculates the internal
|
||||
representation at \e{compile-time}, if your compiler supports
|
||||
C++14-level \c{constexpr} (C++11 is not sufficient), so it can
|
||||
even benefit if you are doing one-off byte array matches.
|
||||
|
||||
Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
|
||||
passing it the C string literal you want to search for. Store the return
|
||||
value of that function in a \c{static const auto} variable, so you don't need
|
||||
to pass the \c{N} template parameter explicitly:
|
||||
|
||||
\code
|
||||
static const auto matcher = qMakeStaticByteArrayMatcher("needle");
|
||||
\endcode
|
||||
|
||||
Then call indexIn() on the QByteArray in which you want to search, just like
|
||||
with QByteArrayMatcher.
|
||||
|
||||
Since this class is designed to do all the up-front calculations at compile-time,
|
||||
it does not offer a setPattern() method.
|
||||
|
||||
\sa QByteArrayMatcher, QStringMatcher
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStaticByteArrayMatcher::indexIn(const char *haystack, int hlen, int from)
|
||||
|
||||
Searches the char string \a haystack, which has length \a hlen, from
|
||||
byte position \a from (default 0, i.e. from the first byte), for
|
||||
the byte array pattern() that was set in the constructor.
|
||||
|
||||
Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStaticByteArrayMatcher::indexIn(const QByteArray &haystack, int from)
|
||||
|
||||
Searches the char string \a haystack, from byte position \a from
|
||||
(default 0, i.e. from the first byte), for the byte array pattern()
|
||||
that was set in the constructor.
|
||||
|
||||
Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QByteArray QStaticByteArrayMatcher::pattern() const
|
||||
|
||||
Returns the byte array pattern that this byte array matcher will
|
||||
search for.
|
||||
|
||||
\sa setPattern()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\internal
|
||||
*/
|
||||
int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const Q_DECL_NOTHROW
|
||||
{
|
||||
if (from < 0)
|
||||
from = 0;
|
||||
return bm_find(reinterpret_cast<const uchar *>(haystack), hlen, from,
|
||||
reinterpret_cast<const uchar *>(needle), nlen, m_skiptable.data);
|
||||
}
|
||||
|
||||
/*!
|
||||
\fn QStaticByteArrayMatcher::QStaticByteArrayMatcher(const char (&pattern)[N])
|
||||
\internal
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn qMakeStaticByteArrayMatcher(const char (&pattern)[N])
|
||||
\since 5.9
|
||||
\relates QStaticByteArrayMatcher
|
||||
|
||||
Return a QStaticByteArrayMatcher with the correct \c{N} determined
|
||||
automatically from the \a pattern passed.
|
||||
|
||||
To take full advantage of this function, assign the result to an
|
||||
\c{auto} variable:
|
||||
|
||||
\code
|
||||
static const auto matcher = qMakeStaticByteArrayMatcher("needle");
|
||||
\endcode
|
||||
*/
|
||||
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -83,6 +83,80 @@ private:
|
||||
};
|
||||
};
|
||||
|
||||
class QStaticByteArrayMatcherBase {
|
||||
struct Skiptable {
|
||||
uchar data[256];
|
||||
} m_skiptable;
|
||||
protected:
|
||||
explicit Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcherBase(const char *pattern, uint n) Q_DECL_NOTHROW
|
||||
: m_skiptable(generate(pattern, n)) {}
|
||||
// compiler-generated copy/more ctors/assignment operators are ok!
|
||||
// compiler-generated dtor is ok!
|
||||
|
||||
Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const Q_DECL_NOTHROW;
|
||||
|
||||
private:
|
||||
static Q_DECL_RELAXED_CONSTEXPR Skiptable generate(const char *pattern, uint n) Q_DECL_NOTHROW
|
||||
{
|
||||
const auto uchar_max = (std::numeric_limits<uchar>::max)();
|
||||
uchar max = n > uchar_max ? uchar_max : n;
|
||||
Skiptable table = {
|
||||
// this verbose initialization code aims to avoid some opaque error messages
|
||||
// even on powerful compilers such as GCC 5.3. Even though for GCC a loop
|
||||
// format can be found that v5.3 groks, it's probably better to go with this
|
||||
// for the time being:
|
||||
{
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
|
||||
}
|
||||
};
|
||||
pattern += n - max;
|
||||
while (max--)
|
||||
table.data[uchar(*pattern++)] = max;
|
||||
return table;
|
||||
}
|
||||
};
|
||||
|
||||
template <uint N>
|
||||
class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
|
||||
{
|
||||
char m_pattern[N];
|
||||
Q_STATIC_ASSERT_X(N > 2, "QStaticByteArrayMatcher makes no sense for finding a single-char pattern");
|
||||
public:
|
||||
explicit Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcher(const char (&patternToMatch)[N]) Q_DECL_NOTHROW
|
||||
: QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
|
||||
{
|
||||
for (uint i = 0; i < N; ++i)
|
||||
m_pattern[i] = patternToMatch[i];
|
||||
}
|
||||
|
||||
int indexIn(const QByteArray &haystack, int from = 0) const Q_DECL_NOTHROW
|
||||
{ return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); }
|
||||
int indexIn(const char *haystack, int hlen, int from = 0) const Q_DECL_NOTHROW
|
||||
{ return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); }
|
||||
|
||||
QByteArray pattern() const { return QByteArray(m_pattern, int(N - 1)); }
|
||||
};
|
||||
|
||||
template <uint N>
|
||||
Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) Q_DECL_NOTHROW
|
||||
{ return QStaticByteArrayMatcher<N>(pattern); }
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QBYTEARRAYMATCHER_H
|
||||
|
@ -2,3 +2,4 @@ CONFIG += testcase
|
||||
TARGET = tst_qbytearraymatcher
|
||||
QT = core testlib
|
||||
SOURCES = tst_qbytearraymatcher.cpp
|
||||
contains(QT_CONFIG, c++14):CONFIG += c++14
|
||||
|
@ -43,10 +43,9 @@ class tst_QByteArrayMatcher : public QObject
|
||||
private slots:
|
||||
void interface();
|
||||
void indexIn();
|
||||
void staticByteArrayMatcher();
|
||||
};
|
||||
|
||||
static QByteArrayMatcher matcher1;
|
||||
|
||||
void tst_QByteArrayMatcher::interface()
|
||||
{
|
||||
const char needle[] = "abc123";
|
||||
@ -56,6 +55,8 @@ void tst_QByteArrayMatcher::interface()
|
||||
haystack.insert(42, "abc123");
|
||||
haystack.insert(84, "abc123");
|
||||
|
||||
QByteArrayMatcher matcher1;
|
||||
|
||||
matcher1 = QByteArrayMatcher(QByteArray(needle));
|
||||
QByteArrayMatcher matcher2;
|
||||
matcher2.setPattern(QByteArray(needle));
|
||||
@ -90,8 +91,10 @@ void tst_QByteArrayMatcher::interface()
|
||||
QCOMPARE(matcher7.indexIn(haystack), 42);
|
||||
}
|
||||
|
||||
|
||||
static QByteArrayMatcher matcher;
|
||||
#define LONG_STRING__32 "abcdefghijklmnopqrstuvwxyz012345"
|
||||
#define LONG_STRING__64 LONG_STRING__32 LONG_STRING__32
|
||||
#define LONG_STRING_128 LONG_STRING__64 LONG_STRING__64
|
||||
#define LONG_STRING_256 LONG_STRING_128 LONG_STRING_128
|
||||
|
||||
void tst_QByteArrayMatcher::indexIn()
|
||||
{
|
||||
@ -101,6 +104,8 @@ void tst_QByteArrayMatcher::indexIn()
|
||||
QByteArray haystack(8, '\0');
|
||||
haystack[7] = 0x1;
|
||||
|
||||
QByteArrayMatcher matcher;
|
||||
|
||||
matcher = QByteArrayMatcher(pattern);
|
||||
QCOMPARE(matcher.indexIn(haystack, 0), 5);
|
||||
QCOMPARE(matcher.indexIn(haystack, 1), 5);
|
||||
@ -110,7 +115,103 @@ void tst_QByteArrayMatcher::indexIn()
|
||||
QCOMPARE(matcher.indexIn(haystack, 0), 5);
|
||||
QCOMPARE(matcher.indexIn(haystack, 1), 5);
|
||||
QCOMPARE(matcher.indexIn(haystack, 2), 5);
|
||||
|
||||
QByteArray allChars(256, Qt::Uninitialized);
|
||||
for (int i = 0; i < 256; ++i)
|
||||
allChars[i] = char(i);
|
||||
|
||||
matcher = QByteArrayMatcher(allChars);
|
||||
haystack = LONG_STRING__32 "x" + matcher.pattern();
|
||||
QCOMPARE(matcher.indexIn(haystack, 0), 33);
|
||||
QCOMPARE(matcher.indexIn(haystack, 1), 33);
|
||||
QCOMPARE(matcher.indexIn(haystack, 2), 33);
|
||||
QCOMPARE(matcher.indexIn(haystack, 33), 33);
|
||||
QCOMPARE(matcher.indexIn(haystack, 34), -1);
|
||||
|
||||
matcher = QByteArrayMatcher(LONG_STRING_256);
|
||||
haystack = LONG_STRING__32 "x" + matcher.pattern();
|
||||
QCOMPARE(matcher.indexIn(haystack, 0), 33);
|
||||
QCOMPARE(matcher.indexIn(haystack, 1), 33);
|
||||
QCOMPARE(matcher.indexIn(haystack, 2), 33);
|
||||
QCOMPARE(matcher.indexIn(haystack, 33), 33);
|
||||
QCOMPARE(matcher.indexIn(haystack, 34), -1);
|
||||
}
|
||||
|
||||
void tst_QByteArrayMatcher::staticByteArrayMatcher()
|
||||
{
|
||||
{
|
||||
static Q_RELAXED_CONSTEXPR auto smatcher = qMakeStaticByteArrayMatcher("Hello");
|
||||
QCOMPARE(smatcher.pattern(), QByteArrayLiteral("Hello"));
|
||||
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!")), 0);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!"), 0), 0);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!"), 1), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aHello, World!")), 1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaHello, World!")), 2);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaHello, World!")), 3);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaHello, World!")), 4);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHello, World!")), 5);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHello, World!")), 6);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HHello, World!")), 1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HeHello, World!")), 2);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HelHello, World!")), 3);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HellHello, World!")), 4);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HellaHello, World!")), 5);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HellauHello, World!")), 6);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aHella, World!")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaHella, World!")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaHella, World!")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaHella, World!")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHella, World!")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHella, World!")), -1);
|
||||
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aHello")), 1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaHello")), 2);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaHello")), 3);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaHello")), 4);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHello")), 5);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHello")), 6);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HHello")), 1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HeHello")), 2);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HelHello")), 3);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HellHello")), 4);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HellaHello")), 5);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("HellauHello")), 6);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aHella")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaHella")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaHella")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaHella")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHella")), -1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHella")), -1);
|
||||
}
|
||||
|
||||
{
|
||||
static Q_RELAXED_CONSTEXPR auto smatcher = qMakeStaticByteArrayMatcher(LONG_STRING_256);
|
||||
QCOMPARE(smatcher.pattern(), QByteArrayLiteral(LONG_STRING_256));
|
||||
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("a" LONG_STRING_256)), 1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aa" LONG_STRING_256)), 2);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaa" LONG_STRING_256)), 3);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaa" LONG_STRING_256)), 4);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaa" LONG_STRING_256)), 5);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaa" LONG_STRING_256)), 6);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("a" LONG_STRING_256 "a")), 1);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aa" LONG_STRING_256 "a")), 2);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaa" LONG_STRING_256 "a")), 3);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaa" LONG_STRING_256 "a")), 4);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaa" LONG_STRING_256 "a")), 5);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaa" LONG_STRING_256 "a")), 6);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING__32 "x" LONG_STRING_256)), 33);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING__64 "x" LONG_STRING_256)), 65);
|
||||
QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING_128 "x" LONG_STRING_256)), 129);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#undef LONG_STRING_256
|
||||
#undef LONG_STRING_128
|
||||
#undef LONG_STRING__64
|
||||
#undef LONG_STRING__32
|
||||
|
||||
QTEST_APPLESS_MAIN(tst_QByteArrayMatcher)
|
||||
#include "tst_qbytearraymatcher.moc"
|
||||
|
Loading…
Reference in New Issue
Block a user