Long live QStaticByteArrayMatcher!

This is a version of QByteArrayMatcher that calculates the
Boyer-Moore skip table at compile-time instead of at run-time,
making this class more generally applicable than QByteArray-
Matcher itself, at least for statically-known strings.

The compile-time part requires C++14 constexpr support, but
the class should compile and work even in C++98 mode, just
with runtime initialization of the skip-table.

While touching tst_qbytearraymatcher, clean up the static
global QByteArrayMatchers there and add tests with needles
longer than 255 characters for QByteArrayMatcher, too.

[ChangeLog][QtCore] Added QStaticByteArrayMatcher.

Change-Id: I0662f262ab19b79ae4096f3ab384d5b3ada72347
Reviewed-by: David Faure <david.faure@kdab.com>
This commit is contained in:
Marc Mutz 2016-02-25 02:32:27 +01:00 committed by Giuseppe D'Angelo
parent a6cdfacf8d
commit 3af5cab054
4 changed files with 288 additions and 4 deletions

View File

@ -323,4 +323,112 @@ int qFindByteArray(
return -1;
}
/*!
\class QStaticByteArrayMatcherBase
\since 5.9
\internal
\brief Non-template base class of QStaticByteArrayMatcher.
*/
/*!
\class QStaticByteArrayMatcher
\since 5.9
\inmodule QtCore
\brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher.
\ingroup tools
\ingroup string-processing
This class is useful when you have a sequence of bytes that you
want to repeatedly match against some byte arrays (perhaps in a
loop), or when you want to search for the same sequence of bytes
multiple times in the same byte array. Using a matcher object and
indexIn() is faster than matching a plain QByteArray with
QByteArray::indexOf(), in particular if repeated matching takes place.
Unlike QByteArrayMatcher, this class calculates the internal
representation at \e{compile-time}, if your compiler supports
C++14-level \c{constexpr} (C++11 is not sufficient), so it can
even benefit if you are doing one-off byte array matches.
Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
passing it the C string literal you want to search for. Store the return
value of that function in a \c{static const auto} variable, so you don't need
to pass the \c{N} template parameter explicitly:
\code
static const auto matcher = qMakeStaticByteArrayMatcher("needle");
\endcode
Then call indexIn() on the QByteArray in which you want to search, just like
with QByteArrayMatcher.
Since this class is designed to do all the up-front calculations at compile-time,
it does not offer a setPattern() method.
\sa QByteArrayMatcher, QStringMatcher
*/
/*!
\fn QStaticByteArrayMatcher::indexIn(const char *haystack, int hlen, int from)
Searches the char string \a haystack, which has length \a hlen, from
byte position \a from (default 0, i.e. from the first byte), for
the byte array pattern() that was set in the constructor.
Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
*/
/*!
\fn QStaticByteArrayMatcher::indexIn(const QByteArray &haystack, int from)
Searches the char string \a haystack, from byte position \a from
(default 0, i.e. from the first byte), for the byte array pattern()
that was set in the constructor.
Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
*/
/*!
\fn QByteArray QStaticByteArrayMatcher::pattern() const
Returns the byte array pattern that this byte array matcher will
search for.
\sa QByteArrayMatcher::setPattern()
*/
/*!
\internal
*/
int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const Q_DECL_NOTHROW
{
    // A negative start offset is treated as "start at the first byte".
    const int start = from < 0 ? 0 : from;

    // bm_find() works on unsigned bytes; view both buffers as such.
    const uchar *const text = reinterpret_cast<const uchar *>(haystack);
    const uchar *const pat = reinterpret_cast<const uchar *>(needle);

    // Delegate the actual search to bm_find(), handing it this matcher's
    // precomputed skip table.
    return bm_find(text, hlen, start, pat, nlen, m_skiptable.data);
}
/*!
\fn QStaticByteArrayMatcher::QStaticByteArrayMatcher(const char (&pattern)[N])
\internal
*/
/*!
\fn qMakeStaticByteArrayMatcher(const char (&pattern)[N])
\since 5.9
\relates QStaticByteArrayMatcher
Returns a QStaticByteArrayMatcher with the correct \c{N} determined
automatically from the \a pattern passed.
To take full advantage of this function, assign the result to an
\c{auto} variable:
\code
static const auto matcher = qMakeStaticByteArrayMatcher("needle");
\endcode
*/
QT_END_NAMESPACE

View File

@ -83,6 +83,80 @@ private:
};
};
// Non-template base class of QStaticByteArrayMatcher. It owns the skip table
// (one entry per possible byte value) and declares the out-of-line search
// routine, so that neither depends on the pattern-length template parameter.
class QStaticByteArrayMatcherBase {
    // Skip distance for each of the 256 possible byte values.
    struct Skiptable {
        uchar data[256];
    } m_skiptable;
protected:
    // Builds the skip table from the first n bytes of pattern. With relaxed
    // (C++14) constexpr support this can be evaluated at compile time.
    explicit Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcherBase(const char *pattern, uint n) Q_DECL_NOTHROW
        : m_skiptable(generate(pattern, n)) {}
    // compiler-generated copy/move ctors/assignment operators are ok!
    // compiler-generated dtor is ok!

    // Looks for needle (nlen bytes) in haystack (hlen bytes), starting at
    // position from. Defined out of line.
    Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const Q_DECL_NOTHROW;
private:
    // Computes the skip table for the n-byte pattern.
    //
    // Every entry starts out as min(n, 255). Then, for the last min(n, 255)
    // bytes of the pattern, the entry for that byte value is set to the
    // byte's distance from the end of the pattern (later occurrences of the
    // same byte overwrite earlier ones, so the smallest distance wins).
    static Q_DECL_RELAXED_CONSTEXPR Skiptable generate(const char *pattern, uint n) Q_DECL_NOTHROW
    {
        const auto uchar_max = (std::numeric_limits<uchar>::max)();
        // Table entries are uchar, so distances are capped at uchar_max.
        // The explicit cast makes the (range-checked) narrowing from uint
        // visible and keeps both branches of the conditional the same type,
        // avoiding implicit-conversion warnings.
        uchar max = n > uchar_max ? uchar_max : uchar(n);
        Skiptable table = {
            // this verbose initialization code aims to avoid some opaque error messages
            // even on powerful compilers such as GCC 5.3. Even though for GCC a loop
            // format can be found that v5.3 groks, it's probably better to go with this
            // for the time being:
            {
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
                max, max, max, max, max, max, max, max,   max, max, max, max, max, max, max, max,
            }
        };
        // Only the trailing `max` bytes of the pattern are representable in
        // the table; skip whatever precedes them.
        pattern += n - max;
        // After the decrement, `max` is the distance of the current byte
        // from the end of the pattern (max - 1 down to 0).
        while (max--)
            table.data[uchar(*pattern++)] = max;
        return table;
    }
};
// Byte array matcher for a pattern whose size is fixed at compile time.
// N is the size of the char array passed to the constructor; the pattern
// that is searched for consists of its first N - 1 bytes (for a string
// literal, that is everything except the terminating NUL).
template <uint N>
class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
{
    Q_STATIC_ASSERT_X(N > 2, "QStaticByteArrayMatcher makes no sense for finding a single-char pattern");

    // Private copy of the pattern, so the matcher does not depend on the
    // lifetime of the array it was constructed from.
    char m_pattern[N];
public:
    // The base class builds the skip table from the first N - 1 bytes; the
    // body then copies all N bytes into m_pattern one element at a time.
    explicit Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcher(const char (&patternToMatch)[N]) Q_DECL_NOTHROW
        : QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
    {
        uint pos = 0;
        while (pos != N) {
            m_pattern[pos] = patternToMatch[pos];
            ++pos;
        }
    }

    // Searches haystack for the pattern, starting at byte position from.
    int indexIn(const QByteArray &haystack, int from = 0) const Q_DECL_NOTHROW
    {
        return indexIn(haystack.data(), haystack.size(), from);
    }

    // Searches the hlen bytes at haystack for the pattern, starting at byte
    // position from.
    int indexIn(const char *haystack, int hlen, int from = 0) const Q_DECL_NOTHROW
    {
        return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from);
    }

    // Returns a QByteArray holding the N - 1 pattern bytes.
    QByteArray pattern() const
    {
        return QByteArray(m_pattern, static_cast<int>(N - 1));
    }
};
// Factory that deduces the template argument N from the size of the char
// array passed in, so callers can write
// `auto m = qMakeStaticByteArrayMatcher("needle");` without spelling N out.
template <uint N>
Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) Q_DECL_NOTHROW
{ return QStaticByteArrayMatcher<N>(pattern); }
QT_END_NAMESPACE
#endif // QBYTEARRAYMATCHER_H

View File

@ -2,3 +2,4 @@ CONFIG += testcase
TARGET = tst_qbytearraymatcher
QT = core testlib
SOURCES = tst_qbytearraymatcher.cpp
contains(QT_CONFIG, c++14):CONFIG += c++14

View File

@ -43,10 +43,9 @@ class tst_QByteArrayMatcher : public QObject
private slots:
void interface();
void indexIn();
void staticByteArrayMatcher();
};
static QByteArrayMatcher matcher1;
void tst_QByteArrayMatcher::interface()
{
const char needle[] = "abc123";
@ -56,6 +55,8 @@ void tst_QByteArrayMatcher::interface()
haystack.insert(42, "abc123");
haystack.insert(84, "abc123");
QByteArrayMatcher matcher1;
matcher1 = QByteArrayMatcher(QByteArray(needle));
QByteArrayMatcher matcher2;
matcher2.setPattern(QByteArray(needle));
@ -90,8 +91,10 @@ void tst_QByteArrayMatcher::interface()
QCOMPARE(matcher7.indexIn(haystack), 42);
}
static QByteArrayMatcher matcher;
// String literals of 32, 64, 128 and 256 characters. Each longer one is built
// by placing two copies of the previous literal next to each other (adjacent
// string literals are concatenated by the compiler).
#define LONG_STRING__32 "abcdefghijklmnopqrstuvwxyz012345"
#define LONG_STRING__64 LONG_STRING__32 LONG_STRING__32
#define LONG_STRING_128 LONG_STRING__64 LONG_STRING__64
#define LONG_STRING_256 LONG_STRING_128 LONG_STRING_128
void tst_QByteArrayMatcher::indexIn()
{
@ -101,6 +104,8 @@ void tst_QByteArrayMatcher::indexIn()
QByteArray haystack(8, '\0');
haystack[7] = 0x1;
QByteArrayMatcher matcher;
matcher = QByteArrayMatcher(pattern);
QCOMPARE(matcher.indexIn(haystack, 0), 5);
QCOMPARE(matcher.indexIn(haystack, 1), 5);
@ -110,7 +115,103 @@ void tst_QByteArrayMatcher::indexIn()
QCOMPARE(matcher.indexIn(haystack, 0), 5);
QCOMPARE(matcher.indexIn(haystack, 1), 5);
QCOMPARE(matcher.indexIn(haystack, 2), 5);
QByteArray allChars(256, Qt::Uninitialized);
for (int i = 0; i < 256; ++i)
allChars[i] = char(i);
matcher = QByteArrayMatcher(allChars);
haystack = LONG_STRING__32 "x" + matcher.pattern();
QCOMPARE(matcher.indexIn(haystack, 0), 33);
QCOMPARE(matcher.indexIn(haystack, 1), 33);
QCOMPARE(matcher.indexIn(haystack, 2), 33);
QCOMPARE(matcher.indexIn(haystack, 33), 33);
QCOMPARE(matcher.indexIn(haystack, 34), -1);
matcher = QByteArrayMatcher(LONG_STRING_256);
haystack = LONG_STRING__32 "x" + matcher.pattern();
QCOMPARE(matcher.indexIn(haystack, 0), 33);
QCOMPARE(matcher.indexIn(haystack, 1), 33);
QCOMPARE(matcher.indexIn(haystack, 2), 33);
QCOMPARE(matcher.indexIn(haystack, 33), 33);
QCOMPARE(matcher.indexIn(haystack, 34), -1);
}
// Exercises QStaticByteArrayMatcher with a short needle ("Hello") and with a
// 256-character needle, checking pattern() and indexIn() against haystacks in
// which the needle appears at various offsets or not at all.
void tst_QByteArrayMatcher::staticByteArrayMatcher()
{
{
// Short needle.
static Q_RELAXED_CONSTEXPR auto smatcher = qMakeStaticByteArrayMatcher("Hello");
QCOMPARE(smatcher.pattern(), QByteArrayLiteral("Hello"));
// Needle at the very start; also check the explicit `from` argument.
QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!")), 0);
QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!"), 0), 0);
QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!"), 1), -1);
// Needle preceded by 1..6 unrelated bytes, followed by more text.
QCOMPARE(smatcher.indexIn(QByteArray("aHello, World!")), 1);
QCOMPARE(smatcher.indexIn(QByteArray("aaHello, World!")), 2);
QCOMPARE(smatcher.indexIn(QByteArray("aaaHello, World!")), 3);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaHello, World!")), 4);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHello, World!")), 5);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHello, World!")), 6);
// Needle preceded by a partial copy of itself, followed by more text.
QCOMPARE(smatcher.indexIn(QByteArray("HHello, World!")), 1);
QCOMPARE(smatcher.indexIn(QByteArray("HeHello, World!")), 2);
QCOMPARE(smatcher.indexIn(QByteArray("HelHello, World!")), 3);
QCOMPARE(smatcher.indexIn(QByteArray("HellHello, World!")), 4);
QCOMPARE(smatcher.indexIn(QByteArray("HellaHello, World!")), 5);
QCOMPARE(smatcher.indexIn(QByteArray("HellauHello, World!")), 6);
// Near miss ("Hella") followed by more text: no match expected.
QCOMPARE(smatcher.indexIn(QByteArray("aHella, World!")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaHella, World!")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaaHella, World!")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaHella, World!")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHella, World!")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHella, World!")), -1);
// Same three groups again, but with the needle (or near miss) ending
// exactly at the end of the haystack.
QCOMPARE(smatcher.indexIn(QByteArray("aHello")), 1);
QCOMPARE(smatcher.indexIn(QByteArray("aaHello")), 2);
QCOMPARE(smatcher.indexIn(QByteArray("aaaHello")), 3);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaHello")), 4);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHello")), 5);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHello")), 6);
QCOMPARE(smatcher.indexIn(QByteArray("HHello")), 1);
QCOMPARE(smatcher.indexIn(QByteArray("HeHello")), 2);
QCOMPARE(smatcher.indexIn(QByteArray("HelHello")), 3);
QCOMPARE(smatcher.indexIn(QByteArray("HellHello")), 4);
QCOMPARE(smatcher.indexIn(QByteArray("HellaHello")), 5);
QCOMPARE(smatcher.indexIn(QByteArray("HellauHello")), 6);
QCOMPARE(smatcher.indexIn(QByteArray("aHella")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaHella")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaaHella")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaHella")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHella")), -1);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHella")), -1);
}
{
// Long needle: 256 characters, i.e. more than 255.
static Q_RELAXED_CONSTEXPR auto smatcher = qMakeStaticByteArrayMatcher(LONG_STRING_256);
QCOMPARE(smatcher.pattern(), QByteArrayLiteral(LONG_STRING_256));
// Needle preceded by 1..6 bytes, ending exactly at the end of the haystack.
QCOMPARE(smatcher.indexIn(QByteArray("a" LONG_STRING_256)), 1);
QCOMPARE(smatcher.indexIn(QByteArray("aa" LONG_STRING_256)), 2);
QCOMPARE(smatcher.indexIn(QByteArray("aaa" LONG_STRING_256)), 3);
QCOMPARE(smatcher.indexIn(QByteArray("aaaa" LONG_STRING_256)), 4);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaa" LONG_STRING_256)), 5);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaa" LONG_STRING_256)), 6);
// Same, with one trailing byte after the needle.
QCOMPARE(smatcher.indexIn(QByteArray("a" LONG_STRING_256 "a")), 1);
QCOMPARE(smatcher.indexIn(QByteArray("aa" LONG_STRING_256 "a")), 2);
QCOMPARE(smatcher.indexIn(QByteArray("aaa" LONG_STRING_256 "a")), 3);
QCOMPARE(smatcher.indexIn(QByteArray("aaaa" LONG_STRING_256 "a")), 4);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaa" LONG_STRING_256 "a")), 5);
QCOMPARE(smatcher.indexIn(QByteArray("aaaaaa" LONG_STRING_256 "a")), 6);
// Needle preceded by a prefix of itself (32/64/128 chars) plus a separator.
QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING__32 "x" LONG_STRING_256)), 33);
QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING__64 "x" LONG_STRING_256)), 65);
QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING_128 "x" LONG_STRING_256)), 129);
}
}
#undef LONG_STRING_256
#undef LONG_STRING_128
#undef LONG_STRING__64
#undef LONG_STRING__32
QTEST_APPLESS_MAIN(tst_QByteArrayMatcher)
#include "tst_qbytearraymatcher.moc"