QRegularExpression: fix matching over QStringRefs

Playing with the offset argument of pcre_exec is not equivalent to
adjusting the pointer to the subject string. In particular, PCRE
can go behind the offset to check for lookbehinds or "transition"
metacharacters (\b, \B, etc.).

This made the code that deals with QStringRefs behave differently from
the corresponding code dealing with QStrings. For instance,

   QString subject("Miss");
   QRegularExpression re("(?<=M)iss");
   re.match(subject.mid(1));           // doesn't match
   re.match(subject.midRef(1));        // matches!!!

Instead, actually adjust the pointer to the subject string so that
the behavior is identical. A broken test that relied on the
equivalence is also removed.

Change-Id: If96333241ef59621d7f5a6a170ebd0a186844874
Reviewed-by: Volker Krause <volker.krause@kdab.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Giuseppe D'Angelo 2015-08-17 11:28:16 +02:00
parent 4738b450d2
commit ee15bef3ea
3 changed files with 393 additions and 39 deletions

View File

@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>.
** Copyright (C) 2012 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
** Copyright (C) 2015 Giuseppe D'Angelo <dangelog@gmail.com>.
** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
@ -1325,48 +1325,45 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
int * const captureOffsets = priv->capturedOffsets.data();
const int captureOffsetsCount = priv->capturedOffsets.size();
int realOffset = offset + subjectStart;
const int realSubjectLength = subjectLength + subjectStart;
const unsigned short * const subjectUtf16 = subject.utf16();
const unsigned short * const subjectUtf16 = subject.utf16() + subjectStart;
int result;
if (!previousMatchWasEmpty) {
result = pcre16SafeExec(compiledPattern, currentStudyData,
subjectUtf16, realSubjectLength,
realOffset, pcreOptions,
subjectUtf16, subjectLength,
offset, pcreOptions,
captureOffsets, captureOffsetsCount);
} else {
result = pcre16SafeExec(compiledPattern, currentStudyData,
subjectUtf16, realSubjectLength,
realOffset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED,
subjectUtf16, subjectLength,
offset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED,
captureOffsets, captureOffsetsCount);
if (result == PCRE_ERROR_NOMATCH) {
++realOffset;
++offset;
if (usingCrLfNewlines
&& realOffset < realSubjectLength
&& subjectUtf16[realOffset - 1] == QLatin1Char('\r')
&& subjectUtf16[realOffset] == QLatin1Char('\n')) {
++realOffset;
} else if (realOffset < realSubjectLength
&& QChar::isLowSurrogate(subjectUtf16[realOffset])) {
++realOffset;
&& offset < subjectLength
&& subjectUtf16[offset - 1] == QLatin1Char('\r')
&& subjectUtf16[offset] == QLatin1Char('\n')) {
++offset;
} else if (offset < subjectLength
&& QChar::isLowSurrogate(subjectUtf16[offset])) {
++offset;
}
result = pcre16SafeExec(compiledPattern, currentStudyData,
subjectUtf16, realSubjectLength,
realOffset, pcreOptions,
subjectUtf16, subjectLength,
offset, pcreOptions,
captureOffsets, captureOffsetsCount);
}
}
#ifdef QREGULAREXPRESSION_DEBUG
qDebug() << "Matching" << pattern << "against" << subject
<< "starting at" << subjectStart << "len" << subjectLength << "real len" << realSubjectLength
<< "offset" << offset << "real offset" << realOffset
<< "starting at" << subjectStart << "len" << subjectLength
<< "offset" << offset
<< matchType << matchOptions << previousMatchWasEmpty
<< "result" << result;
#endif
@ -2041,7 +2038,7 @@ QString QRegularExpressionMatch::captured(int nth) const
if (start == -1) // didn't capture
return QString();
return d->subject.mid(start, capturedLength(nth));
return d->subject.mid(start + d->subjectStart, capturedLength(nth));
}
/*!
@ -2062,7 +2059,7 @@ QStringRef QRegularExpressionMatch::capturedRef(int nth) const
if (start == -1) // didn't capture
return QStringRef();
return d->subject.midRef(start, capturedLength(nth));
return d->subject.midRef(start + d->subjectStart, capturedLength(nth));
}
/*!

View File

@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>.
** Copyright (C) 2013 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
** Copyright (C) 2015 Giuseppe D'Angelo <dangelog@gmail.com>.
** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
** Contact: http://www.qt.io/licensing/
**
** This file is part of the test suite of the Qt Toolkit.
@ -306,19 +306,6 @@ static void testMatch(const QRegularExpression &regexp,
matchType,
matchOptions,
result);
// offset <= 0 tested above; now also test stringrefs not spanning over
// the entire subject. Note that the offset can be negative, hence the above
// tests can't be merged into this one
for (int i = 1; i <= offset; ++i) {
testMatchImpl<QREMatch>(regexp,
matchingMethodForStringRef,
QStringRef(&subject, i, subject.length() - i),
offset - i,
matchType,
matchOptions,
result);
}
}
typedef QRegularExpressionMatch (QRegularExpression::*QREMatchStringPMF)(const QString &, int, QRegularExpression::MatchType, QRegularExpression::MatchOptions) const;
@ -736,6 +723,18 @@ void tst_QRegularExpression::normalMatch_data()
// ***
m.clear();
m.isValid = true; m.hasMatch = true;
m.captured << "bcd";
QTest::newRow("match12")
<< QRegularExpression("\\Bbcd\\B")
<< "abcde"
<< 1
<< QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
<< m;
// ***
m.clear();
m.isValid = true;
QTest::newRow("nomatch01") << QRegularExpression("\\d+")
@ -1705,3 +1704,360 @@ void tst_QRegularExpression::JOptionUsage()
re.optimize();
QCOMPARE(re.isValid(), isValid);
}
// Checks that matching against a QString and against a QStringRef covering the
// same characters yields the same result, for both match() and globalMatch().
//
// The subject is "Mississippi" and the pattern is \Biss\B. Every case comes as
// a QString/QStringRef pair with identical expectations:
//   - whole subject, optionally with a start offset: the offset only moves the
//     starting position, so reported positions stay relative to the full string;
//   - a substring created with mid()/midRef(): the substring is the entire
//     subject for the match, so positions are relative to the substring and the
//     characters in front of it must not influence \B.
void tst_QRegularExpression::QStringAndQStringRefEquivalence()
{
    const QString subject = QStringLiteral("Mississippi");
    {
        const QRegularExpression re("\\Biss\\B");
        QVERIFY(re.isValid());

        // ---- match() ----

        // Whole subject, no offset: first hit is "iss" at [1, 4).
        {
            const QRegularExpressionMatch match = re.match(subject);
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 1);
            QCOMPARE(match.capturedEnd(), 4);
        }
        {
            const QRegularExpressionMatch match = re.match(QStringRef(&subject));
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 1);
            QCOMPARE(match.capturedEnd(), 4);
        }

        // Whole subject, offset 1: same hit, still reported at [1, 4).
        {
            const QRegularExpressionMatch match = re.match(subject, 1);
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 1);
            QCOMPARE(match.capturedEnd(), 4);
        }
        {
            const QRegularExpressionMatch match = re.match(QStringRef(&subject), 1);
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 1);
            QCOMPARE(match.capturedEnd(), 4);
        }

        // Substring from index 1 ("ississippi"): the leading "iss" is now at
        // the very start of the subject and is expected to be skipped; the hit
        // is the next "iss", at [3, 6) relative to the substring.
        {
            const QRegularExpressionMatch match = re.match(subject.mid(1));
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 3);
            QCOMPARE(match.capturedEnd(), 6);
        }
        {
            const QRegularExpressionMatch match = re.match(subject.midRef(1));
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 3);
            QCOMPARE(match.capturedEnd(), 6);
        }

        // Substring from index 1, offset 1: same expectation as above.
        {
            const QRegularExpressionMatch match = re.match(subject.mid(1), 1);
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 3);
            QCOMPARE(match.capturedEnd(), 6);
        }
        {
            const QRegularExpressionMatch match = re.match(subject.midRef(1), 1);
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 3);
            QCOMPARE(match.capturedEnd(), 6);
        }

        // Whole subject, offset 4: the second "iss" at [4, 7) is found.
        {
            const QRegularExpressionMatch match = re.match(subject, 4);
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 4);
            QCOMPARE(match.capturedEnd(), 7);
        }
        {
            const QRegularExpressionMatch match = re.match(QStringRef(&subject), 4);
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 4);
            QCOMPARE(match.capturedEnd(), 7);
        }

        // Substring from index 4 ("issippi"): no match is expected, unlike the
        // offset-4 case above where the preceding characters are still part of
        // the subject.
        {
            const QRegularExpressionMatch match = re.match(subject.mid(4));
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(!match.hasMatch());
        }
        {
            const QRegularExpressionMatch match = re.match(subject.midRef(4));
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(!match.hasMatch());
        }

        // ---- globalMatch() ----

        // Whole subject, no offset: two hits, [1, 4) and [4, 7).
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject);
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match1 = i.next();
            consistencyCheck(match1);
            QVERIFY(match1.isValid());
            QVERIFY(match1.hasMatch());
            QCOMPARE(match1.captured(), QStringLiteral("iss"));
            QCOMPARE(match1.capturedStart(), 1);
            QCOMPARE(match1.capturedEnd(), 4);

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match2 = i.next();
            consistencyCheck(match2);
            QVERIFY(match2.isValid());
            QVERIFY(match2.hasMatch());
            QCOMPARE(match2.captured(), QStringLiteral("iss"));
            QCOMPARE(match2.capturedStart(), 4);
            QCOMPARE(match2.capturedEnd(), 7);

            QVERIFY(!i.hasNext());
        }
        {
            QRegularExpressionMatchIterator i = re.globalMatch(QStringRef(&subject));
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match1 = i.next();
            consistencyCheck(match1);
            QVERIFY(match1.isValid());
            QVERIFY(match1.hasMatch());
            QCOMPARE(match1.captured(), QStringLiteral("iss"));
            QCOMPARE(match1.capturedStart(), 1);
            QCOMPARE(match1.capturedEnd(), 4);

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match2 = i.next();
            consistencyCheck(match2);
            QVERIFY(match2.isValid());
            QVERIFY(match2.hasMatch());
            QCOMPARE(match2.captured(), QStringLiteral("iss"));
            QCOMPARE(match2.capturedStart(), 4);
            QCOMPARE(match2.capturedEnd(), 7);

            QVERIFY(!i.hasNext());
        }

        // Whole subject, offset 1: the same two hits.
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject, 1);
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match1 = i.next();
            consistencyCheck(match1);
            QVERIFY(match1.isValid());
            QVERIFY(match1.hasMatch());
            QCOMPARE(match1.captured(), QStringLiteral("iss"));
            QCOMPARE(match1.capturedStart(), 1);
            QCOMPARE(match1.capturedEnd(), 4);

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match2 = i.next();
            consistencyCheck(match2);
            QVERIFY(match2.isValid());
            QVERIFY(match2.hasMatch());
            QCOMPARE(match2.captured(), QStringLiteral("iss"));
            QCOMPARE(match2.capturedStart(), 4);
            QCOMPARE(match2.capturedEnd(), 7);

            QVERIFY(!i.hasNext());
        }
        {
            QRegularExpressionMatchIterator i = re.globalMatch(QStringRef(&subject), 1);
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match1 = i.next();
            consistencyCheck(match1);
            QVERIFY(match1.isValid());
            QVERIFY(match1.hasMatch());
            QCOMPARE(match1.captured(), QStringLiteral("iss"));
            QCOMPARE(match1.capturedStart(), 1);
            QCOMPARE(match1.capturedEnd(), 4);

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match2 = i.next();
            consistencyCheck(match2);
            QVERIFY(match2.isValid());
            QVERIFY(match2.hasMatch());
            QCOMPARE(match2.captured(), QStringLiteral("iss"));
            QCOMPARE(match2.capturedStart(), 4);
            QCOMPARE(match2.capturedEnd(), 7);

            QVERIFY(!i.hasNext());
        }

        // Substring from index 1: a single hit at [3, 6) relative to the
        // substring.
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject.mid(1));
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match = i.next();
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 3);
            QCOMPARE(match.capturedEnd(), 6);

            QVERIFY(!i.hasNext());
        }
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject.midRef(1));
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match = i.next();
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 3);
            QCOMPARE(match.capturedEnd(), 6);

            QVERIFY(!i.hasNext());
        }

        // Substring from index 1, offset 1: same single hit.
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject.mid(1), 1);
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match = i.next();
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 3);
            QCOMPARE(match.capturedEnd(), 6);

            QVERIFY(!i.hasNext());
        }
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject.midRef(1), 1);
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match = i.next();
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 3);
            QCOMPARE(match.capturedEnd(), 6);

            QVERIFY(!i.hasNext());
        }

        // Whole subject, offset 4: only the second "iss" at [4, 7) remains.
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject, 4);
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match = i.next();
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 4);
            QCOMPARE(match.capturedEnd(), 7);

            QVERIFY(!i.hasNext());
        }
        {
            QRegularExpressionMatchIterator i = re.globalMatch(QStringRef(&subject), 4);
            QVERIFY(i.isValid());

            consistencyCheck(i);
            QVERIFY(i.hasNext());
            const QRegularExpressionMatch match = i.next();
            consistencyCheck(match);
            QVERIFY(match.isValid());
            QVERIFY(match.hasMatch());
            QCOMPARE(match.captured(), QStringLiteral("iss"));
            QCOMPARE(match.capturedStart(), 4);
            QCOMPARE(match.capturedEnd(), 7);

            QVERIFY(!i.hasNext());
        }

        // Substring from index 4: the iterator is valid but yields nothing.
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject.mid(4));
            consistencyCheck(i);
            QVERIFY(i.isValid());
            QVERIFY(!i.hasNext());
        }
        {
            QRegularExpressionMatchIterator i = re.globalMatch(subject.midRef(4));
            consistencyCheck(i);
            QVERIFY(i.isValid());
            QVERIFY(!i.hasNext());
        }
    }
}

View File

@ -72,6 +72,7 @@ private slots:
void regularExpressionMatch();
void JOptionUsage_data();
void JOptionUsage();
void QStringAndQStringRefEquivalence();
private:
void provideRegularExpressions();