QString::replace(): protect sought text and replacement

When replacing each copy of one text with a copy of another, we do so
in batches of 1024; if we get more than one batch, we need to keep a
copy of the sought text and replacement if they're part of the string
we're modifying, for use in later batches.

Also do the replacements in full batches of 1024, not 1023 (which left
the last entry in an array unused); marked some related tests as
(un)likely; and move some repeated code out into a pair of little
local functions to save duplcation.

Those new functions can also serve replace_helper(); and it can shed a
const_cast and some conditioning of free() by using them the same way
replace() now does.  (There was also one place it still used the raw
after, rather than the replacement copy; which could have produced
errors if memcpy were to exercise its right to assume no overlap in
arrays.  This error is what prompted me to notice all of the above.)

Added tests.  The last error proved untestable as my memcpy is in fact
as fussy as memmove.  The first two tests added were attempts to get a
failure out of it.  The third did get a failure, but also tripped over
the problem in replace() itself.  Added to an existing test function
and renamed it to generally cover extra tests for replace.

Change-Id: I9ba6928c84ece266dbbe52b91e333ea54ab6d95e
Reviewed-by: Robin Burchell <robin.burchell@viroteck.net>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
Edward Welbourne 2016-03-30 16:01:27 +02:00 committed by Edward Welbourne
parent 9a3bcd1c3d
commit e21bf5e6b3
2 changed files with 89 additions and 22 deletions

View File

@ -2381,26 +2381,40 @@ QString &QString::replace(const QString &before, const QString &after, Qt::CaseS
return replace(before.constData(), before.size(), after.constData(), after.size(), cs);
}
namespace { // helpers for replace and its helper:
QChar *textCopy(const QChar *start, int len)
{
const size_t size = len * sizeof(QChar);
QChar *const copy = static_cast<QChar *>(::malloc(size));
Q_CHECK_PTR(copy);
::memcpy(copy, start, size);
return copy;
}
bool pointsIntoRange(const QChar *ptr, const ushort *base, int len)
{
const QChar *const start = reinterpret_cast<const QChar *>(base);
return start <= ptr && ptr < start + len;
}
} // end namespace
/*!
\internal
*/
void QString::replace_helper(uint *indices, int nIndices, int blen, const QChar *after, int alen)
{
// copy *after in case it lies inside our own d->data() area
// (which we could possibly invalidate via a realloc or corrupt via memcpy operations.)
QChar *afterBuffer = const_cast<QChar *>(after);
if (after >= reinterpret_cast<QChar *>(d->data()) && after < reinterpret_cast<QChar *>(d->data()) + d->size) {
afterBuffer = static_cast<QChar *>(::malloc(alen*sizeof(QChar)));
Q_CHECK_PTR(afterBuffer);
::memcpy(afterBuffer, after, alen*sizeof(QChar));
}
// Copy after if it lies inside our own d->data() area (which we could
// possibly invalidate via a realloc or modify by replacement).
QChar *afterBuffer = 0;
if (pointsIntoRange(after, d->data(), d->size)) // Use copy in place of vulnerable original:
after = afterBuffer = textCopy(after, alen);
QT_TRY {
if (blen == alen) {
// replace in place
detach();
for (int i = 0; i < nIndices; ++i)
memcpy(d->data() + indices[i], afterBuffer, alen * sizeof(QChar));
memcpy(d->data() + indices[i], after, alen * sizeof(QChar));
} else if (alen < blen) {
// replace from front
detach();
@ -2416,7 +2430,7 @@ void QString::replace_helper(uint *indices, int nIndices, int blen, const QChar
to += msize;
}
if (alen) {
memcpy(d->data() + to, afterBuffer, alen*sizeof(QChar));
memcpy(d->data() + to, after, alen * sizeof(QChar));
to += alen;
}
movestart = indices[i] + blen;
@ -2439,17 +2453,15 @@ void QString::replace_helper(uint *indices, int nIndices, int blen, const QChar
int moveto = insertstart + alen;
memmove(d->data() + moveto, d->data() + movestart,
(moveend - movestart)*sizeof(QChar));
memcpy(d->data() + insertstart, afterBuffer, alen*sizeof(QChar));
memcpy(d->data() + insertstart, after, alen * sizeof(QChar));
moveend = movestart-blen;
}
}
} QT_CATCH(const std::bad_alloc &) {
if (afterBuffer != after)
::free(afterBuffer);
::free(afterBuffer);
QT_RETHROW;
}
if (afterBuffer != after)
::free(afterBuffer);
::free(afterBuffer);
}
/*!
@ -2478,12 +2490,13 @@ QString &QString::replace(const QChar *before, int blen,
return *this;
QStringMatcher matcher(before, blen, cs);
QChar *beforeBuffer = 0, *afterBuffer = 0;
int index = 0;
while (1) {
uint indices[1024];
uint pos = 0;
while (pos < 1023) {
while (pos < 1024) {
index = matcher.indexIn(*this, index);
if (index == -1)
break;
@ -2496,13 +2509,29 @@ QString &QString::replace(const QChar *before, int blen,
if (!pos) // Nothing to replace
break;
if (Q_UNLIKELY(index != -1)) {
/*
We're about to change data, that before and after might point
into, and we'll need that data for our next batch of indices.
*/
if (!afterBuffer && pointsIntoRange(after, d->data(), d->size))
after = afterBuffer = textCopy(after, alen);
if (!beforeBuffer && pointsIntoRange(before, d->data(), d->size)) {
beforeBuffer = textCopy(before, blen);
matcher = QStringMatcher(beforeBuffer, blen, cs);
}
}
replace_helper(indices, pos, blen, after, alen);
if (index == -1) // Nothing left to replace
if (Q_LIKELY(index == -1)) // Nothing left to replace
break;
// The call to replace_helper just moved what index points at:
index += pos*(alen-blen);
}
::free(afterBuffer);
::free(beforeBuffer);
return *this;
}
@ -2533,13 +2562,13 @@ QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs
uint indices[1024];
uint pos = 0;
if (cs == Qt::CaseSensitive) {
while (pos < 1023 && index < d->size) {
while (pos < 1024 && index < d->size) {
if (d->data()[index] == cc)
indices[pos++] = index;
index++;
}
} else {
while (pos < 1023 && index < d->size) {
while (pos < 1024 && index < d->size) {
if (QChar::toCaseFolded(d->data()[index]) == cc)
indices[pos++] = index;
index++;
@ -2550,7 +2579,7 @@ QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs
replace_helper(indices, pos, 1, after.constData(), after.d->size);
if (index == -1) // Nothing left to replace
if (Q_LIKELY(index == -1)) // Nothing left to replace
break;
// The call to replace_helper just moved what index points at:
index += pos*(after.d->size - 1);

View File

@ -360,7 +360,7 @@ private slots:
void replace_qchar_qstring();
void replace_uint_uint_data();
void replace_uint_uint();
void replace_uint_uint_extra();
void replace_extra();
void replace_string_data();
void replace_string();
void replace_regexp_data();
@ -2790,7 +2790,7 @@ void tst_QString::replace_uint_uint()
}
}
void tst_QString::replace_uint_uint_extra()
void tst_QString::replace_extra()
{
/*
This test is designed to be extremely slow if QString::replace() doesn't optimize the case
@ -2827,6 +2827,44 @@ void tst_QString::replace_uint_uint_extra()
QString str5("abcdefghij");
str5.replace(8, 10, str5);
QCOMPARE(str5, QString("abcdefghabcdefghij"));
// Replacements using only part of the string modified:
QString str6("abcdefghij");
str6.replace(1, 8, str6.constData() + 3, 3);
QCOMPARE(str6, QString("adefj"));
QString str7("abcdefghibcdefghij");
str7.replace(str7.constData() + 1, 6, str7.constData() + 2, 3);
QCOMPARE(str7, QString("acdehicdehij"));
const int many = 1024;
/*
QS::replace(const QChar *, int, const QChar *, int, Qt::CaseSensitivity)
does its replacements in batches of many (please keep in sync with any
changes to batch size), which lead to misbehaviour if ether QChar * array
was part of the data being modified.
*/
QString str8("abcdefg"), ans8("acdeg");
{
// Make str8 and ans8 repeat themselves many + 1 times:
int i = many;
QString big(str8), small(ans8);
while (i && !(i & 1)) { // Exploit many being a power of 2:
big += big;
small += small;
i >>= 1;
}
while (i-- > 0) {
str8 += big;
ans8 += small;
}
}
str8.replace(str8.constData() + 1, 5, str8.constData() + 2, 3);
// Pre-test the bit where the diff happens, so it gets displayed:
QCOMPARE(str8.mid((many - 3) * 5), ans8.mid((many - 3) * 5));
// Also check the full values match, of course:
QCOMPARE(str8.size(), ans8.size());
QCOMPARE(str8, ans8);
}
void tst_QString::replace_string()