QString::replace(): protect sought text and replacement
When replacing each copy of one text with a copy of another, we do so in batches of 1024; if we get more than one batch, we need to keep a copy of the sought text and replacement if they're part of the string we're modifying, for use in later batches. Also do the replacements in full batches of 1024, not 1023 (which left the last entry in an array unused); marked some related tests as (un)likely; and move some repeated code out into a pair of little local functions to save duplcation. Those new functions can also serve replace_helper(); and it can shed a const_cast and some conditioning of free() by using them the same way replace() now does. (There was also one place it still used the raw after, rather than the replacement copy; which could have produced errors if memcpy were to exercise its right to assume no overlap in arrays. This error is what prompted me to notice all of the above.) Added tests. The last error proved untestable as my memcpy is in fact as fussy as memmove. The first two tests added were attempts to get a failure out of it. The third did get a failure, but also tripped over the problem in replace() itself. Added to an existing test function and renamed it to generally cover extra tests for replace. Change-Id: I9ba6928c84ece266dbbe52b91e333ea54ab6d95e Reviewed-by: Robin Burchell <robin.burchell@viroteck.net> Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
parent
9a3bcd1c3d
commit
e21bf5e6b3
@ -2381,26 +2381,40 @@ QString &QString::replace(const QString &before, const QString &after, Qt::CaseS
|
||||
return replace(before.constData(), before.size(), after.constData(), after.size(), cs);
|
||||
}
|
||||
|
||||
namespace { // helpers for replace and its helper:
|
||||
QChar *textCopy(const QChar *start, int len)
|
||||
{
|
||||
const size_t size = len * sizeof(QChar);
|
||||
QChar *const copy = static_cast<QChar *>(::malloc(size));
|
||||
Q_CHECK_PTR(copy);
|
||||
::memcpy(copy, start, size);
|
||||
return copy;
|
||||
}
|
||||
|
||||
bool pointsIntoRange(const QChar *ptr, const ushort *base, int len)
|
||||
{
|
||||
const QChar *const start = reinterpret_cast<const QChar *>(base);
|
||||
return start <= ptr && ptr < start + len;
|
||||
}
|
||||
} // end namespace
|
||||
|
||||
/*!
|
||||
\internal
|
||||
*/
|
||||
void QString::replace_helper(uint *indices, int nIndices, int blen, const QChar *after, int alen)
|
||||
{
|
||||
// copy *after in case it lies inside our own d->data() area
|
||||
// (which we could possibly invalidate via a realloc or corrupt via memcpy operations.)
|
||||
QChar *afterBuffer = const_cast<QChar *>(after);
|
||||
if (after >= reinterpret_cast<QChar *>(d->data()) && after < reinterpret_cast<QChar *>(d->data()) + d->size) {
|
||||
afterBuffer = static_cast<QChar *>(::malloc(alen*sizeof(QChar)));
|
||||
Q_CHECK_PTR(afterBuffer);
|
||||
::memcpy(afterBuffer, after, alen*sizeof(QChar));
|
||||
}
|
||||
// Copy after if it lies inside our own d->data() area (which we could
|
||||
// possibly invalidate via a realloc or modify by replacement).
|
||||
QChar *afterBuffer = 0;
|
||||
if (pointsIntoRange(after, d->data(), d->size)) // Use copy in place of vulnerable original:
|
||||
after = afterBuffer = textCopy(after, alen);
|
||||
|
||||
QT_TRY {
|
||||
if (blen == alen) {
|
||||
// replace in place
|
||||
detach();
|
||||
for (int i = 0; i < nIndices; ++i)
|
||||
memcpy(d->data() + indices[i], afterBuffer, alen * sizeof(QChar));
|
||||
memcpy(d->data() + indices[i], after, alen * sizeof(QChar));
|
||||
} else if (alen < blen) {
|
||||
// replace from front
|
||||
detach();
|
||||
@ -2416,7 +2430,7 @@ void QString::replace_helper(uint *indices, int nIndices, int blen, const QChar
|
||||
to += msize;
|
||||
}
|
||||
if (alen) {
|
||||
memcpy(d->data() + to, afterBuffer, alen*sizeof(QChar));
|
||||
memcpy(d->data() + to, after, alen * sizeof(QChar));
|
||||
to += alen;
|
||||
}
|
||||
movestart = indices[i] + blen;
|
||||
@ -2439,17 +2453,15 @@ void QString::replace_helper(uint *indices, int nIndices, int blen, const QChar
|
||||
int moveto = insertstart + alen;
|
||||
memmove(d->data() + moveto, d->data() + movestart,
|
||||
(moveend - movestart)*sizeof(QChar));
|
||||
memcpy(d->data() + insertstart, afterBuffer, alen*sizeof(QChar));
|
||||
memcpy(d->data() + insertstart, after, alen * sizeof(QChar));
|
||||
moveend = movestart-blen;
|
||||
}
|
||||
}
|
||||
} QT_CATCH(const std::bad_alloc &) {
|
||||
if (afterBuffer != after)
|
||||
::free(afterBuffer);
|
||||
::free(afterBuffer);
|
||||
QT_RETHROW;
|
||||
}
|
||||
if (afterBuffer != after)
|
||||
::free(afterBuffer);
|
||||
::free(afterBuffer);
|
||||
}
|
||||
|
||||
/*!
|
||||
@ -2478,12 +2490,13 @@ QString &QString::replace(const QChar *before, int blen,
|
||||
return *this;
|
||||
|
||||
QStringMatcher matcher(before, blen, cs);
|
||||
QChar *beforeBuffer = 0, *afterBuffer = 0;
|
||||
|
||||
int index = 0;
|
||||
while (1) {
|
||||
uint indices[1024];
|
||||
uint pos = 0;
|
||||
while (pos < 1023) {
|
||||
while (pos < 1024) {
|
||||
index = matcher.indexIn(*this, index);
|
||||
if (index == -1)
|
||||
break;
|
||||
@ -2496,13 +2509,29 @@ QString &QString::replace(const QChar *before, int blen,
|
||||
if (!pos) // Nothing to replace
|
||||
break;
|
||||
|
||||
if (Q_UNLIKELY(index != -1)) {
|
||||
/*
|
||||
We're about to change data, that before and after might point
|
||||
into, and we'll need that data for our next batch of indices.
|
||||
*/
|
||||
if (!afterBuffer && pointsIntoRange(after, d->data(), d->size))
|
||||
after = afterBuffer = textCopy(after, alen);
|
||||
|
||||
if (!beforeBuffer && pointsIntoRange(before, d->data(), d->size)) {
|
||||
beforeBuffer = textCopy(before, blen);
|
||||
matcher = QStringMatcher(beforeBuffer, blen, cs);
|
||||
}
|
||||
}
|
||||
|
||||
replace_helper(indices, pos, blen, after, alen);
|
||||
|
||||
if (index == -1) // Nothing left to replace
|
||||
if (Q_LIKELY(index == -1)) // Nothing left to replace
|
||||
break;
|
||||
// The call to replace_helper just moved what index points at:
|
||||
index += pos*(alen-blen);
|
||||
}
|
||||
::free(afterBuffer);
|
||||
::free(beforeBuffer);
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -2533,13 +2562,13 @@ QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs
|
||||
uint indices[1024];
|
||||
uint pos = 0;
|
||||
if (cs == Qt::CaseSensitive) {
|
||||
while (pos < 1023 && index < d->size) {
|
||||
while (pos < 1024 && index < d->size) {
|
||||
if (d->data()[index] == cc)
|
||||
indices[pos++] = index;
|
||||
index++;
|
||||
}
|
||||
} else {
|
||||
while (pos < 1023 && index < d->size) {
|
||||
while (pos < 1024 && index < d->size) {
|
||||
if (QChar::toCaseFolded(d->data()[index]) == cc)
|
||||
indices[pos++] = index;
|
||||
index++;
|
||||
@ -2550,7 +2579,7 @@ QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs
|
||||
|
||||
replace_helper(indices, pos, 1, after.constData(), after.d->size);
|
||||
|
||||
if (index == -1) // Nothing left to replace
|
||||
if (Q_LIKELY(index == -1)) // Nothing left to replace
|
||||
break;
|
||||
// The call to replace_helper just moved what index points at:
|
||||
index += pos*(after.d->size - 1);
|
||||
|
@ -360,7 +360,7 @@ private slots:
|
||||
void replace_qchar_qstring();
|
||||
void replace_uint_uint_data();
|
||||
void replace_uint_uint();
|
||||
void replace_uint_uint_extra();
|
||||
void replace_extra();
|
||||
void replace_string_data();
|
||||
void replace_string();
|
||||
void replace_regexp_data();
|
||||
@ -2790,7 +2790,7 @@ void tst_QString::replace_uint_uint()
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QString::replace_uint_uint_extra()
|
||||
void tst_QString::replace_extra()
|
||||
{
|
||||
/*
|
||||
This test is designed to be extremely slow if QString::replace() doesn't optimize the case
|
||||
@ -2827,6 +2827,44 @@ void tst_QString::replace_uint_uint_extra()
|
||||
QString str5("abcdefghij");
|
||||
str5.replace(8, 10, str5);
|
||||
QCOMPARE(str5, QString("abcdefghabcdefghij"));
|
||||
|
||||
// Replacements using only part of the string modified:
|
||||
QString str6("abcdefghij");
|
||||
str6.replace(1, 8, str6.constData() + 3, 3);
|
||||
QCOMPARE(str6, QString("adefj"));
|
||||
|
||||
QString str7("abcdefghibcdefghij");
|
||||
str7.replace(str7.constData() + 1, 6, str7.constData() + 2, 3);
|
||||
QCOMPARE(str7, QString("acdehicdehij"));
|
||||
|
||||
const int many = 1024;
|
||||
/*
|
||||
QS::replace(const QChar *, int, const QChar *, int, Qt::CaseSensitivity)
|
||||
does its replacements in batches of many (please keep in sync with any
|
||||
changes to batch size), which lead to misbehaviour if ether QChar * array
|
||||
was part of the data being modified.
|
||||
*/
|
||||
QString str8("abcdefg"), ans8("acdeg");
|
||||
{
|
||||
// Make str8 and ans8 repeat themselves many + 1 times:
|
||||
int i = many;
|
||||
QString big(str8), small(ans8);
|
||||
while (i && !(i & 1)) { // Exploit many being a power of 2:
|
||||
big += big;
|
||||
small += small;
|
||||
i >>= 1;
|
||||
}
|
||||
while (i-- > 0) {
|
||||
str8 += big;
|
||||
ans8 += small;
|
||||
}
|
||||
}
|
||||
str8.replace(str8.constData() + 1, 5, str8.constData() + 2, 3);
|
||||
// Pre-test the bit where the diff happens, so it gets displayed:
|
||||
QCOMPARE(str8.mid((many - 3) * 5), ans8.mid((many - 3) * 5));
|
||||
// Also check the full values match, of course:
|
||||
QCOMPARE(str8.size(), ans8.size());
|
||||
QCOMPARE(str8, ans8);
|
||||
}
|
||||
|
||||
void tst_QString::replace_string()
|
||||
|
Loading…
Reference in New Issue
Block a user