Simplify and unify Q{ByteArray,String{,Ref}}::{simplified,trimmed}

As a side effect, QString::simplified() will always return a detached
copy, even if it's the same contents.

QStringRef::trimmed() can use the same calculation algorithm but can't
use the trimmed_helper() template function due to its lack of a
constructor taking begin pointer and size (std::string_view could do
it). That constructor can't be added because QStringRef always refers to
an existing QString, not to data in memory.

Change-Id: Ib966c1741819c68c6bac5fcbb00f8ac818b3ccab
Reviewed-by: Olivier Goffart <ogoffart@woboq.com>
Reviewed-by: Jędrzej Nowacki <jedrzej.nowacki@digia.com>
Reviewed-by: Marc Mutz <marc.mutz@kdab.com>
This commit is contained in:
Thiago Macieira 2014-07-28 23:31:32 -07:00
parent 19dd9a0ebd
commit 54da2b2911
5 changed files with 151 additions and 146 deletions

View File

@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
** Copyright (C) 2014 Intel Corporation.
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
@ -46,6 +47,7 @@
#include "qlist.h"
#include "qlocale.h"
#include "qlocale_p.h"
#include "qstringalgorithms_p.h"
#include "qscopedpointer.h"
#include <qdatastream.h>
#include <qmath.h>
@ -3214,27 +3216,7 @@ QDataStream &operator>>(QDataStream &in, QByteArray &ba)
*/
QByteArray QByteArray::simplified() const
{
// NOTE: this is a diff hunk (-3214,27 +3216,7). Everything from the size
// check down to "return result;" is the hand-written body being removed;
// the final return statement is its replacement, which forwards to the
// shared QStringAlgorithms helper.
if (d->size == 0)
return *this;
// Worst case the output is as long as the input, so allocate that much
// up front and shrink at the end.
QByteArray result(d->size, Qt::Uninitialized);
const char *from = d->data();
const char *fromend = from + d->size;
int outc=0;
char *to = result.d->data();
for (;;) {
// skip a run of whitespace
while (from!=fromend && ascii_isspace(uchar(*from)))
from++;
// copy the following run of non-whitespace bytes
while (from!=fromend && !ascii_isspace(uchar(*from)))
to[outc++] = *from++;
// more input follows: emit exactly one separating space
if (from!=fromend)
to[outc++] = ' ';
else
break;
}
// a separator written just before trailing whitespace must be dropped
if (outc > 0 && to[outc-1] == ' ')
outc--;
result.resize(outc);
return result;
return QStringAlgorithms<const QByteArray>::simplified_helper(*this);
}
/*!
@ -3254,25 +3236,7 @@ QByteArray QByteArray::simplified() const
*/
QByteArray QByteArray::trimmed() const
{
// NOTE: this is a diff hunk (-3254,25 +3236,7). Everything from the size
// check down to "return QByteArray(s+start, l);" is the body being removed;
// the final return statement is its replacement, which forwards to the
// shared QStringAlgorithms helper.
if (d->size == 0)
return *this;
const char *s = d->data();
// fast path: neither the first nor the last byte is whitespace
if (!ascii_isspace(uchar(*s)) && !ascii_isspace(uchar(s[d->size-1])))
return *this;
int start = 0;
int end = d->size - 1;
while (start<=end && ascii_isspace(uchar(s[start]))) // skip white space from start
start++;
if (start <= end) { // at least one non-whitespace byte remains
while (end && ascii_isspace(uchar(s[end]))) // skip white space from end
end--;
}
// length of the inclusive range [start, end]; <= 0 when all whitespace
int l = end - start + 1;
if (l <= 0) {
QByteArrayDataPtr empty = { Data::allocate(0) };
return QByteArray(empty);
}
return QByteArray(s+start, l);
return QStringAlgorithms<const QByteArray>::trimmed_helper(*this);
}
/*!

View File

@ -78,6 +78,7 @@
#include "qchar.cpp"
#include "qstringmatcher.cpp"
#include "qstringiterator_p.h"
#include "qstringalgorithms_p.h"
#include "qthreadstorage.h"
#ifdef Q_OS_WIN
@ -4633,78 +4634,7 @@ QString& QString::setUnicode(const QChar *unicode, int size)
*/
QString QString::simplified() const
{
// NOTE: this is a diff hunk (-4633,78 +4634,7). Everything from the size
// check down to "return result;" is the hand-optimized body being removed;
// the final return statement is its replacement, which forwards to the
// shared QStringAlgorithms helper.
if (d->size == 0)
return *this;
const QChar * const start = reinterpret_cast<QChar *>(d->data());
const QChar *from = start;
const QChar *fromEnd = start + d->size;
// Skip leading whitespace; bail out early if there is nothing else.
forever {
QChar ch = *from;
if (!ch.isSpace())
break;
if (++from == fromEnd) {
// All-whitespace string
QStringDataPtr empty = { Data::allocate(0) };
return QString(empty);
}
}
// This loop needs no underflow check, as we already determined that
// the string contains non-whitespace. If the string has exactly one
// non-whitespace, it will be checked twice - we can live with that.
while (fromEnd[-1].isSpace())
fromEnd--;
// The rest of the function depends on the fact that we already know
// that the last character in the source is no whitespace.
const QChar *copyFrom = from;
int copyCount;
// Scan for the first place where the text is not already simplified:
// a whitespace character other than ' ', or a ' ' followed by more
// whitespace.
forever {
if (++from == fromEnd) {
// Only leading and/or trailing whitespace, if any at all
return mid(copyFrom - start, from - copyFrom);
}
QChar ch = *from;
if (!ch.isSpace())
continue;
if (ch != QLatin1Char(' ')) {
copyCount = from - copyFrom;
break;
}
ch = *++from;
if (ch.isSpace()) {
// exclude the single ' ' that preceded this whitespace
copyCount = from - copyFrom - 1;
break;
}
}
// 'from' now points at the non-trailing whitespace which made the
// string not simplified in the first place. 'copyCount' is the number
// of already simplified characters - at least one, obviously -
// without a trailing space.
QString result((fromEnd - from) + copyCount, Qt::Uninitialized);
QChar *to = reinterpret_cast<QChar *>(result.d->data());
// copyCount QChars of two bytes each
::memcpy(to, copyFrom, copyCount * 2);
to += copyCount;
// From here on fromEnd points AT the last non-whitespace character
// rather than one past it.
fromEnd--;
QChar ch;
forever {
// one separator per whitespace run
*to++ = QLatin1Char(' ');
do {
ch = *++from;
} while (ch.isSpace());
if (from == fromEnd)
break;
// copy the word; leave via 'done' when the last character is reached
do {
*to++ = ch;
ch = *++from;
if (from == fromEnd)
goto done;
} while (!ch.isSpace());
}
done:
// 'ch' holds the last non-whitespace character, not yet written
*to++ = ch;
result.truncate(to - reinterpret_cast<QChar *>(result.d->data()));
return result;
return QStringAlgorithms<const QString>::simplified_helper(*this);
}
/*!
@ -4725,25 +4655,7 @@ QString QString::simplified() const
*/
QString QString::trimmed() const
{
// NOTE: this is a diff hunk (-4725,25 +4655,7). Everything from the size
// check down to "return QString(s + start, l);" is the body being removed;
// the final return statement is its replacement, which forwards to the
// shared QStringAlgorithms helper.
if (d->size == 0)
return *this;
const QChar *s = (const QChar*)d->data();
// fast path: neither the first nor the last character is whitespace
if (!s->isSpace() && !s[d->size-1].isSpace())
return *this;
int start = 0;
int end = d->size - 1;
while (start<=end && s[start].isSpace()) // skip white space from start
start++;
if (start <= end) { // at least one non-whitespace character remains
while (end && s[end].isSpace()) // skip white space from end
end--;
}
// length of the inclusive range [start, end]; <= 0 when all whitespace
int l = end - start + 1;
if (l <= 0) {
QStringDataPtr empty = { Data::allocate(0) };
return QString(empty);
}
return QString(s + start, l);
return QStringAlgorithms<const QString>::trimmed_helper(*this);
}
/*! \fn const QChar QString::at(int position) const
@ -9696,20 +9608,15 @@ QVector<uint> QStringRef::toUcs4() const
*/
QStringRef QStringRef::trimmed() const
{
// NOTE: this is a diff hunk (-9696,20 +9608,15) with removed and added
// lines interleaved. The "m_size == 0 || m_string == 0" test and the
// index-based start/end scan further down belong to the old body; the
// begin/end pointer code belongs to the new one.
if (m_size == 0 || m_string == 0)
// New body: let the shared helper narrow [begin, end) to the trimmed range.
const QChar *begin = cbegin();
const QChar *end = cend();
QStringAlgorithms<const QStringRef>::trimmed_helper_positions(begin, end);
// nothing was trimmed: hand back this reference unchanged
if (begin == cbegin() && end == cend())
return *this;
// Old body: index-based scan over the referenced characters.
const QChar *s = m_string->constData() + m_position;
int start = 0;
int end = m_size - 1;
while (start <= end && s[start].isSpace()) // skip white space from start
start++;
if (start <= end) { // at least one non-whitespace character remains
while (end && s[end].isSpace()) // skip white space from end
end--;
}
int l = end - start + 1;
Q_ASSERT(l >= 0);
return QStringRef(m_string, m_position + start, l);
// New body, continued: an all-whitespace reference yields a
// default-constructed QStringRef.
if (begin == end)
return QStringRef();
// translate the pointer offset back into an index into m_string
int position = m_position + (begin - cbegin());
return QStringRef(m_string, position, end - begin);
}
/*!

View File

@ -0,0 +1,135 @@
/****************************************************************************
**
** Copyright (C) 2014 Intel Corporation.
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://qt.digia.com/licensing. For further information
** use the contact form at http://qt.digia.com/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QSTRINGALGORITHMS_P_H
#define QSTRINGALGORITHMS_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists for the convenience
// of internal files. This header file may change from version to version
// without notice, or even be removed.
//
// We mean it.
//
#include "qstring.h"
#include "qlocale_p.h" // for ascii_isspace
QT_BEGIN_NAMESPACE
/*
    Shared implementation of the trimmed() and simplified() algorithms.

    StringType is the (possibly const-qualified) string class the algorithm
    operates on; in this change it is instantiated with const QByteArray,
    const QString and const QStringRef. The class must provide value_type,
    size_type, cbegin(), cend() and, for the *_helper() functions, isEmpty(),
    size(), resize() and constructors taking (pointer, length) and
    (size, Qt::Uninitialized).
*/
template <typename StringType> struct QStringAlgorithms
{
    typedef typename StringType::value_type Char;
    typedef typename StringType::size_type size_type;
    typedef typename QtPrivate::remove_cv<StringType>::type NakedStringType;
    static const bool isConst = QtPrivate::is_const<StringType>::value;

    // Whitespace predicates for the two character types in use.
    static inline bool isSpace(char ch) { return ascii_isspace(ch); }
    static inline bool isSpace(QChar ch) { return ch.isSpace(); }

    // Surrogate pairs are not handled in either of the functions below. That is
    // not a problem because there are no space characters (Zs, Zl, Zp) outside the
    // Basic Multilingual Plane.

    /*
        Narrows the half-open range [begin, end) in place so that it no longer
        starts or ends with whitespace. For an all-whitespace (or empty) range
        the result is begin == end.
    */
    static inline void trimmed_helper_positions(const Char *&begin, const Char *&end)
    {
        // skip white space from start
        while (begin < end && isSpace(*begin))
            begin++;
        // skip white space from end; the loop condition already covers the
        // case where the first loop consumed the whole range
        while (begin < end && isSpace(end[-1]))
            end--;
    }

    /*
        Returns str with leading and trailing whitespace removed. When there
        is nothing to remove, str itself is returned; when only whitespace is
        present, a default-constructed StringType is returned.
    */
    static inline StringType trimmed_helper(StringType &str)
    {
        const Char *begin = str.cbegin();
        const Char *end = str.cend();
        trimmed_helper_positions(begin, end);
        if (begin == str.cbegin() && end == str.cend())
            return str;
        if (begin == end)
            return StringType();
        return StringType(begin, end - begin);
    }

    /*
        Returns str with leading and trailing whitespace removed and every
        internal run of whitespace replaced by a single QChar::Space.
    */
    static inline StringType simplified_helper(StringType &str)
    {
        if (str.isEmpty())
            return str;
        const Char *src = str.cbegin();
        const Char *end = str.cend();
        // The simplified text is never longer than the input.
        NakedStringType result(str.size(), Qt::Uninitialized);

        Char *dst = const_cast<Char *>(result.cbegin());
        Char *ptr = dst;
        // Stays true only while every separator that was written replaced a
        // plain space. A length comparison alone cannot tell: "a\tb" and
        // "a b" have the same length, yet the tab must still be replaced.
        bool unmodified = true;
        forever {
            // skip a run of whitespace
            while (src != end && isSpace(*src))
                ++src;
            // copy the following run of non-whitespace
            while (src != end && !isSpace(*src))
                *ptr++ = *src++;
            if (src == end)
                break;
            // src now points at the first whitespace character after a word
            if (*src != QChar::Space)
                unmodified = false;
            *ptr++ = QChar::Space;
        }
        // a separator written just before trailing whitespace must go
        if (ptr != dst && ptr[-1] == QChar::Space)
            --ptr;

        const int newlen = ptr - dst;
        if (unmodified && newlen == str.size()) {
            // Same length and only plain single spaces as separators:
            // the input was already simplified, return the original.
            return str;
        }
        result.resize(newlen);
        return qMove(result);
    }
};
QT_END_NAMESPACE
#endif // QSTRINGALGORITHMS_P_H

View File

@ -57,6 +57,7 @@ HEADERS += \
tools/qsize.h \
tools/qstack.h \
tools/qstring.h \
tools/qstringalgorithms_p.h \
tools/qstringbuilder.h \
tools/qstringiterator_p.h \
tools/qstringlist.h \

View File

@ -2073,8 +2073,6 @@ void tst_QString::simplified()
QVERIFY2(result.isEmpty() && !result.isNull(), qPrintable("'" + full + "' did not yield empty: " + result));
} else {
QCOMPARE(result, simple);
if (full == simple)
QVERIFY(result.isSharedWith(full));
}
}