QString: add STL-style assign() [3/4]: (it,it) overload for char32_t

This is no longer range-length preserving, so adapt the
documentation.

For the non-contiguous iterator case, it's actually ok to always
resize(0) and then append(), because, unlike for QList and QVLA, the
resize(0) doesn't actually iterate the container to destroy
elements. It just sets some members and conveniently detach()es for
us.

The char8_t case is even more complicated, since we can, atm, not
include qstringconverter.h into qstring.h, yet qstringconverter is
required for stateful UTF-8 decoding in the input_iterator case. So
that's postponed to yet another patch, and maybe won't make it into
6.6. But I feel it's important to have at least one
non-length-preserving version of assign(it, it) in before release lest
users come to rely on this documented (and de-facto) feature of the
step-2 assign().

Fixes: QTBUG-106198
Pick-to: 6.6
Change-Id: Id458776e91b16fb2c80196e339cb817adee5d6d9
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Ivan Solovev <ivan.solovev@qt.io>
This commit is contained in:
Marc Mutz 2023-06-02 16:22:37 +02:00
parent fe5d9340b1
commit 05e3880130
3 changed files with 64 additions and 4 deletions

View File

@ -3346,11 +3346,15 @@ QString &QString::append(QChar ch)
Replaces the contents of this string with a copy of the elements in the
iterator range [\a first, \a last) and returns a reference to this string.
The size of this string will be equal to the number of elements in the
range [\a first, \a last).
The size of this string will be equal to the decoded length of the elements
in the range [\a first, \a last), which need not be the same as the length of
the range itself, because this function transparently recodes the input
character set to UTF-16.
This function will only allocate memory if the number of elements in the
range exceeds the capacity of this string or this string is shared.
range, or, for non-UTF-16-encoded input, the maximum possible size of the
resulting string, exceeds the capacity of this string, or if this string is
shared.
\note This function overload only participates in overload resolution if
\c InputIterator meets the requirements of a
@ -3361,6 +3365,7 @@ QString &QString::append(QChar ch)
\li QLatin1Char
\li \c char16_t
\li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
\li \c char32_t
\endlist
\note The behavior is undefined if either argument is an iterator into *this or
@ -3383,6 +3388,26 @@ QString &QString::assign(QAnyStringView s)
return *this;
}
/*!
    \internal

    Replaces the contents of this string with the UTF-16 form of the \a len
    UTF-32 code units starting at \a data, and returns a reference to this
    string.

    The existing buffer is reused when it is unshared and large enough for
    the worst-case output; otherwise the result is built with fromUcs4() and
    assigned to this string.
*/
QString &QString::assign_helper(const char32_t *data, qsizetype len)
{
    // A single char32_t may need a surrogate pair, so budget two UTF-16
    // code units per input element.
    const auto worstCaseLength = len * 2;
    const bool mustReallocate = !(worstCaseLength <= capacity() && isDetached());
    if (mustReallocate) {
        *this = QString::fromUcs4(data, len);
        return *this;
    }

    // Move the data pointer back over any free space at the front, so that
    // decoding writes from the very start of the buffer.
    if (const auto headroom = d.freeSpaceAtBegin())
        d.setBegin(d.begin() - headroom);

    QChar *const out = reinterpret_cast<QChar *>(d.begin());
    const QByteArrayView bytes(reinterpret_cast<const std::byte *>(data),
                               len * sizeof(char32_t));
    QStringConverter::State state;
    // The converter's return value is taken as the end of the written output.
    const auto outEnd = QUtf32::convertToUnicode(out, bytes, &state, DetectEndianness);
    d.size = outEnd - out;
    d.data()[d.size] = u'\0'; // keep the string NUL-terminated
    return *this;
}
/*!
\fn QString &QString::remove(qsizetype position, qsizetype n)

View File

@ -47,6 +47,13 @@ class QString;
namespace QtPrivate {
template <bool...B> class BoolList;
// Trait that holds exactly when Char is char32_t. No cv/ref stripping is
// done here; use IsCompatibleChar32Type for arbitrary (qualified) types.
template <typename Char>
using IsCompatibleChar32TypeHelper =
std::is_same<Char, char32_t>;
// Same check as IsCompatibleChar32TypeHelper, but applied to Char after
// q20::remove_cvref_t (i.e. with cv-qualifiers and references removed).
template <typename Char>
using IsCompatibleChar32Type
= IsCompatibleChar32TypeHelper<q20::remove_cvref_t<Char>>;
}
// Qt 4.x compatibility
@ -133,6 +140,7 @@ class Q_CORE_EXPORT QString
// Trait that holds if Char satisfies QtPrivate::IsCompatibleCharType or
// QtPrivate::IsCompatibleChar32Type, or is exactly QLatin1Char.
template <typename Char>
using is_compatible_char_helper = std::disjunction<
QtPrivate::IsCompatibleCharType<Char>,
QtPrivate::IsCompatibleChar32Type<Char>,
std::is_same<Char, QLatin1Char> // special case
>;
@ -418,15 +426,32 @@ public:
{
using V = typename std::iterator_traits<InputIterator>::value_type;
constexpr bool IsL1C = std::is_same_v<std::remove_cv_t<V>, QLatin1Char>;
constexpr bool IsFwdIt = std::is_convertible_v<
typename std::iterator_traits<InputIterator>::iterator_category,
std::forward_iterator_tag
>;
if constexpr (is_contiguous_iterator_v<InputIterator>) {
const auto p = q20::to_address(first);
const auto len = qsizetype(last - first);
if constexpr (IsL1C)
return assign(QLatin1StringView(reinterpret_cast<const char*>(p), len));
else if constexpr (sizeof(V) == 4)
return assign_helper(p, len);
else
return assign(QAnyStringView(p, len));
} else { // non-contiguous iterator, need to feed data piecemeal
} else if constexpr (sizeof(V) == 4) { // non-contiguous iterator, feed data piecemeal
resize(0);
if constexpr (IsFwdIt) {
const qsizetype requiredCapacity = 2 * std::distance(first, last);
reserve(requiredCapacity);
}
while (first != last) {
append(QChar::fromUcs4(*first));
++first;
}
return *this;
} else {
d.assign(first, last, [](QChar ch) -> char16_t { return ch.unicode(); });
d.data()[d.size] = u'\0';
return *this;
@ -896,6 +921,8 @@ private:
void reallocData(qsizetype alloc, QArrayData::AllocationOption option);
void reallocGrowData(qsizetype n);
// Assigns the UTF-16 form of the len UTF-32 code units at data to this
// string and returns *this; used by assign(it, it) for contiguous 4-byte
// character ranges.
// ### remove once QAnyStringView supports UTF-32:
QString &assign_helper(const char32_t *data, qsizetype len);
static int compare_helper(const QChar *data1, qsizetype length1,
const QChar *data2, qsizetype length2,
Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;

View File

@ -3442,6 +3442,14 @@ void tst_QString::assign()
str.assign(c16str.begin(), c16str.end());
QCOMPARE(str, c16);
const char32_t c32[] = U"٩(⁎❛ᴗ❛⁎)۶ 🤷";
str.assign(std::begin(c32), std::end(c32) - 1);
QCOMPARE(str, c16);
std::u32string c32str(c32);
str.assign(c32str.begin(), c32str.end());
QCOMPARE(str, c16);
QVarLengthArray<QLatin1Char, 5> l1ch = {'F'_L1, 'G'_L1, 'H'_L1, 'I'_L1, 'J'_L1};
str.assign(l1ch.begin(), l1ch.end());
QCOMPARE(str, u"FGHIJ");