From 05e388013098887eb66090b8f145ef92d60657db Mon Sep 17 00:00:00 2001 From: Marc Mutz Date: Fri, 2 Jun 2023 16:22:37 +0200 Subject: [PATCH] QString: add STL-style assign() [3/4]: (it,it) overload for char32_t This no longer is range-length preserving now, so adapt the documentation. For the non-contiguous iterator case, it's actually ok to always resize(0) and then append(), because, unlike for QList and QVLA, the resize(0) doesn't actually iterate the container to destroy elements. It just sets some members and conveniently detach()es for us. The char8_t case is even more complicated, since we can, atm, not include qstringconverter.h into qstring.h, yet qstringconverter is required for stateful UTF-8 decoding in the input_iterator case. So that's postponed to yet another patch, and maybe won't make it into 6.6. But I feel it's important to have at least one non-length-preserving version of assign(it, it) in before release lest users come to rely on this documented (and de-facto) feature of the step-2 assign(). Fixes: QTBUG-106198 Pick-to: 6.6 Change-Id: Id458776e91b16fb2c80196e339cb817adee5d6d9 Reviewed-by: Qt CI Bot Reviewed-by: Ivan Solovev --- src/corelib/text/qstring.cpp | 31 +++++++++++++++++-- src/corelib/text/qstring.h | 29 ++++++++++++++++- .../auto/corelib/text/qstring/tst_qstring.cpp | 8 +++++ 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index ae6e6f67eb..1fae9ef07a 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -3346,11 +3346,15 @@ QString &QString::append(QChar ch) Replaces the contents of this string with a copy of the elements in the iterator range [\a first, \a last) and returns a reference to this string. - The size of this string will be equal to the number of elements in the - range [\a first, \a last). 
+ The size of this string will be equal to the decoded length of the elements + in the range [\a first, \a last), which need not be the same as the length of + the range itself, because this function transparently recodes the input + character set to UTF-16. This function will only allocate memory if the number of elements in the - range exceeds the capacity of this string or this string is shared. + range, or, for non-UTF-16-encoded input, the maximum possible size of the + resulting string, exceeds the capacity of this string, or if this string is + shared. \note This function overload only participates in overload resolution if \c InputIterator meets the requirements of a @@ -3361,6 +3365,7 @@ QString &QString::append(QChar ch) \li QLatin1Char \li \c char16_t \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t + \li \c char32_t \endlist \note The behavior is undefined if either argument is an iterator into *this or @@ -3383,6 +3388,26 @@ QString &QString::assign(QAnyStringView s) return *this; } +QString &QString::assign_helper(const char32_t *data, qsizetype len) +{ + // worst case: each char32_t requires a surrogate pair, so + const auto requiredCapacity = len * 2; + if (requiredCapacity <= capacity() && isDetached()) { + const auto offset = d.freeSpaceAtBegin(); + if (offset) + d.setBegin(d.begin() - offset); + auto begin = reinterpret_cast(d.begin()); + auto ba = QByteArrayView(reinterpret_cast(data), len * sizeof(char32_t)); + QStringConverter::State state; + const auto end = QUtf32::convertToUnicode(begin, ba, &state, DetectEndianness); + d.size = end - begin; + d.data()[d.size] = u'\0'; + } else { + *this = QString::fromUcs4(data, len); + } + return *this; +} + /*! 
\fn QString &QString::remove(qsizetype position, qsizetype n) diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h index 76833147d4..e050ec356e 100644 --- a/src/corelib/text/qstring.h +++ b/src/corelib/text/qstring.h @@ -47,6 +47,13 @@ class QString; namespace QtPrivate { template class BoolList; + +template +using IsCompatibleChar32TypeHelper = + std::is_same; +template +using IsCompatibleChar32Type + = IsCompatibleChar32TypeHelper>; } // Qt 4.x compatibility @@ -133,6 +140,7 @@ class Q_CORE_EXPORT QString template using is_compatible_char_helper = std::disjunction< QtPrivate::IsCompatibleCharType, + QtPrivate::IsCompatibleChar32Type, std::is_same // special case >; @@ -418,15 +426,32 @@ public: { using V = typename std::iterator_traits::value_type; constexpr bool IsL1C = std::is_same_v, QLatin1Char>; + constexpr bool IsFwdIt = std::is_convertible_v< + typename std::iterator_traits::iterator_category, + std::forward_iterator_tag + >; if constexpr (is_contiguous_iterator_v) { const auto p = q20::to_address(first); const auto len = qsizetype(last - first); if constexpr (IsL1C) return assign(QLatin1StringView(reinterpret_cast(p), len)); + else if constexpr (sizeof(V) == 4) + return assign_helper(p, len); else return assign(QAnyStringView(p, len)); - } else { // non-contiguous iterator, need to feed data piecemeal + } else if constexpr (sizeof(V) == 4) { // non-contiguous iterator, feed data piecemeal + resize(0); + if constexpr (IsFwdIt) { + const qsizetype requiredCapacity = 2 * std::distance(first, last); + reserve(requiredCapacity); + } + while (first != last) { + append(QChar::fromUcs4(*first)); + ++first; + } + return *this; + } else { d.assign(first, last, [](QChar ch) -> char16_t { return ch.unicode(); }); d.data()[d.size] = u'\0'; return *this; @@ -896,6 +921,8 @@ private: void reallocData(qsizetype alloc, QArrayData::AllocationOption option); void reallocGrowData(qsizetype n); + // ### remove once QAnyStringView supports UTF-32: + QString 
&assign_helper(const char32_t *data, qsizetype len); static int compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index f3d2594e34..eaf35c969e 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -3442,6 +3442,14 @@ void tst_QString::assign() str.assign(c16str.begin(), c16str.end()); QCOMPARE(str, c16); + const char32_t c32[] = U"٩(⁎❛ᴗ❛⁎)۶ 🤷"; + str.assign(std::begin(c32), std::end(c32) - 1); + QCOMPARE(str, c16); + + std::u32string c32str(c32); + str.assign(c32str.begin(), c32str.end()); + QCOMPARE(str, c16); + QVarLengthArray l1ch = {'F'_L1, 'G'_L1, 'H'_L1, 'I'_L1, 'J'_L1}; str.assign(l1ch.begin(), l1ch.end()); QCOMPARE(str, u"FGHIJ");