QString: assign() [4/4]: (it,it) overload for UTF-8 data types

Implement the missing overload to handle UTF-8 specific data types,
including char8_t (C++20), char, uchar and signed char.

Introduce the helper function 'assign_helper_char8' which handles the
non-contiguous_iterator case. The contiguous_iterator case is already
handled by the QAnyStringView overload.

Include 'qstringconverter.h' at the end of the file, since it can't
be included at the top due to diamond dependency conflicts.
QStringDecoder is an implementation detail we don't want users to
depend on when using assign(it, it). It would be unnatural to not
be able to use a function just because we didn't include an
apparently unrelated header.

[ChangeLog][QtCore][QString] Enabled assign() for UTF-8 data types.

Fixes: QTBUG-114208
Change-Id: Ia39bbb70ca105a6bbf1a131b2533f29a919ff66d
Reviewed-by: Marc Mutz <marc.mutz@qt.io>
This commit is contained in:
Dennis Oberst 2023-06-28 18:47:34 +02:00
parent e68a0da0b9
commit 016addc201
4 changed files with 150 additions and 1 deletions

View File

@ -3364,6 +3364,10 @@ QString &QString::append(QChar ch)
\list
\li QChar
\li QLatin1Char
\li \c {char}
\li \c {unsigned char}
\li \c {signed char}
\li \c {char8_t}
\li \c char16_t
\li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
\li \c char32_t

View File

@ -141,6 +141,7 @@ class Q_CORE_EXPORT QString
using is_compatible_char_helper = std::disjunction<
QtPrivate::IsCompatibleCharType<Char>,
QtPrivate::IsCompatibleChar32Type<Char>,
QtPrivate::IsCompatibleChar8Type<Char>,
std::is_same<Char, QLatin1Char> // special case
>;
@ -451,6 +452,10 @@ public:
++first;
}
return *this;
} else if constexpr (QtPrivate::IsCompatibleChar8Type<V>::value) {
assign_helper_char8(first, last);
d.data()[d.size] = u'\0';
return *this;
} else {
d.assign(first, last, [](QChar ch) -> char16_t { return ch.unicode(); });
d.data()[d.size] = u'\0';
@ -936,6 +941,9 @@ private:
void reallocGrowData(qsizetype n);
// ### remove once QAnyStringView supports UTF-32:
QString &assign_helper(const char32_t *data, qsizetype len);
// Defined in qstringconverter.h
template <typename InputIterator>
void assign_helper_char8(InputIterator first, InputIterator last);
static int compare_helper(const QChar *data1, qsizetype length1,
const QChar *data2, qsizetype length2,
Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
@ -1512,6 +1520,7 @@ inline QString operator""_qs(const char16_t *str, size_t size) noexcept
QT_END_NAMESPACE
#include <QtCore/qstringbuilder.h>
#include <QtCore/qstringconverter.h>
#ifdef Q_L1S_VIEW_IS_PRIMARY
# undef Q_L1S_VIEW_IS_PRIMARY

View File

@ -211,6 +211,66 @@ QByteArray &operator+=(QByteArray &a, const QStringEncoder::DecodedData<T> &b)
}
#endif
template <typename InputIterator>
void QString::assign_helper_char8(InputIterator first, InputIterator last)
{
static_assert(!QString::is_contiguous_iterator_v<InputIterator>,
"Internal error: Should have been handed over to the QAnyStringView overload."
);
using ValueType = typename std::iterator_traits<InputIterator>::value_type;
constexpr bool IsFwdIt = std::is_convertible_v<
typename std::iterator_traits<InputIterator>::iterator_category,
std::forward_iterator_tag
>;
resize(0);
// In case of not being shared, there is the possibility of having free space at begin
// even after the resize to zero.
if (const auto offset = d.freeSpaceAtBegin())
d.setBegin(d.begin() - offset);
if constexpr (IsFwdIt)
reserve(static_cast<qsizetype>(std::distance(first, last)));
auto toUtf16 = QStringDecoder(QStringDecoder::Utf8);
auto availableCapacity = d.constAllocatedCapacity();
auto *dst = d.data();
auto *dend = d.data() + availableCapacity;
while (true) {
if (first == last) { // ran out of input elements
Q_ASSERT(!std::less<>{}(dend, dst));
d.size = dst - d.begin();
return;
}
const ValueType next = *first; // decays proxies, if any
const auto chunk = QUtf8StringView(&next, 1);
// UTF-8 characters can have a maximum size of 4 bytes and may result in a surrogate
// pair of UTF-16 code units. In the input-iterator case, we don't know the size
// and would need to always reserve space for 2 code units. To keep our promise
// of 'not allocating if it fits', we have to pre-check this condition.
// We know that it fits in the forward-iterator case.
if constexpr (!IsFwdIt) {
constexpr qsizetype Pair = 2;
char16_t buf[Pair];
const qptrdiff n = toUtf16.appendToBuffer(buf, chunk) - buf;
if (dend - dst < n) { // ran out of allocated memory
const auto offset = dst - d.begin();
reallocData(d.constAllocatedCapacity() + Pair, QArrayData::Grow);
// update the pointers since we've re-allocated
availableCapacity = d.constAllocatedCapacity();
dst = d.data() + offset;
dend = d.data() + availableCapacity;
}
dst = std::copy_n(buf, n, dst);
} else { // take the fast path
dst = toUtf16.appendToBuffer(dst, chunk);
}
++first;
}
}
QT_END_NAMESPACE
#endif

View File

@ -3467,6 +3467,37 @@ void tst_QString::assign()
QCOMPARE_EQ(str.capacity(), oldCap);
QCOMPARE_EQ(str.size(), 0);
#ifndef QT_NO_CAST_FROM_ASCII
const char c8[] = "a©☻🂤"; // [1, 2, 3, 4] bytes in utf-8 code points
str.assign(std::begin(c8), std::end(c8) - 1);
QCOMPARE(str, c8);
std::string c8str(c8);
str.assign(c8str.begin(), c8str.end());
QCOMPARE(str, c8);
QCOMPARE(str.capacity(), qsizetype(std::size(c8) - 1));
oldCap = str.capacity();
str.assign(c8str.begin(), c8str.begin()); // empty range
QCOMPARE_EQ(str.capacity(), oldCap);
QCOMPARE_EQ(str.size(), 0);
std::forward_list<char> fwd(std::begin(c8), std::end(c8) - 1);
str.assign(fwd.begin(), fwd.end());
QCOMPARE(str, c8);
#endif
#ifdef __cpp_char8_t
const char8_t c8t[] = u8"🂤🂤🂤🂤🂤🂤🂤🂤🂤🂤"; // 10 x 4 bytes in utf-8 code points
str.assign(std::begin(c8t), std::end(c8t) - 1);
QCOMPARE(str, c8t);
QCOMPARE(str.size(), 20);
#endif
#ifdef __cpp_lib_char8_t
std::u8string c8tstr(c8t);
str.assign(c8tstr.begin(), c8tstr.end());
QCOMPARE(str, c8t);
#endif
const char16_t c16[] = u"٩(⁎❛ᴗ❛⁎)۶ 🤷";
str.assign(std::begin(c16), std::end(c16) - 1);
QCOMPARE(str, c16);
@ -3516,6 +3547,51 @@ void tst_QString::assign()
str.assign(std::istream_iterator<ushort>{}, std::istream_iterator<ushort>{}); // empty range
QCOMPARE_EQ(str.capacity(), oldCap);
QCOMPARE_EQ(str.size(), 0);
#ifndef QT_NO_CAST_FROM_ASCII
str.resize(0);
str.squeeze();
str.reserve(5);
const char c8cmp[] = "🂤🂤a"; // 2 + 2 + 1 byte
ss.clear();
ss.str(c8cmp);
str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
QCOMPARE(str, c8cmp);
QCOMPARE(str.size(), 5);
QCOMPARE(str.capacity(), 5);
// 1 code-point + ill-formed sequence + 1 code-point.
const char c8IllFormed[] = "a\xe0\x9f\x80""a";
ss.clear();
ss.str(c8IllFormed);
str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
QEXPECT_FAIL("", "Iconsistent handling of ill-formed sequences, QTBUG-117051", Continue);
QCOMPARE_EQ(str, QString(c8IllFormed));
const char c82[] = "ÌşṫһíᶊśꞧɨℼṩuDF49ïľι?";
ss.clear();
ss.str(c82);
str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
QCOMPARE(str, c82);
const char uc8[] = "ẵƽ𝔰ȉ𝚐ꞑ𝒾𝝿𝕘";
ss.clear();
ss.str(uc8);
str.assign(std::istream_iterator<uchar>{ss}, std::istream_iterator<uchar>{});
QCOMPARE(str, uc8);
ss.clear();
const char sc8[] = "𓁇ख़ॵ௵";
ss.str(sc8);
str.assign(std::istream_iterator<signed char>{ss}, std::istream_iterator<signed char>{});
QCOMPARE(str, sc8);
oldCap = str.capacity();
str.assign(std::istream_iterator<signed char>{}, // empty range
std::istream_iterator<signed char>{});
QCOMPARE_EQ(str.capacity(), oldCap);
QCOMPARE_EQ(str.size(), 0);
#endif
}
// Test chaining
{
@ -3634,7 +3710,7 @@ void tst_QString::assign_uses_prepend_buffer()
for (qsizetype i = 0; i < withFreeSpaceAtBegin.d.freeSpaceAtBegin(); ++i)
ss << "d ";
withFreeSpaceAtBegin.assign(std::istream_iterator<ushort>{ss}, std::istream_iterator<ushort>{});
withFreeSpaceAtBegin.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
QCOMPARE_EQ(withFreeSpaceAtBegin.d.freeSpaceAtBegin(), 0); // we used the prepend buffer
QCOMPARE_EQ(capBegin(withFreeSpaceAtBegin), oldCapBegin);
QCOMPARE_EQ(capEnd(withFreeSpaceAtBegin), oldCapEnd);