QString: assign() [4/4]: (it,it) overload for UTF-8 data types
Implement the missing overload to handle UTF-8 specific data types, including char8_t (C++20), char, uchar and signed char. Introduce the helper function 'assign_helper_char8' which handles the non-contiguous_iterator case. The contiguous_iterator case is already handled by the QAnyStringView overload. Include 'qstringconverter.h' at the end of the file, since it can't be included at the top due to diamond dependency conflicts. QStringDecoder is an implementation detail we don't want users to depend on when using assign(it, it). It would be unnatural to not be able to use a function just because we didn't include an apparently unrelated header. [ChangeLog][QtCore][QString] Enabled assign() for UTF-8 data types. Fixes: QTBUG-114208 Change-Id: Ia39bbb70ca105a6bbf1a131b2533f29a919ff66d Reviewed-by: Marc Mutz <marc.mutz@qt.io>
This commit is contained in:
parent
e68a0da0b9
commit
016addc201
@ -3364,6 +3364,10 @@ QString &QString::append(QChar ch)
|
||||
\list
|
||||
\li QChar
|
||||
\li QLatin1Char
|
||||
\li \c {char}
|
||||
\li \c {unsigned char}
|
||||
\li \c {signed char}
|
||||
\li \c {char8_t}
|
||||
\li \c char16_t
|
||||
\li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
|
||||
\li \c char32_t
|
||||
|
@ -141,6 +141,7 @@ class Q_CORE_EXPORT QString
|
||||
using is_compatible_char_helper = std::disjunction<
|
||||
QtPrivate::IsCompatibleCharType<Char>,
|
||||
QtPrivate::IsCompatibleChar32Type<Char>,
|
||||
QtPrivate::IsCompatibleChar8Type<Char>,
|
||||
std::is_same<Char, QLatin1Char> // special case
|
||||
>;
|
||||
|
||||
@ -451,6 +452,10 @@ public:
|
||||
++first;
|
||||
}
|
||||
return *this;
|
||||
} else if constexpr (QtPrivate::IsCompatibleChar8Type<V>::value) {
|
||||
assign_helper_char8(first, last);
|
||||
d.data()[d.size] = u'\0';
|
||||
return *this;
|
||||
} else {
|
||||
d.assign(first, last, [](QChar ch) -> char16_t { return ch.unicode(); });
|
||||
d.data()[d.size] = u'\0';
|
||||
@ -936,6 +941,9 @@ private:
|
||||
void reallocGrowData(qsizetype n);
|
||||
// ### remove once QAnyStringView supports UTF-32:
|
||||
QString &assign_helper(const char32_t *data, qsizetype len);
|
||||
// Defined in qstringconverter.h
|
||||
template <typename InputIterator>
|
||||
void assign_helper_char8(InputIterator first, InputIterator last);
|
||||
static int compare_helper(const QChar *data1, qsizetype length1,
|
||||
const QChar *data2, qsizetype length2,
|
||||
Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
|
||||
@ -1512,6 +1520,7 @@ inline QString operator""_qs(const char16_t *str, size_t size) noexcept
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#include <QtCore/qstringbuilder.h>
|
||||
#include <QtCore/qstringconverter.h>
|
||||
|
||||
#ifdef Q_L1S_VIEW_IS_PRIMARY
|
||||
# undef Q_L1S_VIEW_IS_PRIMARY
|
||||
|
@ -211,6 +211,66 @@ QByteArray &operator+=(QByteArray &a, const QStringEncoder::DecodedData<T> &b)
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename InputIterator>
|
||||
void QString::assign_helper_char8(InputIterator first, InputIterator last)
|
||||
{
|
||||
static_assert(!QString::is_contiguous_iterator_v<InputIterator>,
|
||||
"Internal error: Should have been handed over to the QAnyStringView overload."
|
||||
);
|
||||
|
||||
using ValueType = typename std::iterator_traits<InputIterator>::value_type;
|
||||
constexpr bool IsFwdIt = std::is_convertible_v<
|
||||
typename std::iterator_traits<InputIterator>::iterator_category,
|
||||
std::forward_iterator_tag
|
||||
>;
|
||||
|
||||
resize(0);
|
||||
// In case of not being shared, there is the possibility of having free space at begin
|
||||
// even after the resize to zero.
|
||||
if (const auto offset = d.freeSpaceAtBegin())
|
||||
d.setBegin(d.begin() - offset);
|
||||
|
||||
if constexpr (IsFwdIt)
|
||||
reserve(static_cast<qsizetype>(std::distance(first, last)));
|
||||
|
||||
auto toUtf16 = QStringDecoder(QStringDecoder::Utf8);
|
||||
auto availableCapacity = d.constAllocatedCapacity();
|
||||
auto *dst = d.data();
|
||||
auto *dend = d.data() + availableCapacity;
|
||||
|
||||
while (true) {
|
||||
if (first == last) { // ran out of input elements
|
||||
Q_ASSERT(!std::less<>{}(dend, dst));
|
||||
d.size = dst - d.begin();
|
||||
return;
|
||||
}
|
||||
const ValueType next = *first; // decays proxies, if any
|
||||
const auto chunk = QUtf8StringView(&next, 1);
|
||||
// UTF-8 characters can have a maximum size of 4 bytes and may result in a surrogate
|
||||
// pair of UTF-16 code units. In the input-iterator case, we don't know the size
|
||||
// and would need to always reserve space for 2 code units. To keep our promise
|
||||
// of 'not allocating if it fits', we have to pre-check this condition.
|
||||
// We know that it fits in the forward-iterator case.
|
||||
if constexpr (!IsFwdIt) {
|
||||
constexpr qsizetype Pair = 2;
|
||||
char16_t buf[Pair];
|
||||
const qptrdiff n = toUtf16.appendToBuffer(buf, chunk) - buf;
|
||||
if (dend - dst < n) { // ran out of allocated memory
|
||||
const auto offset = dst - d.begin();
|
||||
reallocData(d.constAllocatedCapacity() + Pair, QArrayData::Grow);
|
||||
// update the pointers since we've re-allocated
|
||||
availableCapacity = d.constAllocatedCapacity();
|
||||
dst = d.data() + offset;
|
||||
dend = d.data() + availableCapacity;
|
||||
}
|
||||
dst = std::copy_n(buf, n, dst);
|
||||
} else { // take the fast path
|
||||
dst = toUtf16.appendToBuffer(dst, chunk);
|
||||
}
|
||||
++first;
|
||||
}
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif
|
||||
|
@ -3467,6 +3467,37 @@ void tst_QString::assign()
|
||||
QCOMPARE_EQ(str.capacity(), oldCap);
|
||||
QCOMPARE_EQ(str.size(), 0);
|
||||
|
||||
#ifndef QT_NO_CAST_FROM_ASCII
|
||||
const char c8[] = "a©☻🂤"; // [1, 2, 3, 4] bytes in utf-8 code points
|
||||
str.assign(std::begin(c8), std::end(c8) - 1);
|
||||
QCOMPARE(str, c8);
|
||||
|
||||
std::string c8str(c8);
|
||||
str.assign(c8str.begin(), c8str.end());
|
||||
QCOMPARE(str, c8);
|
||||
QCOMPARE(str.capacity(), qsizetype(std::size(c8) - 1));
|
||||
|
||||
oldCap = str.capacity();
|
||||
str.assign(c8str.begin(), c8str.begin()); // empty range
|
||||
QCOMPARE_EQ(str.capacity(), oldCap);
|
||||
QCOMPARE_EQ(str.size(), 0);
|
||||
|
||||
std::forward_list<char> fwd(std::begin(c8), std::end(c8) - 1);
|
||||
str.assign(fwd.begin(), fwd.end());
|
||||
QCOMPARE(str, c8);
|
||||
#endif
|
||||
#ifdef __cpp_char8_t
|
||||
const char8_t c8t[] = u8"🂤🂤🂤🂤🂤🂤🂤🂤🂤🂤"; // 10 x 4 bytes in utf-8 code points
|
||||
str.assign(std::begin(c8t), std::end(c8t) - 1);
|
||||
QCOMPARE(str, c8t);
|
||||
QCOMPARE(str.size(), 20);
|
||||
#endif
|
||||
#ifdef __cpp_lib_char8_t
|
||||
std::u8string c8tstr(c8t);
|
||||
str.assign(c8tstr.begin(), c8tstr.end());
|
||||
QCOMPARE(str, c8t);
|
||||
#endif
|
||||
|
||||
const char16_t c16[] = u"٩(⁎❛ᴗ❛⁎)۶ 🤷";
|
||||
str.assign(std::begin(c16), std::end(c16) - 1);
|
||||
QCOMPARE(str, c16);
|
||||
@ -3516,6 +3547,51 @@ void tst_QString::assign()
|
||||
str.assign(std::istream_iterator<ushort>{}, std::istream_iterator<ushort>{}); // empty range
|
||||
QCOMPARE_EQ(str.capacity(), oldCap);
|
||||
QCOMPARE_EQ(str.size(), 0);
|
||||
|
||||
#ifndef QT_NO_CAST_FROM_ASCII
|
||||
str.resize(0);
|
||||
str.squeeze();
|
||||
str.reserve(5);
|
||||
const char c8cmp[] = "🂤🂤a"; // 2 + 2 + 1 byte
|
||||
ss.clear();
|
||||
ss.str(c8cmp);
|
||||
str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
|
||||
QCOMPARE(str, c8cmp);
|
||||
QCOMPARE(str.size(), 5);
|
||||
QCOMPARE(str.capacity(), 5);
|
||||
|
||||
// 1 code-point + ill-formed sequence + 1 code-point.
|
||||
const char c8IllFormed[] = "a\xe0\x9f\x80""a";
|
||||
ss.clear();
|
||||
ss.str(c8IllFormed);
|
||||
str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
|
||||
QEXPECT_FAIL("", "Iconsistent handling of ill-formed sequences, QTBUG-117051", Continue);
|
||||
QCOMPARE_EQ(str, QString(c8IllFormed));
|
||||
|
||||
const char c82[] = "ÌşṫһíᶊśꞧɨℼṩuDF49ïľι?";
|
||||
ss.clear();
|
||||
ss.str(c82);
|
||||
str.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
|
||||
QCOMPARE(str, c82);
|
||||
|
||||
const char uc8[] = "ẵƽ𝔰ȉ𝚐ꞑ𝒾𝝿𝕘";
|
||||
ss.clear();
|
||||
ss.str(uc8);
|
||||
str.assign(std::istream_iterator<uchar>{ss}, std::istream_iterator<uchar>{});
|
||||
QCOMPARE(str, uc8);
|
||||
|
||||
ss.clear();
|
||||
const char sc8[] = "𓁇ख़ॵ௵";
|
||||
ss.str(sc8);
|
||||
str.assign(std::istream_iterator<signed char>{ss}, std::istream_iterator<signed char>{});
|
||||
QCOMPARE(str, sc8);
|
||||
|
||||
oldCap = str.capacity();
|
||||
str.assign(std::istream_iterator<signed char>{}, // empty range
|
||||
std::istream_iterator<signed char>{});
|
||||
QCOMPARE_EQ(str.capacity(), oldCap);
|
||||
QCOMPARE_EQ(str.size(), 0);
|
||||
#endif
|
||||
}
|
||||
// Test chaining
|
||||
{
|
||||
@ -3634,7 +3710,7 @@ void tst_QString::assign_uses_prepend_buffer()
|
||||
for (qsizetype i = 0; i < withFreeSpaceAtBegin.d.freeSpaceAtBegin(); ++i)
|
||||
ss << "d ";
|
||||
|
||||
withFreeSpaceAtBegin.assign(std::istream_iterator<ushort>{ss}, std::istream_iterator<ushort>{});
|
||||
withFreeSpaceAtBegin.assign(std::istream_iterator<char>{ss}, std::istream_iterator<char>{});
|
||||
QCOMPARE_EQ(withFreeSpaceAtBegin.d.freeSpaceAtBegin(), 0); // we used the prepend buffer
|
||||
QCOMPARE_EQ(capBegin(withFreeSpaceAtBegin), oldCapBegin);
|
||||
QCOMPARE_EQ(capEnd(withFreeSpaceAtBegin), oldCapEnd);
|
||||
|
Loading…
Reference in New Issue
Block a user