QLocal8Bit::convertFromUnicode[win]: handle trailing high surrogate
The win32 API doesn't give us much choice. _Some_ code pages have support for returning some error if we pass a specific flag, but not all of them. Anyway, since the code pages might not support all that UTF-16 provides, we can't reasonably make it error out on characters that cannot be converted. So, the most reasonable thing we can do is handle an unpaired high surrogate at the end of a string, assuming that the rest of the string was fine and that the low surrogate will be provided in the next call. Pick-to: 6.6 6.5 Fixes: QTBUG-118185 Task-number: QTBUG-105105 Change-Id: I1f193c9d8e04bec769d885d32440c759d9dff0c2 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
e34ee8e88a
commit
d8d5922f16
@ -1388,7 +1388,6 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
|
||||
Q_ASSERT(uclen < INT_MAX); // ### FIXME
|
||||
Q_ASSERT(state);
|
||||
Q_UNUSED(state); // ### Fixme
|
||||
if (state->flags & QStringConverter::Flag::Stateless) // temporary
|
||||
state = nullptr;
|
||||
|
||||
@ -1402,15 +1401,47 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
qsizetype outlen = buf.size();
|
||||
QByteArray mb;
|
||||
|
||||
int len;
|
||||
if (state && state->remainingChars > 0) {
|
||||
Q_ASSERT(state->remainingChars == 1);
|
||||
// Let's try to decode the pending character
|
||||
wchar_t wc[2] = { wchar_t(state->state_data[0]), ch[0] };
|
||||
int len = WideCharToMultiByte(codePage, 0, wc, int(std::size(wc)), out, outlen, nullptr,
|
||||
nullptr);
|
||||
if (!len)
|
||||
return {}; // Cannot recover, and I refuse to believe it was a size limitation
|
||||
out += len;
|
||||
outlen -= len;
|
||||
++ch;
|
||||
--uclen;
|
||||
state->remainingChars = 0;
|
||||
state->state_data[0] = 0;
|
||||
if (uclen == 0)
|
||||
return QByteArrayView(buf.data(), len).toByteArray();
|
||||
}
|
||||
|
||||
if (state && QChar::isHighSurrogate(ch[uclen - 1])) {
|
||||
// We can handle a missing low surrogate at the end of the string,
|
||||
// so if there is one, exclude it now and store it in the state.
|
||||
state->remainingChars = 1;
|
||||
state->state_data[0] = ch[uclen - 1];
|
||||
--uclen;
|
||||
if (uclen == 0)
|
||||
return QByteArray();
|
||||
}
|
||||
|
||||
Q_ASSERT(uclen > 0);
|
||||
|
||||
int len = 0;
|
||||
while (!(len = WideCharToMultiByte(codePage, 0, ch, int(uclen), out, int(outlen), nullptr,
|
||||
nullptr))) {
|
||||
int r = GetLastError();
|
||||
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
||||
int neededLength = WideCharToMultiByte(codePage, 0, ch, int(uclen), nullptr, 0, nullptr,
|
||||
nullptr);
|
||||
mb.resize(neededLength);
|
||||
out = mb.data();
|
||||
const qsizetype currentLength = out - buf.data();
|
||||
mb.resize(currentLength + neededLength);
|
||||
memcpy(mb.data(), out, currentLength * sizeof(*out));
|
||||
out = mb.data() + currentLength;
|
||||
outlen = neededLength;
|
||||
// and try again...
|
||||
} else {
|
||||
@ -1424,12 +1455,13 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!len)
|
||||
return QByteArray();
|
||||
if (out == buf.data())
|
||||
mb = QByteArray(buf.data(), len);
|
||||
else
|
||||
mb.resize(len);
|
||||
auto end = out + len;
|
||||
if (QtPrivate::q_points_into_range(out, buf.data(), buf.data() + buf.size())) {
|
||||
if (end != buf.data()) // else: we return null-array
|
||||
mb = QByteArrayView(buf.data(), end).toByteArray();
|
||||
} else {
|
||||
mb.truncate(end - mb.data());
|
||||
}
|
||||
return mb;
|
||||
}
|
||||
#endif
|
||||
|
@ -2508,6 +2508,10 @@ void tst_QStringConverter::fromLocal8Bit_data()
|
||||
QTest::newRow("shiftJIS")
|
||||
<< "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd\xa4\x90\xa2\x8a\x45\x81\x49"_ba
|
||||
<< u"こんにちは、世界!"_s << SHIFT_JIS;
|
||||
|
||||
constexpr uint GB_18030 = 54936u;
|
||||
QTest::newRow("GB-18030") << "\xc4\xe3\xba\xc3\xca\xc0\xbd\xe7\xa3\xa1"_ba << u"你好世界!"_s
|
||||
<< GB_18030;
|
||||
}
|
||||
|
||||
void tst_QStringConverter::fromLocal8Bit()
|
||||
@ -2604,6 +2608,7 @@ void tst_QStringConverter::toLocal8Bit()
|
||||
for (QChar c : utf16)
|
||||
result += QLocal8Bit::convertFromUnicode_sys(QStringView(&c, 1), codePage, &state);
|
||||
QCOMPARE(result, eightBit);
|
||||
QCOMPARE(state.remainingChars, 0);
|
||||
}
|
||||
|
||||
void tst_QStringConverter::toLocal8Bit_special_cases()
|
||||
@ -2613,20 +2618,33 @@ void tst_QStringConverter::toLocal8Bit_special_cases()
|
||||
constexpr uint UTF8 = 65001u;
|
||||
// Decode a 2-code unit character, but only provide 1 code unit at first:
|
||||
const char16_t a[] = u"𬽦";
|
||||
QStringView firstHalf = QStringView(a, 1);
|
||||
QByteArray result = QLocal8Bit::convertFromUnicode_sys(firstHalf, UTF8, &state);
|
||||
QEXPECT_FAIL("", "We don't currently handle missing the low surrogate", Abort);
|
||||
QStringView codeUnits = a;
|
||||
QByteArray result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &state);
|
||||
QCOMPARE(result, QString());
|
||||
QVERIFY(result.isNull());
|
||||
QCOMPARE_GT(state.remainingChars, 0);
|
||||
// Then provide the second code unit:
|
||||
QStringView secondHalf = QStringView(a + 1, 1);
|
||||
result = QLocal8Bit::convertFromUnicode_sys(secondHalf, UTF8, &state);
|
||||
result = QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &state);
|
||||
QCOMPARE(result, "\xf0\xac\xbd\xa6"_ba);
|
||||
QCOMPARE(state.remainingChars, 0);
|
||||
|
||||
// Retain compat with the behavior for toLocal8Bit:
|
||||
QCOMPARE(firstHalf.toLocal8Bit(), "?");
|
||||
QCOMPARE(codeUnits.first(1).toLocal8Bit(), "?");
|
||||
|
||||
// Now do the same, but the second time we feed in a character, we also
|
||||
// provide many more so the internal stack buffer is not large enough.
|
||||
result.clear();
|
||||
state.clear();
|
||||
QString str = QStringView(a).toString().repeated(2048);
|
||||
codeUnits = str;
|
||||
result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &state);
|
||||
QCOMPARE(result, QString());
|
||||
QVERIFY(result.isNull());
|
||||
QCOMPARE_GT(state.remainingChars, 0);
|
||||
// Then we provide the rest of the string:
|
||||
result = QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &state);
|
||||
QCOMPARE(result.first(4), "\xf0\xac\xbd\xa6"_ba);
|
||||
QCOMPARE(state.remainingChars, 0);
|
||||
}
|
||||
#endif // Q_OS_WIN
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user