QStringConverter[win]: expose+test control of code-page
Then we can easily test how fromLocal8Bit() and toLocal8Bit() behave
with different code-pages.

Pick-to: 6.6 6.5
Task-number: QTBUG-118318
Task-number: QTBUG-118185
Task-number: QTBUG-105105
Change-Id: Ib1cd3bccd27d598f4c80915557e332befcd96354
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
66b7cb2a88
commit
13fbedd162
@ -1253,7 +1253,8 @@ int QLocal8Bit::checkUtf8()
|
|||||||
return GetACP() == CP_UTF8 ? 1 : -1;
|
return GetACP() == CP_UTF8 ? 1 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::State *state)
|
static QString convertToUnicodeCharByChar(QByteArrayView in, quint32 codePage,
|
||||||
|
QStringConverter::State *state)
|
||||||
{
|
{
|
||||||
qsizetype length = in.size();
|
qsizetype length = in.size();
|
||||||
const char *chars = in.data();
|
const char *chars = in.data();
|
||||||
@ -1285,10 +1286,10 @@ static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::S
|
|||||||
const char *mb = mbcs;
|
const char *mb = mbcs;
|
||||||
const char *next = 0;
|
const char *next = 0;
|
||||||
QString s;
|
QString s;
|
||||||
while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
|
while ((next = CharNextExA(codePage, mb, 0)) != mb) {
|
||||||
wchar_t wc[2] ={0};
|
wchar_t wc[2] ={0};
|
||||||
int charlength = int(next - mb); // always just a few bytes
|
int charlength = int(next - mb); // always just a few bytes
|
||||||
int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
|
int len = MultiByteToWideChar(codePage, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
|
||||||
if (len>0) {
|
if (len>0) {
|
||||||
s.append(QChar(wc[0]));
|
s.append(QChar(wc[0]));
|
||||||
} else {
|
} else {
|
||||||
@ -1305,8 +1306,13 @@ static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::S
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::State *state)
|
QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::State *state)
|
||||||
|
{
|
||||||
|
return convertToUnicode_sys(in, CP_ACP, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage,
|
||||||
|
QStringConverter::State *state)
|
||||||
{
|
{
|
||||||
qsizetype length = in.size();
|
qsizetype length = in.size();
|
||||||
|
|
||||||
@ -1336,7 +1342,7 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::St
|
|||||||
prev[0] = state_data;
|
prev[0] = state_data;
|
||||||
prev[1] = mb[0];
|
prev[1] = mb[0];
|
||||||
remainingChars = 0;
|
remainingChars = 0;
|
||||||
len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
|
len = MultiByteToWideChar(codePage, MB_PRECOMPOSED,
|
||||||
prev, 2, wc.data(), wc.length());
|
prev, 2, wc.data(), wc.length());
|
||||||
if (len) {
|
if (len) {
|
||||||
sp.append(QChar(wc[0]));
|
sp.append(QChar(wc[0]));
|
||||||
@ -1351,11 +1357,11 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::St
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
|
while (!(len=MultiByteToWideChar(codePage, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
|
||||||
mb, mblen, wc.data(), wc.length()))) {
|
mb, mblen, wc.data(), wc.length()))) {
|
||||||
int r = GetLastError();
|
int r = GetLastError();
|
||||||
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
||||||
const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
|
const int wclen = MultiByteToWideChar(codePage, MB_PRECOMPOSED,
|
||||||
mb, mblen, 0, 0);
|
mb, mblen, 0, 0);
|
||||||
wc.resize(wclen);
|
wc.resize(wclen);
|
||||||
} else if (r == ERROR_NO_UNICODE_TRANSLATION) {
|
} else if (r == ERROR_NO_UNICODE_TRANSLATION) {
|
||||||
@ -1364,7 +1370,7 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::St
|
|||||||
mblen--;
|
mblen--;
|
||||||
//check whether, we hit an invalid character in the middle
|
//check whether, we hit an invalid character in the middle
|
||||||
if ((mblen <= 1) || (remainingChars && state_data))
|
if ((mblen <= 1) || (remainingChars && state_data))
|
||||||
return convertToUnicodeCharByChar(in, state);
|
return convertToUnicodeCharByChar(in, codePage, state);
|
||||||
//Remove the last character and try again...
|
//Remove the last character and try again...
|
||||||
state_data = mb[mblen-1];
|
state_data = mb[mblen-1];
|
||||||
remainingChars = 1;
|
remainingChars = 1;
|
||||||
@ -1395,6 +1401,12 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::St
|
|||||||
}
|
}
|
||||||
|
|
||||||
QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, QStringConverter::State *state)
|
QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, QStringConverter::State *state)
|
||||||
|
{
|
||||||
|
return convertFromUnicode_sys(in, CP_ACP, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||||
|
QStringConverter::State *state)
|
||||||
{
|
{
|
||||||
const QChar *ch = in.data();
|
const QChar *ch = in.data();
|
||||||
qsizetype uclen = in.size();
|
qsizetype uclen = in.size();
|
||||||
@ -1412,12 +1424,12 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, QStringConverter::
|
|||||||
BOOL used_def;
|
BOOL used_def;
|
||||||
QByteArray mb(4096, 0);
|
QByteArray mb(4096, 0);
|
||||||
int len;
|
int len;
|
||||||
while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
|
while (!(len=WideCharToMultiByte(codePage, 0, (const wchar_t*)ch, uclen,
|
||||||
mb.data(), mb.size()-1, 0, &used_def)))
|
mb.data(), mb.size()-1, 0, &used_def)))
|
||||||
{
|
{
|
||||||
int r = GetLastError();
|
int r = GetLastError();
|
||||||
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
||||||
mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
|
mb.resize(1+WideCharToMultiByte(codePage, 0,
|
||||||
(const wchar_t*)ch, uclen,
|
(const wchar_t*)ch, uclen,
|
||||||
0, 0, 0, &used_def));
|
0, 0, 0, &used_def));
|
||||||
// and try again...
|
// and try again...
|
||||||
|
@ -362,6 +362,7 @@ struct Q_CORE_EXPORT QLocal8Bit
|
|||||||
}
|
}
|
||||||
return r > 0;
|
return r > 0;
|
||||||
}
|
}
|
||||||
|
static QString convertToUnicode_sys(QByteArrayView, quint32, QStringConverter::State *);
|
||||||
static QString convertToUnicode_sys(QByteArrayView, QStringConverter::State *);
|
static QString convertToUnicode_sys(QByteArrayView, QStringConverter::State *);
|
||||||
static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state)
|
static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state)
|
||||||
{
|
{
|
||||||
@ -369,6 +370,7 @@ struct Q_CORE_EXPORT QLocal8Bit
|
|||||||
return QUtf8::convertToUnicode(in, state);
|
return QUtf8::convertToUnicode(in, state);
|
||||||
return convertToUnicode_sys(in, state);
|
return convertToUnicode_sys(in, state);
|
||||||
}
|
}
|
||||||
|
static QByteArray convertFromUnicode_sys(QStringView, quint32, QStringConverter::State *);
|
||||||
static QByteArray convertFromUnicode_sys(QStringView, QStringConverter::State *);
|
static QByteArray convertFromUnicode_sys(QStringView, QStringConverter::State *);
|
||||||
static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
|
static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
|
||||||
{
|
{
|
||||||
|
@ -180,6 +180,16 @@ private slots:
|
|||||||
void encodingForHtml();
|
void encodingForHtml();
|
||||||
|
|
||||||
void availableCodesAreAvailable();
|
void availableCodesAreAvailable();
|
||||||
|
|
||||||
|
#ifdef Q_OS_WIN
|
||||||
|
// On all other systems local 8-bit encoding is UTF-8
|
||||||
|
void fromLocal8Bit_data();
|
||||||
|
void fromLocal8Bit();
|
||||||
|
void fromLocal8Bit_special_cases();
|
||||||
|
void toLocal8Bit_data();
|
||||||
|
void toLocal8Bit();
|
||||||
|
void toLocal8Bit_special_cases();
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
void tst_QStringConverter::constructByName()
|
void tst_QStringConverter::constructByName()
|
||||||
@ -2484,6 +2494,144 @@ void tst_QStringConverter::threadSafety()
|
|||||||
QCOMPARE(b, QString::fromLatin1("abcdefghijklmonpqrstufvxyz"));
|
QCOMPARE(b, QString::fromLatin1("abcdefghijklmonpqrstufvxyz"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef Q_OS_WIN
|
||||||
|
void tst_QStringConverter::fromLocal8Bit_data()
|
||||||
|
{
|
||||||
|
QTest::addColumn<QByteArray>("eightBit");
|
||||||
|
QTest::addColumn<QString>("utf16");
|
||||||
|
QTest::addColumn<quint32>("codePage");
|
||||||
|
|
||||||
|
constexpr uint WINDOWS_1252 = 1252u;
|
||||||
|
QTest::newRow("windows-1252") << "Hello, world!"_ba << u"Hello, world!"_s << WINDOWS_1252;
|
||||||
|
constexpr uint SHIFT_JIS = 932u;
|
||||||
|
// Mostly two byte characters, but the comma is a single byte character (0xa4)
|
||||||
|
QTest::newRow("shiftJIS")
|
||||||
|
<< "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd\xa4\x90\xa2\x8a\x45\x81\x49"_ba
|
||||||
|
<< u"こんにちは、世界!"_s << SHIFT_JIS;
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_QStringConverter::fromLocal8Bit()
|
||||||
|
{
|
||||||
|
QFETCH(const QByteArray, eightBit);
|
||||||
|
QFETCH(const QString, utf16);
|
||||||
|
QFETCH(const quint32, codePage);
|
||||||
|
|
||||||
|
QStringConverter::State state;
|
||||||
|
|
||||||
|
QString result = QLocal8Bit::convertToUnicode_sys(eightBit, codePage, &state);
|
||||||
|
QCOMPARE(result, utf16);
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
|
|
||||||
|
result.clear();
|
||||||
|
state.clear();
|
||||||
|
for (char c : eightBit)
|
||||||
|
result += QLocal8Bit::convertToUnicode_sys({&c, 1}, codePage, &state);
|
||||||
|
QCOMPARE(result, utf16);
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_QStringConverter::fromLocal8Bit_special_cases()
|
||||||
|
{
|
||||||
|
QStringConverter::State state;
|
||||||
|
constexpr uint SHIFT_JIS = 932u;
|
||||||
|
// Decode a 2-octet character, but only provide 1 octet at first:
|
||||||
|
QString result = QLocal8Bit::convertToUnicode_sys("\x82", SHIFT_JIS, &state);
|
||||||
|
QCOMPARE(result, QString());
|
||||||
|
QVERIFY(result.isNull());
|
||||||
|
QCOMPARE_GT(state.remainingChars, 0);
|
||||||
|
// Then provide the second octet:
|
||||||
|
result = QLocal8Bit::convertToUnicode_sys("\xb1", SHIFT_JIS, &state);
|
||||||
|
QCOMPARE(result, u"こ");
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
|
|
||||||
|
// Now try a 3-octet UTF-8 sequence:
|
||||||
|
result.clear();
|
||||||
|
state.clear();
|
||||||
|
constexpr uint UTF8 = 65001u;
|
||||||
|
// First the first 2 octets:
|
||||||
|
result = QLocal8Bit::convertToUnicode_sys("\xe4\xbd", UTF8, &state);
|
||||||
|
QCOMPARE(result, QString());
|
||||||
|
QVERIFY(result.isNull());
|
||||||
|
QCOMPARE_GT(state.remainingChars, 0);
|
||||||
|
// Then provide the remaining octet:
|
||||||
|
result = QLocal8Bit::convertToUnicode_sys("\xa0", UTF8, &state);
|
||||||
|
QEXPECT_FAIL("", "We don't store enough state to handle this case", Abort);
|
||||||
|
QCOMPARE(result, u"你");
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
|
|
||||||
|
// Now try a 4-octet GB 18030 sequence:
|
||||||
|
result.clear();
|
||||||
|
state.clear();
|
||||||
|
constexpr uint GB_18030 = 54936u;
|
||||||
|
const char sequence[] = "\x95\x32\x90\x31";
|
||||||
|
QByteArrayView octets = QByteArrayView(sequence);
|
||||||
|
result = QLocal8Bit::convertToUnicode_sys(octets.first(2), GB_18030, &state);
|
||||||
|
QCOMPARE(result, QString());
|
||||||
|
QVERIFY(result.isNull());
|
||||||
|
QEXPECT_FAIL("",
|
||||||
|
"We don't store enough state to handle this case. + GB 18030 does not work with "
|
||||||
|
"the MB_PRECOMPOSED flag.",
|
||||||
|
Abort);
|
||||||
|
QCOMPARE_GT(state.remainingChars, 0);
|
||||||
|
// Then provide one more octet:
|
||||||
|
result = QLocal8Bit::convertToUnicode_sys(octets.sliced(2, 1), GB_18030, &state);
|
||||||
|
QCOMPARE(result, QString());
|
||||||
|
QVERIFY(result.isNull());
|
||||||
|
QCOMPARE_GT(state.remainingChars, 0);
|
||||||
|
// Then provide the last octet
|
||||||
|
result = QLocal8Bit::convertToUnicode_sys(octets.last(1), GB_18030, &state);
|
||||||
|
QCOMPARE(result, u"𠂇");
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_QStringConverter::toLocal8Bit_data()
|
||||||
|
{
|
||||||
|
fromLocal8Bit_data();
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_QStringConverter::toLocal8Bit()
|
||||||
|
{
|
||||||
|
QFETCH(const QByteArray, eightBit);
|
||||||
|
QFETCH(const QString, utf16);
|
||||||
|
QFETCH(const quint32, codePage);
|
||||||
|
|
||||||
|
QStringConverter::State state;
|
||||||
|
|
||||||
|
QByteArray result = QLocal8Bit::convertFromUnicode_sys(utf16, codePage, &state);
|
||||||
|
QCOMPARE(result, eightBit);
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
|
|
||||||
|
result.clear();
|
||||||
|
state.clear();
|
||||||
|
for (QChar c : utf16)
|
||||||
|
result += QLocal8Bit::convertFromUnicode_sys(QStringView(&c, 1), codePage, &state);
|
||||||
|
QCOMPARE(result, eightBit);
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_QStringConverter::toLocal8Bit_special_cases()
|
||||||
|
{
|
||||||
|
QStringConverter::State state;
|
||||||
|
// Normally utf8 goes through a different code path, but we can force it here
|
||||||
|
constexpr uint UTF8 = 65001u;
|
||||||
|
// Decode a 2-code unit character, but only provide 1 code unit at first:
|
||||||
|
const char16_t a[] = u"𬽦";
|
||||||
|
QStringView firstHalf = QStringView(a, 1);
|
||||||
|
QByteArray result = QLocal8Bit::convertFromUnicode_sys(firstHalf, UTF8, &state);
|
||||||
|
QEXPECT_FAIL("", "We don't currently handle missing the low surrogate", Abort);
|
||||||
|
QCOMPARE(result, QString());
|
||||||
|
QVERIFY(result.isNull());
|
||||||
|
QCOMPARE_GT(state.remainingChars, 0);
|
||||||
|
// Then provide the second code unit:
|
||||||
|
QStringView secondHalf = QStringView(a + 1, 1);
|
||||||
|
result = QLocal8Bit::convertFromUnicode_sys(secondHalf, UTF8, &state);
|
||||||
|
QCOMPARE(result, "\xf0\xac\xbd\xa6"_ba);
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
|
|
||||||
|
// Retain compat with the behavior for toLocal8Bit:
|
||||||
|
QCOMPARE(firstHalf.toLocal8Bit(), "?");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
struct DontCrashAtExit {
|
struct DontCrashAtExit {
|
||||||
~DontCrashAtExit() {
|
~DontCrashAtExit() {
|
||||||
QStringDecoder decoder(QStringDecoder::Utf8);
|
QStringDecoder decoder(QStringDecoder::Utf8);
|
||||||
|
Loading…
Reference in New Issue
Block a user