Cleanup QUtf32::convertToUnicode

Cleanup the implementation and improve performance by
handling the first char outside of the main loop.

Also avoid one copy of the data when using QStringConverter.

Change-Id: Ie698e62de1864352612a4dddc907cb139e7e6407
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Lars Knoll 2020-04-23 16:44:38 +02:00
parent b1d8ce32cd
commit 4b2edde373
2 changed files with 91 additions and 68 deletions

View File

@ -903,18 +903,18 @@ QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringCon
if (writeBom)
length += 4;
QByteArray ba(length, Qt::Uninitialized);
char *end = convertFromUnicode(ba.data(), uc, len, state, endian);
char *end = convertFromUnicode(ba.data(), QStringView(uc, len), state, endian);
Q_ASSERT(end - ba.constData() == length);
Q_UNUSED(end);
return ba;
}
char *QUtf32::convertFromUnicode(char *out, const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian)
char *QUtf32::convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian)
{
Q_ASSERT(state);
bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
qsizetype length = 4*len;
qsizetype length = 4*in.length();
if (writeBom)
length += 4;
@ -937,7 +937,8 @@ char *QUtf32::convertFromUnicode(char *out, const QChar *uc, qsizetype len, QStr
state->internalState |= HeaderDone;
}
const QChar *end = uc + len;
const QChar *uc = in.data();
const QChar *end = in.data() + in.length();
QChar ch;
uint ucs4;
if (state->remainingChars == 1) {
@ -981,69 +982,96 @@ decode_surrogate:
QString QUtf32::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian)
{
Q_ASSERT(state);
if (state->flags & QStringConverter::Flag::Stateless) // temporary
state = nullptr;
uchar tuple[4];
int num = 0;
bool headerdone = state && state->internalState & HeaderDone;
if (state) {
if (state->flags & QStringConverter::Flag::DontSkipInitialBom)
headerdone = true;
if (endian == DetectEndianness)
endian = (DataEndianness)state->state_data[Endian];
num = state->remainingChars;
memcpy(tuple, &state->state_data[Data], 4);
}
if (headerdone && endian == DetectEndianness)
endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
QString result;
result.resize((num + len) >> 2 << 1); // worst case
QChar *qch = (QChar *)result.data();
result.resize((len + 7) >> 1); // worst case
QChar *end = convertToUnicode(result.data(), chars, len, state, endian);
result.truncate(end - result.constData());
return result;
}
QChar *QUtf32::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian)
{
Q_ASSERT(state);
if (endian == DetectEndianness)
endian = (DataEndianness)state->state_data[Endian];
const char *end = chars + len;
uchar tuple[4];
memcpy(tuple, &state->state_data[Data], 4);
// make sure we can decode at least one char
if (state->remainingChars + len < 4) {
if (len) {
while (chars < end) {
tuple[state->remainingChars] = *chars;
++state->remainingChars;
++chars;
}
Q_ASSERT(state->remainingChars < 4);
memcpy(&state->state_data[Data], tuple, 4);
}
return out;
}
bool headerdone = state->internalState & HeaderDone;
if (state->flags & QStringConverter::Flag::DontSkipInitialBom)
headerdone = true;
int num = state->remainingChars;
state->remainingChars = 0;
if (!headerdone || endian == DetectEndianness || num) {
while (num < 4)
tuple[num++] = *chars++;
if (endian == DetectEndianness) {
if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0) {
endian = LittleEndianness;
} else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff) {
endian = BigEndianness;
} else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
endian = BigEndianness;
} else {
endian = LittleEndianness;
}
}
uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
if (headerdone || code != QChar::ByteOrderMark) {
if (QChar::requiresSurrogates(code)) {
*out++ = QChar(QChar::highSurrogate(code));
*out++ = QChar(QChar::lowSurrogate(code));
} else {
*out++ = QChar(code);
}
}
num = 0;
} else if (endian == DetectEndianness) {
endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
}
state->state_data[Endian] = endian;
state->internalState |= HeaderDone;
while (chars < end) {
tuple[num++] = *chars++;
if (num == 4) {
if (!headerdone) {
headerdone = true;
if (endian == DetectEndianness) {
if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BigEndianness) {
endian = LittleEndianness;
num = 0;
continue;
} else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LittleEndianness) {
endian = BigEndianness;
num = 0;
continue;
} else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
endian = BigEndianness;
} else {
endian = LittleEndianness;
}
} else if (((endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
num = 0;
continue;
}
}
uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
for (char16_t c : QChar::fromUcs4(code))
*qch++ = c;
*out++ = c;
num = 0;
}
}
result.truncate(qch - result.unicode());
if (state) {
if (headerdone)
state->internalState |= HeaderDone;
state->state_data[Endian] = endian;
state->remainingChars = num;
memcpy(&state->state_data[Data], tuple, 4);
if (num) {
if (state->flags & QStringDecoder::Flag::Stateless) {
*out++ = QChar::ReplacementCharacter;
} else {
state->state_data[Endian] = endian;
state->remainingChars = num;
memcpy(&state->state_data[Data], tuple, 4);
}
}
return result;
return out;
}
QString qFromUtfEncoded(const QByteArray &ba)
@ -1322,38 +1350,32 @@ static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state
static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{
QString s = QUtf32::convertToUnicode(in, length, state);
memcpy(out, s.constData(), s.length()*sizeof(QChar));
return out + s.length();
return QUtf32::convertToUnicode(out, in, length, state, DetectEndianness);
}
static char *toUtf32(char *out, QStringView in, QStringConverter::State *state)
{
return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, DetectEndianness);
return QUtf32::convertFromUnicode(out, in, state, DetectEndianness);
}
static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{
QString s = QUtf32::convertToUnicode(in, length, state, BigEndianness);
memcpy(out, s.constData(), s.length()*sizeof(QChar));
return out + s.length();
return QUtf32::convertToUnicode(out, in, length, state, BigEndianness);
}
static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state)
{
return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, BigEndianness);
return QUtf32::convertFromUnicode(out, in, state, BigEndianness);
}
static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{
QString s = QUtf32::convertToUnicode(in, length, state, LittleEndianness);
memcpy(out, s.constData(), s.length()*sizeof(QChar));
return out + s.length();
return QUtf32::convertToUnicode(out, in, length, state, LittleEndianness);
}
static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state)
{
return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, LittleEndianness);
return QUtf32::convertFromUnicode(out, in, state, LittleEndianness);
}
void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept;
@ -1411,7 +1433,7 @@ static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); }
static qsizetype fromUtf16Len(qsizetype l) { return l/2 + 2; }
static qsizetype toUtf16Len(qsizetype l) { return 2*(l + 1); }
static qsizetype fromUtf32Len(qsizetype l) { return l + 1; }
static qsizetype fromUtf32Len(qsizetype l) { return l/2 + 2; }
static qsizetype toUtf32Len(qsizetype l) { return 4*(l + 1); }
static qsizetype fromLatin1Len(qsizetype l) { return l + 1; }

View File

@ -312,9 +312,10 @@ struct QUtf16
struct QUtf32
{
static QChar *convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian);
static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
static char *convertFromUnicode(char *out, const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian);
static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian);
};
struct QLocal8Bit