Long live QUtf8::convertFromLatin1()!

With the introduction of QAnyStringView, overloading based on UTF-8
and Latin-1 is becoming more common. Often, the two overloads can
share the processing backend, because we're only interested in the
US-ASCII subset of each.

But if they can't, we need a faster way to convert L1 into UTF-8 than
going via UTF-16. This is where the new private API comes in.

Eventually, we should have the converse operation, too, to complete
the set of direct conversions between the three possible
QAnyStringView encodings (L1/U8/U16), but this direction is easier to
code (there are no error cases) and more immediately useful, so
provide L1->U8 alone for now.

Change-Id: I3f7e1a9c89979d0eb604cb9e42dedf3d514fca2c
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Marc Mutz 2022-10-31 17:27:15 +01:00
parent 3834fee3d3
commit 8acec4dbe6
3 changed files with 32 additions and 0 deletions

View File

@ -571,6 +571,21 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta
return reinterpret_cast<char *>(cursor);
}
char *QUtf8::convertFromLatin1(char *out, QLatin1StringView in)
{
// ### SIMD-optimize:
for (uchar ch : in) {
if (ch < 128) {
*out++ = ch;
} else {
// as per https://en.wikipedia.org/wiki/UTF-8#Encoding, 2nd row
*out++ = 0b110'0'0000u | (ch >> 6);
*out++ = 0b10'00'0000u | (ch & 0b0011'1111);
}
}
return out;
}
QString QUtf8::convertToUnicode(QByteArrayView in)
{
// UTF-8 to UTF-16 always needs the exact same number of words or less:

View File

@ -270,6 +270,7 @@ struct QUtf8
Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in);
Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in, QStringConverterBase::State *state);
static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state);
// Converts the Latin-1 text \a in to UTF-8, writing the bytes to \a out.
// Returns a pointer just past the last byte written. Every input character
// produces one or two bytes, so \a out needs room for two bytes per input
// character in the worst case.
Q_CORE_EXPORT static char *convertFromLatin1(char *out, QLatin1StringView in);
struct ValidUtf8Result {
bool isValidUtf8;
bool isValidAscii;

View File

@ -6,9 +6,11 @@
#include <QtCore/private/qglobal_p.h>
#include <qstringconverter.h>
#include <private/qstringconverter_p.h>
#include <qthreadpool.h>
#include <array>
#include <numeric>
using namespace Qt::StringLiterals;
@ -130,6 +132,8 @@ private slots:
void roundtrip_data();
void roundtrip();
void convertL1U8();
#if QT_CONFIG(icu)
void roundtripIcu_data();
void roundtripIcu();
@ -427,6 +431,18 @@ void tst_QStringConverter::roundtrip()
QCOMPARE(decoded, uniString);
}
void tst_QStringConverter::convertL1U8()
{
{
std::array<char, 256> latin1;
std::iota(latin1.data(), latin1.data() + latin1.size(), uchar(0));
std::array<char, 512> utf8;
auto out = QUtf8::convertFromLatin1(utf8.data(), QLatin1StringView{latin1.data(), latin1.size()});
QCOMPARE(QString::fromLatin1(latin1.data(), latin1.size()),
QString::fromUtf8(utf8.data(), out - utf8.data()));
}
}
#if QT_CONFIG(icu)
void tst_QStringConverter::roundtripIcu_data()