Long live QUtf8::convertFromLatin1()!
With the introduction of QAnyStringView, overloading based on UTF-8 and Latin-1 is becoming more common. Often, the two overloads can share the processing backend, because we're only interested in the US-ASCII subset of each. But if they can't, we need a faster way to convert L1 into UTF-8 than going via UTF-16. This is where the new private API comes in. Eventually, we should have the converse operation, too, to complete the set of direct conversions between the three possible QAnyStringView encodings L1/U8/U16, but this direction is easier to code (there are no error cases) and more immediately useful, so provide L1->U8 alone for now. Change-Id: I3f7e1a9c89979d0eb604cb9e42dedf3d514fca2c Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org> Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
3834fee3d3
commit
8acec4dbe6
@ -571,6 +571,21 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta
|
||||
return reinterpret_cast<char *>(cursor);
|
||||
}
|
||||
|
||||
char *QUtf8::convertFromLatin1(char *out, QLatin1StringView in)
|
||||
{
|
||||
// ### SIMD-optimize:
|
||||
for (uchar ch : in) {
|
||||
if (ch < 128) {
|
||||
*out++ = ch;
|
||||
} else {
|
||||
// as per https://en.wikipedia.org/wiki/UTF-8#Encoding, 2nd row
|
||||
*out++ = 0b110'0'0000u | (ch >> 6);
|
||||
*out++ = 0b10'00'0000u | (ch & 0b0011'1111);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
QString QUtf8::convertToUnicode(QByteArrayView in)
|
||||
{
|
||||
// UTF-8 to UTF-16 always needs the exact same number of words or less:
|
||||
|
@ -270,6 +270,7 @@ struct QUtf8
|
||||
Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in);
|
||||
Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in, QStringConverterBase::State *state);
|
||||
static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state);
|
||||
// Converts the Latin-1 string \a in to UTF-8, writing to \a out, and returns
// a pointer one past the last byte written. Each input character produces one
// byte (if < 0x80) or two bytes (otherwise), so \a out needs room for up to
// 2 * in.size() bytes. No terminator is written.
Q_CORE_EXPORT static char *convertFromLatin1(char *out, QLatin1StringView in);
|
||||
struct ValidUtf8Result {
|
||||
bool isValidUtf8;
|
||||
bool isValidAscii;
|
||||
|
@ -6,9 +6,11 @@
|
||||
|
||||
#include <QtCore/private/qglobal_p.h>
|
||||
#include <qstringconverter.h>
|
||||
#include <private/qstringconverter_p.h>
|
||||
#include <qthreadpool.h>
|
||||
|
||||
#include <array>
|
||||
#include <numeric>
|
||||
|
||||
using namespace Qt::StringLiterals;
|
||||
|
||||
@ -130,6 +132,8 @@ private slots:
|
||||
void roundtrip_data();
|
||||
void roundtrip();
|
||||
|
||||
void convertL1U8();
|
||||
|
||||
#if QT_CONFIG(icu)
|
||||
void roundtripIcu_data();
|
||||
void roundtripIcu();
|
||||
@ -427,6 +431,18 @@ void tst_QStringConverter::roundtrip()
|
||||
QCOMPARE(decoded, uniString);
|
||||
}
|
||||
|
||||
void tst_QStringConverter::convertL1U8()
|
||||
{
|
||||
{
|
||||
std::array<char, 256> latin1;
|
||||
std::iota(latin1.data(), latin1.data() + latin1.size(), uchar(0));
|
||||
std::array<char, 512> utf8;
|
||||
auto out = QUtf8::convertFromLatin1(utf8.data(), QLatin1StringView{latin1.data(), latin1.size()});
|
||||
QCOMPARE(QString::fromLatin1(latin1.data(), latin1.size()),
|
||||
QString::fromUtf8(utf8.data(), out - utf8.data()));
|
||||
}
|
||||
}
|
||||
|
||||
#if QT_CONFIG(icu)
|
||||
|
||||
void tst_QStringConverter::roundtripIcu_data()
|
||||
|
Loading…
Reference in New Issue
Block a user