QUnicodeTables: use array for case folding tables

Instead of four pairs of :1 :15 bit fields, use an array of four :1,
:15 structs.  This allows replacing the case folding traits classes
with a simple enum that indexes into said array.

I don't know what the WASM #ifdef'ed code is supposed to effect (a :0
bit-field is only useful to separate adjacent bit-fields into separate
memory locations for multi-threading), but I thought it safer to leave
it in, and that means the array must be a 64-bit block of its own, so
I had to move two fields around.

Saves ~4.5KiB in text size on optimized GCC 10 LTO Linux AMD64 builds.

Change-Id: Ib52cd7706342d5227b50b57545d073829c45da9a
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
Marc Mutz 2019-09-03 20:53:31 +02:00
parent 90ae72d852
commit effbf147a4
5 changed files with 2732 additions and 2789 deletions

View File

@ -1465,18 +1465,18 @@ QChar::UnicodeVersion QChar::currentUnicodeVersion() noexcept
} }
template <typename Traits, typename T> template <typename T>
Q_DECL_CONST_FUNCTION static inline T convertCase_helper(T uc) noexcept Q_DECL_CONST_FUNCTION static inline T convertCase_helper(T uc, QUnicodeTables::Case which) noexcept
{ {
const QUnicodeTables::Properties *prop = qGetProp(uc); const auto fold = qGetProp(uc)->cases[which];
if (Q_UNLIKELY(Traits::caseSpecial(prop))) { if (Q_UNLIKELY(fold.special)) {
const ushort *specialCase = specialCaseMap + Traits::caseDiff(prop); const ushort *specialCase = specialCaseMap + fold.diff;
// so far, there are no special cases beyond BMP (guaranteed by the qunicodetables generator) // so far, there are no special cases beyond BMP (guaranteed by the qunicodetables generator)
return *specialCase == 1 ? specialCase[1] : uc; return *specialCase == 1 ? specialCase[1] : uc;
} }
return uc + Traits::caseDiff(prop); return uc + fold.diff;
} }
/*! /*!
@ -1496,7 +1496,7 @@ uint QChar::toLower(uint ucs4) noexcept
{ {
if (ucs4 > LastValidCodePoint) if (ucs4 > LastValidCodePoint)
return ucs4; return ucs4;
return convertCase_helper<QUnicodeTables::LowercaseTraits>(ucs4); return convertCase_helper(ucs4, QUnicodeTables::LowerCase);
} }
/*! /*!
@ -1516,7 +1516,7 @@ uint QChar::toUpper(uint ucs4) noexcept
{ {
if (ucs4 > LastValidCodePoint) if (ucs4 > LastValidCodePoint)
return ucs4; return ucs4;
return convertCase_helper<QUnicodeTables::UppercaseTraits>(ucs4); return convertCase_helper(ucs4, QUnicodeTables::UpperCase);
} }
/*! /*!
@ -1536,7 +1536,7 @@ uint QChar::toTitleCase(uint ucs4) noexcept
{ {
if (ucs4 > LastValidCodePoint) if (ucs4 > LastValidCodePoint)
return ucs4; return ucs4;
return convertCase_helper<QUnicodeTables::TitlecaseTraits>(ucs4); return convertCase_helper(ucs4, QUnicodeTables::TitleCase);
} }
static inline uint foldCase(const ushort *ch, const ushort *start) static inline uint foldCase(const ushort *ch, const ushort *start)
@ -1544,7 +1544,7 @@ static inline uint foldCase(const ushort *ch, const ushort *start)
uint ucs4 = *ch; uint ucs4 = *ch;
if (QChar::isLowSurrogate(ucs4) && ch > start && QChar::isHighSurrogate(*(ch - 1))) if (QChar::isLowSurrogate(ucs4) && ch > start && QChar::isHighSurrogate(*(ch - 1)))
ucs4 = QChar::surrogateToUcs4(*(ch - 1), ucs4); ucs4 = QChar::surrogateToUcs4(*(ch - 1), ucs4);
return convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4); return convertCase_helper(ucs4, QUnicodeTables::CaseFold);
} }
static inline uint foldCase(uint ch, uint &last) noexcept static inline uint foldCase(uint ch, uint &last) noexcept
@ -1553,12 +1553,12 @@ static inline uint foldCase(uint ch, uint &last) noexcept
if (QChar::isLowSurrogate(ucs4) && QChar::isHighSurrogate(last)) if (QChar::isLowSurrogate(ucs4) && QChar::isHighSurrogate(last))
ucs4 = QChar::surrogateToUcs4(last, ucs4); ucs4 = QChar::surrogateToUcs4(last, ucs4);
last = ch; last = ch;
return convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4); return convertCase_helper(ucs4, QUnicodeTables::CaseFold);
} }
static inline ushort foldCase(ushort ch) noexcept static inline ushort foldCase(ushort ch) noexcept
{ {
return convertCase_helper<QUnicodeTables::CasefoldTraits>(ch); return convertCase_helper(ch, QUnicodeTables::CaseFold);
} }
static inline QChar foldCase(QChar ch) noexcept static inline QChar foldCase(QChar ch) noexcept
@ -1582,7 +1582,7 @@ uint QChar::toCaseFolded(uint ucs4) noexcept
{ {
if (ucs4 > LastValidCodePoint) if (ucs4 > LastValidCodePoint)
return ucs4; return ucs4;
return convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4); return convertCase_helper(ucs4, QUnicodeTables::CaseFold);
} }
/*! /*!

View File

@ -6629,9 +6629,9 @@ namespace QUnicodeTables {
reallocate memory to grow the buffer. In that case, we need to adjust the \a reallocate memory to grow the buffer. In that case, we need to adjust the \a
it pointer. it pointer.
*/ */
template <typename Traits, typename T> template <typename T>
Q_NEVER_INLINE Q_NEVER_INLINE
static QString detachAndConvertCase(T &str, QStringIterator it) static QString detachAndConvertCase(T &str, QStringIterator it, QUnicodeTables::Case which)
{ {
Q_ASSERT(!str.isEmpty()); Q_ASSERT(!str.isEmpty());
QString s = std::move(str); // will copy if T is const QString QString s = std::move(str); // will copy if T is const QString
@ -6640,10 +6640,10 @@ static QString detachAndConvertCase(T &str, QStringIterator it)
do { do {
uint uc = it.nextUnchecked(); uint uc = it.nextUnchecked();
const QUnicodeTables::Properties *prop = qGetProp(uc); const auto fold = qGetProp(uc)->cases[which];
signed short caseDiff = Traits::caseDiff(prop); signed short caseDiff = fold.diff;
if (Q_UNLIKELY(Traits::caseSpecial(prop))) { if (Q_UNLIKELY(fold.special)) {
const ushort *specialCase = specialCaseMap + caseDiff; const ushort *specialCase = specialCaseMap + caseDiff;
ushort length = *specialCase++; ushort length = *specialCase++;
@ -6674,8 +6674,8 @@ static QString detachAndConvertCase(T &str, QStringIterator it)
return s; return s;
} }
template <typename Traits, typename T> template <typename T>
static QString convertCase(T &str) static QString convertCase(T &str, QUnicodeTables::Case which)
{ {
const QChar *p = str.constBegin(); const QChar *p = str.constBegin();
const QChar *e = p + str.size(); const QChar *e = p + str.size();
@ -6687,9 +6687,9 @@ static QString convertCase(T &str)
QStringIterator it(p, e); QStringIterator it(p, e);
while (it.hasNext()) { while (it.hasNext()) {
uint uc = it.nextUnchecked(); uint uc = it.nextUnchecked();
if (Traits::caseDiff(qGetProp(uc))) { if (qGetProp(uc)->cases[which].diff) {
it.recedeUnchecked(); it.recedeUnchecked();
return detachAndConvertCase<Traits>(str, it); return detachAndConvertCase(str, it, which);
} }
} }
return std::move(str); return std::move(str);
@ -6698,12 +6698,12 @@ static QString convertCase(T &str)
QString QString::toLower_helper(const QString &str) QString QString::toLower_helper(const QString &str)
{ {
return QUnicodeTables::convertCase<QUnicodeTables::LowercaseTraits>(str); return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
} }
QString QString::toLower_helper(QString &str) QString QString::toLower_helper(QString &str)
{ {
return QUnicodeTables::convertCase<QUnicodeTables::LowercaseTraits>(str); return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
} }
/*! /*!
@ -6715,12 +6715,12 @@ QString QString::toLower_helper(QString &str)
QString QString::toCaseFolded_helper(const QString &str) QString QString::toCaseFolded_helper(const QString &str)
{ {
return QUnicodeTables::convertCase<QUnicodeTables::CasefoldTraits>(str); return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
} }
QString QString::toCaseFolded_helper(QString &str) QString QString::toCaseFolded_helper(QString &str)
{ {
return QUnicodeTables::convertCase<QUnicodeTables::CasefoldTraits>(str); return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
} }
/*! /*!
@ -6738,12 +6738,12 @@ QString QString::toCaseFolded_helper(QString &str)
QString QString::toUpper_helper(const QString &str) QString QString::toUpper_helper(const QString &str)
{ {
return QUnicodeTables::convertCase<QUnicodeTables::UppercaseTraits>(str); return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
} }
QString QString::toUpper_helper(QString &str) QString QString::toUpper_helper(QString &str)
{ {
return QUnicodeTables::convertCase<QUnicodeTables::UppercaseTraits>(str); return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
} }
#if QT_DEPRECATED_SINCE(5, 14) #if QT_DEPRECATED_SINCE(5, 14)

File diff suppressed because it is too large Load Diff

View File

@ -63,6 +63,15 @@ QT_BEGIN_NAMESPACE
namespace QUnicodeTables { namespace QUnicodeTables {
enum Case {
LowerCase,
UpperCase,
TitleCase,
CaseFold,
NumCases
};
struct Properties { struct Properties {
ushort category : 8; /* 5 used */ ushort category : 8; /* 5 used */
ushort direction : 8; /* 5 used */ ushort direction : 8; /* 5 used */
@ -70,19 +79,15 @@ struct Properties {
ushort joining : 3; ushort joining : 3;
signed short digitValue : 5; signed short digitValue : 5;
signed short mirrorDiff : 16; signed short mirrorDiff : 16;
ushort lowerCaseSpecial : 1; ushort unicodeVersion : 8; /* 5 used */
signed short lowerCaseDiff : 15; ushort nfQuickCheck : 8;
#ifdef Q_OS_WASM #ifdef Q_OS_WASM
unsigned char : 0; //wasm 64 packing trick unsigned char : 0; //wasm 64 packing trick
#endif #endif
ushort upperCaseSpecial : 1; struct {
signed short upperCaseDiff : 15; ushort special : 1;
ushort titleCaseSpecial : 1; signed short diff : 15;
signed short titleCaseDiff : 15; } cases[NumCases];
ushort caseFoldSpecial : 1;
signed short caseFoldDiff : 15;
ushort unicodeVersion : 8; /* 5 used */
ushort nfQuickCheck : 8;
#ifdef Q_OS_WASM #ifdef Q_OS_WASM
unsigned char : 0; //wasm 64 packing trick unsigned char : 0; //wasm 64 packing trick
#endif #endif
@ -96,38 +101,6 @@ struct Properties {
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) noexcept; Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) noexcept;
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2) noexcept; Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2) noexcept;
struct LowercaseTraits
{
static inline signed short caseDiff(const Properties *prop)
{ return prop->lowerCaseDiff; }
static inline bool caseSpecial(const Properties *prop)
{ return prop->lowerCaseSpecial; }
};
struct UppercaseTraits
{
static inline signed short caseDiff(const Properties *prop)
{ return prop->upperCaseDiff; }
static inline bool caseSpecial(const Properties *prop)
{ return prop->upperCaseSpecial; }
};
struct TitlecaseTraits
{
static inline signed short caseDiff(const Properties *prop)
{ return prop->titleCaseDiff; }
static inline bool caseSpecial(const Properties *prop)
{ return prop->titleCaseSpecial; }
};
struct CasefoldTraits
{
static inline signed short caseDiff(const Properties *prop)
{ return prop->caseFoldDiff; }
static inline bool caseSpecial(const Properties *prop)
{ return prop->caseFoldSpecial; }
};
Q_STATIC_ASSERT(sizeof(Properties) == 20); Q_STATIC_ASSERT(sizeof(Properties) == 20);
enum GraphemeBreakClass { enum GraphemeBreakClass {

View File

@ -789,6 +789,15 @@ static void initScriptMap()
// Keep this one in sync with the code in createPropertyInfo // Keep this one in sync with the code in createPropertyInfo
static const char *property_string = static const char *property_string =
"enum Case {\n"
" LowerCase,\n"
" UpperCase,\n"
" TitleCase,\n"
" CaseFold,\n"
"\n"
" NumCases\n"
"};\n"
"\n"
"struct Properties {\n" "struct Properties {\n"
" ushort category : 8; /* 5 used */\n" " ushort category : 8; /* 5 used */\n"
" ushort direction : 8; /* 5 used */\n" " ushort direction : 8; /* 5 used */\n"
@ -796,19 +805,15 @@ static const char *property_string =
" ushort joining : 3;\n" " ushort joining : 3;\n"
" signed short digitValue : 5;\n" " signed short digitValue : 5;\n"
" signed short mirrorDiff : 16;\n" " signed short mirrorDiff : 16;\n"
" ushort lowerCaseSpecial : 1;\n" " ushort unicodeVersion : 8; /* 5 used */\n"
" signed short lowerCaseDiff : 15;\n" " ushort nfQuickCheck : 8;\n" // could be narrowed
"#ifdef Q_OS_WASM\n" "#ifdef Q_OS_WASM\n"
" unsigned char : 0; //wasm 64 packing trick\n" " unsigned char : 0; //wasm 64 packing trick\n"
"#endif\n" "#endif\n"
" ushort upperCaseSpecial : 1;\n" " struct {\n"
" signed short upperCaseDiff : 15;\n" " ushort special : 1;\n"
" ushort titleCaseSpecial : 1;\n" " signed short diff : 15;\n"
" signed short titleCaseDiff : 15;\n" " } cases[NumCases];\n"
" ushort caseFoldSpecial : 1;\n"
" signed short caseFoldDiff : 15;\n"
" ushort unicodeVersion : 8; /* 5 used */\n"
" ushort nfQuickCheck : 8;\n" // could be narrowed
"#ifdef Q_OS_WASM\n" "#ifdef Q_OS_WASM\n"
" unsigned char : 0; //wasm 64 packing trick\n" " unsigned char : 0; //wasm 64 packing trick\n"
"#endif\n" "#endif\n"
@ -820,38 +825,6 @@ static const char *property_string =
"};\n\n" "};\n\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) noexcept;\n" "Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) noexcept;\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2) noexcept;\n" "Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2) noexcept;\n"
"\n"
"struct LowercaseTraits\n"
"{\n"
" static inline signed short caseDiff(const Properties *prop)\n"
" { return prop->lowerCaseDiff; }\n"
" static inline bool caseSpecial(const Properties *prop)\n"
" { return prop->lowerCaseSpecial; }\n"
"};\n"
"\n"
"struct UppercaseTraits\n"
"{\n"
" static inline signed short caseDiff(const Properties *prop)\n"
" { return prop->upperCaseDiff; }\n"
" static inline bool caseSpecial(const Properties *prop)\n"
" { return prop->upperCaseSpecial; }\n"
"};\n"
"\n"
"struct TitlecaseTraits\n"
"{\n"
" static inline signed short caseDiff(const Properties *prop)\n"
" { return prop->titleCaseDiff; }\n"
" static inline bool caseSpecial(const Properties *prop)\n"
" { return prop->titleCaseSpecial; }\n"
"};\n"
"\n"
"struct CasefoldTraits\n"
"{\n"
" static inline signed short caseDiff(const Properties *prop)\n"
" { return prop->caseFoldDiff; }\n"
" static inline bool caseSpecial(const Properties *prop)\n"
" { return prop->caseFoldSpecial; }\n"
"};\n"
"\n"; "\n";
static const char *methods = static const char *methods =
@ -2473,36 +2446,33 @@ static QByteArray createPropertyInfo()
// " signed short mirrorDiff : 16;\n" // " signed short mirrorDiff : 16;\n"
out += QByteArray::number( p.mirrorDiff ); out += QByteArray::number( p.mirrorDiff );
out += ", "; out += ", ";
// " ushort lowerCaseSpecial : 1;\n"
// " signed short lowerCaseDiff : 15;\n"
out += QByteArray::number( p.lowerCaseSpecial );
out += ", ";
out += QByteArray::number( p.lowerCaseDiff );
out += ", ";
// " ushort upperCaseSpecial : 1;\n"
// " signed short upperCaseDiff : 15;\n"
out += QByteArray::number( p.upperCaseSpecial );
out += ", ";
out += QByteArray::number( p.upperCaseDiff );
out += ", ";
// " ushort titleCaseSpecial : 1;\n"
// " signed short titleCaseDiff : 15;\n"
out += QByteArray::number( p.titleCaseSpecial );
out += ", ";
out += QByteArray::number( p.titleCaseDiff );
out += ", ";
// " ushort caseFoldSpecial : 1;\n"
// " signed short caseFoldDiff : 15;\n"
out += QByteArray::number( p.caseFoldSpecial );
out += ", ";
out += QByteArray::number( p.caseFoldDiff );
out += ", ";
// " ushort unicodeVersion : 8; /* 5 used */\n" // " ushort unicodeVersion : 8; /* 5 used */\n"
out += QByteArray::number( p.age ); out += QByteArray::number( p.age );
out += ", "; out += ", ";
// " ushort nfQuickCheck : 8;\n" // " ushort nfQuickCheck : 8;\n"
out += QByteArray::number( p.nfQuickCheck ); out += QByteArray::number( p.nfQuickCheck );
out += ", "; out += ", ";
// " struct {\n"
// " ushort special : 1;\n"
// " signed short diff : 15;\n"
// " } cases[NumCases];\n"
out += " { {";
out += QByteArray::number( p.lowerCaseSpecial );
out += ", ";
out += QByteArray::number( p.lowerCaseDiff );
out += "}, {";
out += QByteArray::number( p.upperCaseSpecial );
out += ", ";
out += QByteArray::number( p.upperCaseDiff );
out += "}, {";
out += QByteArray::number( p.titleCaseSpecial );
out += ", ";
out += QByteArray::number( p.titleCaseDiff );
out += "}, {";
out += QByteArray::number( p.caseFoldSpecial );
out += ", ";
out += QByteArray::number( p.caseFoldDiff );
out += "} }, ";
// " ushort graphemeBreakClass : 5; /* 5 used */\n" // " ushort graphemeBreakClass : 5; /* 5 used */\n"
// " ushort wordBreakClass : 5; /* 5 used */\n" // " ushort wordBreakClass : 5; /* 5 used */\n"
// " ushort lineBreakClass : 6; /* 6 used */\n" // " ushort lineBreakClass : 6; /* 6 used */\n"