qunicodetables generator: improve the output and the generated code

better memory usage report;
additional asserts for the conditions the implementation depends on;
a namespace for the internal static data;
styling fixes

Change-Id: Id4048ff6104c56b5f590f9ac6fbf7c0bce79ec47
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
Konstantin Ritt 2012-04-23 06:00:16 +03:00 committed by Qt by Nokia
parent ba0d752c2d
commit f948bb3c6c
3 changed files with 142 additions and 119 deletions

View File

@ -41,8 +41,12 @@
/* This file is autogenerated from the Unicode 5.0 database. Do not edit */
#include "qunicodetables_p.h"
QT_BEGIN_NAMESPACE
namespace QUnicodeTables {
static const unsigned short uc_property_trie[] = {
// 0 - 0x11000
@ -3422,9 +3426,9 @@ static const unsigned short uc_property_trie[] = {
: (uc_property_trie[uc_property_trie[((ucs4 - 0x11000)>>8) + 0x880] + (ucs4 & 0xff)]))
#define GET_PROP_INDEX_UCS2(ucs2) \
(uc_property_trie[uc_property_trie[ucs2>>5] + (ucs2 & 0x1f)])
(uc_property_trie[uc_property_trie[ucs2>>5] + (ucs2 & 0x1f)])
static const QUnicodeTables::Properties uc_properties[] = {
static const Properties uc_properties[] = {
{ 9, 19, 18, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 },
{ 9, 15, 8, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3 },
{ 9, 30, 7, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1 },
@ -4319,38 +4323,37 @@ static const QUnicodeTables::Properties uc_properties[] = {
{ 3, 10, 2, 0, 0, 9, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7 },
{ 13, 11, 0, 0, 0, -1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 18, 12, 0, 0, 0, -1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 },
{ 12, 11, 0, 0, 0, -1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 12, 11, 0, 0, 0, -1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
static inline const QUnicodeTables::Properties *qGetProp(uint ucs4)
static inline const Properties *qGetProp(uint ucs4)
{
int index = GET_PROP_INDEX(ucs4);
const int index = GET_PROP_INDEX(ucs4);
return uc_properties + index;
}
static inline const QUnicodeTables::Properties *qGetProp(ushort ucs2)
static inline const Properties *qGetProp(ushort ucs2)
{
int index = GET_PROP_INDEX_UCS2(ucs2);
const int index = GET_PROP_INDEX_UCS2(ucs2);
return uc_properties + index;
}
Q_CORE_EXPORT const QUnicodeTables::Properties * QT_FASTCALL QUnicodeTables::properties(uint ucs4)
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4)
{
int index = GET_PROP_INDEX(ucs4);
return uc_properties + index;
return qGetProp(ucs4);
}
Q_CORE_EXPORT const QUnicodeTables::Properties * QT_FASTCALL QUnicodeTables::properties(ushort ucs2)
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2)
{
int index = GET_PROP_INDEX_UCS2(ucs2);
return uc_properties + index;
return qGetProp(ucs2);
}
Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)
Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)
{
return (QUnicodeTables::LineBreakClass)qGetProp(ucs4)->line_break_class;
return (LineBreakClass)qGetProp(ucs4)->line_break_class;
}
static const ushort specialCaseMap[] = {
0x0, // placeholder
0x2, 0x53, 0x73,
@ -4449,7 +4452,7 @@ static const ushort specialCaseMap[] = {
0x3, 0x3a9, 0x342, 0x345,
0x3, 0x3a9, 0x342, 0x399
};
#define SPECIAL_CASE_MAX_LEN 3
static const unsigned short uc_decomposition_trie[] = {
// 0 - 0x3400
@ -5893,7 +5896,7 @@ static const unsigned short uc_decomposition_trie[] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
};
#define GET_DECOMPOSITION_INDEX(ucs4) \
@ -5904,7 +5907,6 @@ static const unsigned short uc_decomposition_trie[] = {
: 0xffff))
static const unsigned short uc_decomposition_map[] = {
0x103, 0x20, 0x210, 0x20, 0x308, 0x109, 0x61, 0x210,
0x20, 0x304, 0x109, 0x32, 0x109, 0x33, 0x210, 0x20,
0x301, 0x110, 0x3bc, 0x210, 0x20, 0x327, 0x109, 0x31,
@ -7608,7 +7610,7 @@ static const unsigned short uc_decomposition_map[] = {
0xdd05, 0x201, 0xd868, 0xde0e, 0x201, 0xd868, 0xde91, 0x101,
0x9ebb, 0x101, 0x4d56, 0x101, 0x9ef9, 0x101, 0x9efe, 0x101,
0x9f05, 0x101, 0x9f0f, 0x101, 0x9f16, 0x101, 0x9f3b, 0x201,
0xd869, 0xde00,
0xd869, 0xde00
};
static const unsigned short uc_ligature_trie[] = {
@ -7766,13 +7768,13 @@ static const unsigned short uc_ligature_trie[] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0x700, 0x761, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
0xffff, 0x700, 0x761, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
};
#define GET_LIGATURE_INDEX(u2) (u2 < 0x3100 ? uc_ligature_trie[uc_ligature_trie[u2>>5] + (u2 & 0x1f)] : 0xffff);
#define GET_LIGATURE_INDEX(u2) \
(u2 < 0x3100 ? uc_ligature_trie[uc_ligature_trie[u2>>5] + (u2 & 0x1f)] : 0xffff);
static const unsigned short uc_ligature_map[] = {
0x54, 0x41, 0xc0, 0x45, 0xc8, 0x49, 0xcc, 0x4e,
0x1f8, 0x4f, 0xd2, 0x55, 0xd9, 0x57, 0x1e80, 0x59,
0x1ef2, 0x61, 0xe0, 0x65, 0xe8, 0x69, 0xec, 0x6e,
@ -8011,9 +8013,10 @@ static const unsigned short uc_ligature_map[] = {
0x30f7, 0x30f0, 0x30f8, 0x30f1, 0x30f9, 0x30f2, 0x30fa, 0x30fd,
0x30fe, 0xa, 0x306f, 0x3071, 0x3072, 0x3074, 0x3075, 0x3077,
0x3078, 0x307a, 0x307b, 0x307d, 0x30cf, 0x30d1, 0x30d2, 0x30d4,
0x30d5, 0x30d7, 0x30d8, 0x30da, 0x30db, 0x30dd,
0x30d5, 0x30d7, 0x30d8, 0x30da, 0x30db, 0x30dd
};
struct NormalizationCorrection {
uint ucs4;
uint old_mapping;
@ -8032,11 +8035,10 @@ static const NormalizationCorrection uc_normalization_corrections[] = {
enum { NumNormalizationCorrections = 6 };
enum { NormalizationCorrectionsVersionMax = 7 };
enum { UnicodeBlockCount = 512 }; // number of unicode blocks
enum { UnicodeBlockSize = 128 }; // size of each block
namespace QUnicodeTables {
static const unsigned char uc_scripts[] = {
Common, /* U+0000-007f */
Common, /* U+0080-00ff */
@ -9471,9 +9473,7 @@ static const unsigned char uc_scripts[] = {
Common, Common, Common, Common, Common, Common, Common, Common
};
} // namespace QUnicodeTables
Q_CORE_EXPORT int QT_FASTCALL QUnicodeTables::script(uint ucs4)
Q_CORE_EXPORT int QT_FASTCALL script(uint ucs4)
{
if (ucs4 > 0xffff)
return Common;
@ -9485,4 +9485,8 @@ Q_CORE_EXPORT int QT_FASTCALL QUnicodeTables::script(uint ucs4)
return script;
}
} // namespace QUnicodeTables
using namespace QUnicodeTables;
QT_END_NAMESPACE

View File

@ -217,7 +217,7 @@ namespace QUnicodeTables {
};
Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
inline int lineBreakClass(QChar ch)
{ return lineBreakClass(ch.unicode()); }

View File

@ -288,7 +288,7 @@ static void initSentenceBreak()
}
static const char *lineBreakClass =
static const char *line_break_class_string =
" // see http://www.unicode.org/reports/tr14/tr14-19.html\n"
" // we don't use the XX, AI and CB properties and map them to AL instead.\n"
" // as we don't support any EBDIC based OS'es, NL is ignored and mapped to AL as well.\n"
@ -397,7 +397,7 @@ static const char *property_string =
" Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";
static const char *methods =
" Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
" Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"
" inline int lineBreakClass(QChar ch)\n"
" { return lineBreakClass(ch.unicode()); }\n"
"\n"
@ -458,7 +458,6 @@ struct PropertyFlags {
static QList<int> specialCaseMap;
static int specialCaseMaxLen = 0;
static int appendToSpecialCaseMap(const QList<int> &map)
{
@ -474,7 +473,6 @@ static int appendToSpecialCaseMap(const QList<int> &map)
}
int length = utf16map.size();
utf16map.prepend(length);
specialCaseMaxLen = qMax(specialCaseMaxLen, length);
if (specialCaseMap.isEmpty())
specialCaseMap << 0; // placeholder
@ -734,6 +732,7 @@ static int maxTitleCaseDiff = 0;
static void readUnicodeData()
{
qDebug() << "Reading UnicodeData.txt";
QFile f("data/UnicodeData.txt");
if (!f.exists())
qFatal("Couldn't find UnicodeData.txt");
@ -865,10 +864,7 @@ static void readUnicodeData()
data.decomposition.append(d[i].toInt(&ok, 16));
Q_ASSERT(ok);
}
if (!decompositionLength.contains(data.decomposition.size()))
decompositionLength[data.decomposition.size()] = 1;
else
++decompositionLength[data.decomposition.size()];
++decompositionLength[data.decomposition.size()];
}
for (int i = codepoint; i <= lastCodepoint; ++i)
@ -881,6 +877,7 @@ static int maxMirroredDiff = 0;
static void readBidiMirroring()
{
qDebug() << "Reading BidiMirroring.txt";
QFile f("data/BidiMirroring.txt");
if (!f.exists())
qFatal("Couldn't find BidiMirroring.txt");
@ -920,6 +917,7 @@ static void readBidiMirroring()
static void readArabicShaping()
{
qDebug() << "Reading ArabicShaping.txt";
QFile f("data/ArabicShaping.txt");
if (!f.exists())
qFatal("Couldn't find ArabicShaping.txt");
@ -971,6 +969,7 @@ static void readArabicShaping()
static void readDerivedAge()
{
qDebug() << "Reading DerivedAge.txt";
QFile f("data/DerivedAge.txt");
if (!f.exists())
qFatal("Couldn't find DerivedAge.txt");
@ -1023,6 +1022,7 @@ static void readDerivedAge()
static void readDerivedNormalizationProps()
{
qDebug() << "Reading DerivedNormalizationProps.txt";
QFile f("data/DerivedNormalizationProps.txt");
if (!f.exists())
qFatal("Couldn't find DerivedNormalizationProps.txt");
@ -1101,6 +1101,7 @@ struct NormalizationCorrection {
static QByteArray createNormalizationCorrections()
{
qDebug() << "Reading NormalizationCorrections.txt";
QFile f("data/NormalizationCorrections.txt");
if (!f.exists())
qFatal("Couldn't find NormalizationCorrections.txt");
@ -1317,7 +1318,7 @@ static void readSpecialCasing()
}
if (upperMap.size() > 1) {
ud.p.upperCaseSpecial = true;
ud.p.upperCaseDiff = appendToSpecialCaseMap(upperMap);;
ud.p.upperCaseDiff = appendToSpecialCaseMap(upperMap);
}
unicodeData.insert(codepoint, ud);
@ -1388,8 +1389,8 @@ static void readCaseFolding()
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
}
if (caseFolded != codepoint + ud.p.lowerCaseDiff)
qDebug() << hex << codepoint;
// if (caseFolded != codepoint + ud.p.lowerCaseDiff)
// qDebug() << hex << codepoint;
} else {
qFatal("we currently don't support full case foldings");
// qDebug() << "special" << hex << foldMap;
@ -1695,6 +1696,7 @@ static QList<BlockInfo> blockInfoList;
static void readBlocks()
{
qDebug() << "Reading Blocks.txt";
QFile f("data/Blocks.txt");
if (!f.exists())
qFatal("Couldn't find Blocks.txt");
@ -1923,7 +1925,7 @@ QByteArray createScriptTableDeclaration()
declaration += " }; // size of each block\n\n";
// script table
declaration += "namespace QUnicodeTables {\n\nstatic const unsigned char uc_scripts[] = {\n";
declaration += "static const unsigned char uc_scripts[] = {\n";
for (int i = 0; i < unicodeBlockCount; ++i) {
int block = (((i << 7) & 0xff00) | ((i & 1) * 0x80));
int blockAssignment[unicodeBlockSize];
@ -2000,10 +2002,10 @@ QByteArray createScriptTableDeclaration()
if (declaration.endsWith(' '))
declaration.chop(1);
}
declaration += "\n};\n\n} // namespace QUnicodeTables\n\n";
declaration += "\n};\n\n";
declaration +=
"Q_CORE_EXPORT int QT_FASTCALL QUnicodeTables::script(uint ucs4)\n"
declaration +=
"Q_CORE_EXPORT int QT_FASTCALL script(uint ucs4)\n"
"{\n"
" if (ucs4 > 0xffff)\n"
" return Common;\n"
@ -2015,8 +2017,8 @@ QByteArray createScriptTableDeclaration()
" return script;\n"
"}\n\n";
qDebug("createScriptTableDeclaration: table size is %d bytes",
unicodeBlockCount + (extraBlockList.size() * unicodeBlockSize));
qDebug("createScriptTableDeclaration:");
qDebug(" memory usage: %d bytes", unicodeBlockCount + (extraBlockList.size() * unicodeBlockSize));
return declaration;
}
@ -2049,6 +2051,13 @@ static QByteArray createPropertyInfo()
{
qDebug("createPropertyInfo:");
// we reserve one bit more than in the assert below for the sign
Q_ASSERT(maxMirroredDiff < (1<<12));
Q_ASSERT(maxLowerCaseDiff < (1<<14));
Q_ASSERT(maxUpperCaseDiff < (1<<14));
Q_ASSERT(maxTitleCaseDiff < (1<<14));
Q_ASSERT(maxCaseFoldDiff < (1<<14));
const int BMP_BLOCKSIZE = 32;
const int BMP_SHIFT = 5;
const int BMP_END = 0x11000;
@ -2133,7 +2142,7 @@ static QByteArray createPropertyInfo()
}
if (out.endsWith(' '))
out.chop(1);
out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";;
out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";
for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
if (!(i % 8)) {
if (out.endsWith(' '))
@ -2164,14 +2173,6 @@ static QByteArray createPropertyInfo()
out += ", ";
}
}
// we reserve one bit more than in the assert below for the sign
Q_ASSERT(maxMirroredDiff < (1<<12));
Q_ASSERT(maxLowerCaseDiff < (1<<14));
Q_ASSERT(maxUpperCaseDiff < (1<<14));
Q_ASSERT(maxTitleCaseDiff < (1<<14));
Q_ASSERT(maxCaseFoldDiff < (1<<14));
if (out.endsWith(' '))
out.chop(1);
out += "\n};\n\n"
@ -2184,16 +2185,15 @@ static QByteArray createPropertyInfo()
")>>" + QByteArray::number(SMP_SHIFT) + ") + 0x" + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]"
" + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")]))\n\n"
"#define GET_PROP_INDEX_UCS2(ucs2) \\\n"
"(uc_property_trie[uc_property_trie[ucs2>>" + QByteArray::number(BMP_SHIFT) +
" (uc_property_trie[uc_property_trie[ucs2>>" + QByteArray::number(BMP_SHIFT) +
"] + (ucs2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")])\n\n"
"static const QUnicodeTables::Properties uc_properties[] = {\n";
"static const Properties uc_properties[] = {";
// keep in sync with the property declaration
for (int i = 0; i < uniqueProperties.size(); ++i) {
PropertyFlags p = uniqueProperties.at(i);
out += " { ";
out += "\n { ";
// " ushort category : 8;\n"
out += QByteArray::number( p.category );
out += ", ";
@ -2247,56 +2247,61 @@ static QByteArray createPropertyInfo()
out += QByteArray::number( p.wordBreak );
out += ", ";
out += QByteArray::number( p.sentenceBreak );
out += " },\n";
out += " },";
}
out += "};\n\n";
out.chop(1);
out += "\n};\n\n";
out += "static inline const QUnicodeTables::Properties *qGetProp(uint ucs4)\n"
out += "static inline const Properties *qGetProp(uint ucs4)\n"
"{\n"
" int index = GET_PROP_INDEX(ucs4);\n"
" const int index = GET_PROP_INDEX(ucs4);\n"
" return uc_properties + index;\n"
"}\n"
"\n"
"static inline const QUnicodeTables::Properties *qGetProp(ushort ucs2)\n"
"static inline const Properties *qGetProp(ushort ucs2)\n"
"{\n"
" int index = GET_PROP_INDEX_UCS2(ucs2);\n"
" const int index = GET_PROP_INDEX_UCS2(ucs2);\n"
" return uc_properties + index;\n"
"}\n"
"\n"
"Q_CORE_EXPORT const QUnicodeTables::Properties * QT_FASTCALL QUnicodeTables::properties(uint ucs4)\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4)\n"
"{\n"
" int index = GET_PROP_INDEX(ucs4);\n"
" return uc_properties + index;\n"
" return qGetProp(ucs4);\n"
"}\n"
"\n"
"Q_CORE_EXPORT const QUnicodeTables::Properties * QT_FASTCALL QUnicodeTables::properties(ushort ucs2)\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2)\n"
"{\n"
" int index = GET_PROP_INDEX_UCS2(ucs2);\n"
" return uc_properties + index;\n"
" return qGetProp(ucs2);\n"
"}\n\n";
out += "Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)\n"
out += "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n"
"{\n"
" return (QUnicodeTables::LineBreakClass)qGetProp(ucs4)->line_break_class;\n"
" return (LineBreakClass)qGetProp(ucs4)->line_break_class;\n"
"}\n\n";
return out;
}
out += "static const ushort specialCaseMap[] = {\n";
out += " 0x0, // placeholder";
static QByteArray createSpecialCaseMap()
{
qDebug("createSpecialCaseMap:");
QByteArray out;
out += "static const ushort specialCaseMap[] = {\n"
" 0x0, // placeholder";
int i = 1;
while (i < specialCaseMap.size()) {
out += "\n ";
int n = specialCaseMap.at(i);
int j;
for (j = 0; j <= n; ++j) {
for (int j = 0; j <= n; ++j) {
out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i+j), 16);
if (i+j < specialCaseMap.size() - 1)
out += ",";
out += ",";
}
i += n + 1;
}
out += "\n};\n";
out += "#define SPECIAL_CASE_MAX_LEN " + QByteArray::number(specialCaseMaxLen) + "\n\n";
out.chop(1);
out += "\n};\n\n";
qDebug("Special case map uses : %d bytes", specialCaseMap.size()*2);
@ -2314,7 +2319,7 @@ struct DecompositionBlock {
static QByteArray createCompositionInfo()
{
qDebug("createCompositionInfo:");
qDebug("createCompositionInfo: highestComposedCharacter=0x%x", highestComposedCharacter);
const int BMP_BLOCKSIZE = 16;
const int BMP_SHIFT = 4;
@ -2324,7 +2329,7 @@ static QByteArray createCompositionInfo()
const int SMP_SHIFT = 8;
if (SMP_END <= highestComposedCharacter)
qFatal("end of table smaller than highest composed character at %x", highestComposedCharacter);
qFatal("end of table smaller than highest composed character 0x%x", highestComposedCharacter);
QList<DecompositionBlock> blocks;
QList<int> blockMap;
@ -2339,24 +2344,23 @@ static QByteArray createCompositionInfo()
int uc = block*BMP_BLOCKSIZE + i;
UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
if (!d.decomposition.isEmpty()) {
int utf16Chars = 0;
for (int j = 0; j < d.decomposition.size(); ++j)
utf16Chars += QChar::requiresSurrogates(d.decomposition.at(j)) ? 2 : 1;
decompositions.append(d.decompositionType + (utf16Chars<<8));
int utf16Length = 0;
decompositions.append(0);
for (int j = 0; j < d.decomposition.size(); ++j) {
int code = d.decomposition.at(j);
if (QChar::requiresSurrogates(code)) {
// save as surrogate pair
ushort high = QChar::highSurrogate(code);
ushort low = QChar::lowSurrogate(code);
decompositions.append(high);
decompositions.append(low);
decompositions.append(QChar::highSurrogate(code));
decompositions.append(QChar::lowSurrogate(code));
utf16Length += 2;
} else {
decompositions.append(code);
utf16Length++;
}
}
decompositions[tableIndex] = d.decompositionType + (utf16Length<<8);
b.decompositionPositions.append(tableIndex);
tableIndex += utf16Chars + 1;
tableIndex += utf16Length + 1;
} else {
b.decompositionPositions.append(0xffff);
}
@ -2380,24 +2384,23 @@ static QByteArray createCompositionInfo()
int uc = block*SMP_BLOCKSIZE + i;
UnicodeData d = unicodeData.value(uc, UnicodeData(uc));
if (!d.decomposition.isEmpty()) {
int utf16Chars = 0;
for (int j = 0; j < d.decomposition.size(); ++j)
utf16Chars += QChar::requiresSurrogates(d.decomposition.at(j)) ? 2 : 1;
decompositions.append(d.decompositionType + (utf16Chars<<8));
int utf16Length = 0;
decompositions.append(0);
for (int j = 0; j < d.decomposition.size(); ++j) {
int code = d.decomposition.at(j);
if (QChar::requiresSurrogates(code)) {
// save as surrogate pair
ushort high = QChar::highSurrogate(code);
ushort low = QChar::lowSurrogate(code);
decompositions.append(high);
decompositions.append(low);
decompositions.append(QChar::highSurrogate(code));
decompositions.append(QChar::lowSurrogate(code));
utf16Length += 2;
} else {
decompositions.append(code);
utf16Length++;
}
}
decompositions[tableIndex] = d.decompositionType + (utf16Length<<8);
b.decompositionPositions.append(tableIndex);
tableIndex += utf16Chars + 1;
tableIndex += utf16Length + 1;
} else {
b.decompositionPositions.append(0xffff);
}
@ -2412,6 +2415,9 @@ static QByteArray createCompositionInfo()
blockMap.append(blocks.at(index).index);
}
// if the condition below doesn't hold anymore we need to modify our decomposition code
Q_ASSERT(tableIndex < 0xffff);
int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
int bmp_mem = bmp_block_data + bmp_trie;
@ -2427,7 +2433,7 @@ static QByteArray createCompositionInfo()
qDebug(" block data uses: %d bytes", smp_block_data);
qDebug(" trie data uses : %d bytes", smp_trie);
qDebug("\n decomposition table use : %d bytes", decompositions.size()*2);
qDebug("\n decomposition table uses : %d bytes", decompositions.size()*2);
qDebug(" memory usage: %d bytes", bmp_mem+smp_mem + decompositions.size()*2);
QByteArray out;
@ -2449,7 +2455,7 @@ static QByteArray createCompositionInfo()
}
if (out.endsWith(' '))
out.chop(1);
out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";;
out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";
for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
if (!(i % 8)) {
if (out.endsWith(' '))
@ -2480,9 +2486,8 @@ static QByteArray createCompositionInfo()
out += ", ";
}
}
if (out.endsWith(' '))
out.chop(1);
out.chop(2);
out += "\n};\n\n"
"#define GET_DECOMPOSITION_INDEX(ucs4) \\\n"
@ -2495,7 +2500,7 @@ static QByteArray createCompositionInfo()
" + (ucs4 & 0x" + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")]\\\n"
" : 0xffff))\n\n"
"static const unsigned short uc_decomposition_map[] = {\n";
"static const unsigned short uc_decomposition_map[] = {";
for (int i = 0; i < decompositions.size(); ++i) {
if (!(i % 8)) {
@ -2506,9 +2511,8 @@ static QByteArray createCompositionInfo()
out += "0x" + QByteArray::number(decompositions.at(i), 16);
out += ", ";
}
if (out.endsWith(' '))
out.chop(1);
out.chop(2);
out += "\n};\n\n";
return out;
@ -2516,7 +2520,7 @@ static QByteArray createCompositionInfo()
static QByteArray createLigatureInfo()
{
qDebug("createLigatureInfo: numLigatures=%d", numLigatures);
qDebug("createLigatureInfo: numLigatures=%d, highestLigature=0x%x", numLigatures, highestLigature);
QList<DecompositionBlock> blocks;
QList<int> blockMap;
@ -2525,7 +2529,9 @@ static QByteArray createLigatureInfo()
const int BMP_BLOCKSIZE = 32;
const int BMP_SHIFT = 5;
const int BMP_END = 0x3100;
Q_ASSERT(highestLigature < BMP_END);
if (BMP_END <= highestLigature)
qFatal("end of table smaller than highest ligature character 0x%x", highestLigature);
int used = 0;
int tableIndex = 0;
@ -2536,11 +2542,11 @@ static QByteArray createLigatureInfo()
int uc = block*BMP_BLOCKSIZE + i;
QList<Ligature> l = ligatureHashes.value(uc);
if (!l.isEmpty()) {
qSort(l);
Q_ASSERT(!QChar::requiresSurrogates(uc));
qSort(l); // needed for bsearch in ligatureHelper code
ligatures.append(l.size());
for (int j = 0; j < l.size(); ++j) {
Q_ASSERT(l.at(j).u2 == uc);
ligatures.append(l.at(j).u1);
ligatures.append(l.at(j).ligature);
}
@ -2563,12 +2569,16 @@ static QByteArray createLigatureInfo()
int bmp_blocks = blocks.size();
Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE);
// if the condition below doesn't hold anymore we need to modify our composition code
Q_ASSERT(tableIndex < 0xffff);
int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*2;
int bmp_trie = BMP_END/BMP_BLOCKSIZE*2;
int bmp_mem = bmp_block_data + bmp_trie;
qDebug(" %d unique blocks in BMP.", blocks.size());
qDebug(" block data uses: %d bytes", bmp_block_data);
qDebug(" trie data uses : %d bytes", bmp_trie);
qDebug(" memory usage: %d bytes", bmp_mem);
qDebug("\n ligature data uses : %d bytes", ligatures.size()*2);
qDebug(" memory usage: %d bytes", bmp_mem + ligatures.size() * 2);
@ -2609,15 +2619,15 @@ static QByteArray createLigatureInfo()
}
}
if (out.endsWith(' '))
out.chop(1);
out.chop(2);
out += "\n};\n\n"
"#define GET_LIGATURE_INDEX(u2) "
"(u2 < 0x" + QByteArray::number(BMP_END, 16) + " ? "
"#define GET_LIGATURE_INDEX(u2) \\\n"
" (u2 < 0x" + QByteArray::number(BMP_END, 16) + " ? "
"uc_ligature_trie[uc_ligature_trie[u2>>" + QByteArray::number(BMP_SHIFT) +
"] + (u2 & 0x" + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")] : 0xffff);\n\n"
"static const unsigned short uc_ligature_map[] = {\n";
"static const unsigned short uc_ligature_map[] = {";
for (int i = 0; i < ligatures.size(); ++i) {
if (!(i % 8)) {
@ -2628,9 +2638,8 @@ static QByteArray createLigatureInfo()
out += "0x" + QByteArray::number(ligatures.at(i), 16);
out += ", ";
}
if (out.endsWith(' '))
out.chop(1);
out.chop(2);
out += "\n};\n\n";
return out;
@ -2678,6 +2687,7 @@ int main(int, char **)
computeUniqueProperties();
QByteArray properties = createPropertyInfo();
QByteArray specialCases = createSpecialCaseMap();
QByteArray compositions = createCompositionInfo();
QByteArray ligatures = createLigatureInfo();
QByteArray normalizationCorrections = createNormalizationCorrections();
@ -2745,12 +2755,21 @@ int main(int, char **)
f.open(QFile::WriteOnly|QFile::Truncate);
f.write(header);
f.write(note);
f.write("#include \"qunicodetables_p.h\"\n\n");
f.write("QT_BEGIN_NAMESPACE\n\n");
f.write("namespace QUnicodeTables {\n\n");
f.write(properties);
f.write("\n");
f.write(specialCases);
f.write("\n");
f.write(compositions);
f.write(ligatures);
f.write("\n");
f.write(normalizationCorrections);
f.write("\n");
f.write(scriptTableDeclaration);
f.write("} // namespace QUnicodeTables\n\n");
f.write("using namespace QUnicodeTables;\n\n");
f.write("QT_END_NAMESPACE\n");
f.close();
@ -2776,7 +2795,7 @@ int main(int, char **)
f.write("\n");
f.write(sentence_break_string);
f.write("\n");
f.write(lineBreakClass);
f.write(line_break_class_string);
f.write("\n");
f.write(methods);
f.write("} // namespace QUnicodeTables\n\n"