Clean-up the Unicode tables generator code and the generated header

This fixes the blocks and memory consumption reports, the whitespace issues and makes the code a bit cleaner. Since I'm the only one who does change this code, such a no-op commit could not hurt anyone or even git blame ;) Change-Id: Ib069f925a3791c82e16c368c8392bcffbfd68c53 Reviewed-by: Lars Knoll <lars.knoll@nokia.com> Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
2012-06-17 04:20:59 +03:00 · 2012-06-17 04:20:59 +03:00 · c1329fba13
commit c1329fba13
parent 57ca02b1d2
2 changed files with 637 additions and 638 deletions
--- a/src/corelib/tools/qunicodetables_p.h
+++ b/src/corelib/tools/qunicodetables_p.h
@ -63,221 +63,217 @@ QT_BEGIN_NAMESPACE

 namespace QUnicodeTables {

-    struct Properties {
-        ushort category         : 8; /* 5 used */
-        ushort direction        : 8; /* 5 used */
-        ushort combiningClass   : 8;
-        ushort joining          : 2;
-        signed short digitValue : 6; /* 5 used */
-        signed short mirrorDiff    : 16;
-        signed short lowerCaseDiff : 16;
-        signed short upperCaseDiff : 16;
-        signed short titleCaseDiff : 16;
-        signed short caseFoldDiff  : 16;
-        ushort lowerCaseSpecial : 1;
-        ushort upperCaseSpecial : 1;
-        ushort titleCaseSpecial : 1;
-        ushort caseFoldSpecial  : 1;
-        ushort unicodeVersion   : 4;
-        ushort graphemeBreak    : 8; /* 4 used */
-        ushort wordBreak        : 8; /* 4 used */
-        ushort sentenceBreak    : 8; /* 4 used */
-        ushort line_break_class : 8; /* 6 used */
-        ushort script           : 8; /* 5 used */
-    };
-    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
-    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
+struct Properties {
+    ushort category            : 8; /* 5 used */
+    ushort direction           : 8; /* 5 used */
+    ushort combiningClass      : 8;
+    ushort joining             : 2;
+    signed short digitValue    : 6; /* 5 used */
+    signed short mirrorDiff    : 16;
+    signed short lowerCaseDiff : 16;
+    signed short upperCaseDiff : 16;
+    signed short titleCaseDiff : 16;
+    signed short caseFoldDiff  : 16;
+    ushort lowerCaseSpecial    : 1;
+    ushort upperCaseSpecial    : 1;
+    ushort titleCaseSpecial    : 1;
+    ushort caseFoldSpecial     : 1;
+    ushort unicodeVersion      : 4;
+    ushort graphemeBreak       : 8; /* 4 used */
+    ushort wordBreak           : 8; /* 4 used */
+    ushort sentenceBreak       : 8; /* 4 used */
+    ushort line_break_class    : 8; /* 6 used */
+    ushort script              : 8; /* 5 used */
+};

-    // See http://www.unicode.org/reports/tr24/tr24-5.html
-    enum Script {
-        Common,
-        Greek,
-        Cyrillic,
-        Armenian,
-        Hebrew,
-        Arabic,
-        Syriac,
-        Thaana,
-        Devanagari,
-        Bengali,
-        Gurmukhi,
-        Gujarati,
-        Oriya,
-        Tamil,
-        Telugu,
-        Kannada,
-        Malayalam,
-        Sinhala,
-        Thai,
-        Lao,
-        Tibetan,
-        Myanmar,
-        Georgian,
-        Hangul,
-        Ogham,
-        Runic,
-        Khmer,
-        Nko,
-        Inherited,
-        ScriptCount = Inherited,
-        Latin = Common,
-        Ethiopic = Common,
-        Cherokee = Common,
-        CanadianAboriginal = Common,
-        Mongolian = Common,
-        Hiragana = Common,
-        Katakana = Common,
-        Bopomofo = Common,
-        Han = Common,
-        Yi = Common,
-        OldItalic = Common,
-        Gothic = Common,
-        Deseret = Common,
-        Tagalog = Common,
-        Hanunoo = Common,
-        Buhid = Common,
-        Tagbanwa = Common,
-        Limbu = Common,
-        TaiLe = Common,
-        LinearB = Common,
-        Ugaritic = Common,
-        Shavian = Common,
-        Osmanya = Common,
-        Cypriot = Common,
-        Braille = Common,
-        Buginese = Common,
-        Coptic = Common,
-        NewTaiLue = Common,
-        Glagolitic = Common,
-        Tifinagh = Common,
-        SylotiNagri = Common,
-        OldPersian = Common,
-        Kharoshthi = Common,
-        Balinese = Common,
-        Cuneiform = Common,
-        Phoenician = Common,
-        PhagsPa = Common,
-        Sundanese = Common,
-        Lepcha = Common,
-        OlChiki = Common,
-        Vai = Common,
-        Saurashtra = Common,
-        KayahLi = Common,
-        Rejang = Common,
-        Lycian = Common,
-        Carian = Common,
-        Lydian = Common,
-        Cham = Common,
-        TaiTham = Common,
-        TaiViet = Common,
-        Avestan = Common,
-        EgyptianHieroglyphs = Common,
-        Samaritan = Common,
-        Lisu = Common,
-        Bamum = Common,
-        Javanese = Common,
-        MeeteiMayek = Common,
-        ImperialAramaic = Common,
-        OldSouthArabian = Common,
-        InscriptionalParthian = Common,
-        InscriptionalPahlavi = Common,
-        OldTurkic = Common,
-        Kaithi = Common,
-        Batak = Common,
-        Brahmi = Common,
-        Mandaic = Common,
-        Chakma = Common,
-        MeroiticCursive = Common,
-        MeroiticHieroglyphs = Common,
-        Miao = Common,
-        Sharada = Common,
-        SoraSompeng = Common,
-        Takri = Common
-    };
+Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
+Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);

+// See http://www.unicode.org/reports/tr24/tr24-5.html
+enum Script {
+    Common,
+    Greek,
+    Cyrillic,
+    Armenian,
+    Hebrew,
+    Arabic,
+    Syriac,
+    Thaana,
+    Devanagari,
+    Bengali,
+    Gurmukhi,
+    Gujarati,
+    Oriya,
+    Tamil,
+    Telugu,
+    Kannada,
+    Malayalam,
+    Sinhala,
+    Thai,
+    Lao,
+    Tibetan,
+    Myanmar,
+    Georgian,
+    Hangul,
+    Ogham,
+    Runic,
+    Khmer,
+    Nko,
+    Inherited,
+    ScriptCount = Inherited,
+    Latin = Common,
+    Ethiopic = Common,
+    Cherokee = Common,
+    CanadianAboriginal = Common,
+    Mongolian = Common,
+    Hiragana = Common,
+    Katakana = Common,
+    Bopomofo = Common,
+    Han = Common,
+    Yi = Common,
+    OldItalic = Common,
+    Gothic = Common,
+    Deseret = Common,
+    Tagalog = Common,
+    Hanunoo = Common,
+    Buhid = Common,
+    Tagbanwa = Common,
+    Limbu = Common,
+    TaiLe = Common,
+    LinearB = Common,
+    Ugaritic = Common,
+    Shavian = Common,
+    Osmanya = Common,
+    Cypriot = Common,
+    Braille = Common,
+    Buginese = Common,
+    Coptic = Common,
+    NewTaiLue = Common,
+    Glagolitic = Common,
+    Tifinagh = Common,
+    SylotiNagri = Common,
+    OldPersian = Common,
+    Kharoshthi = Common,
+    Balinese = Common,
+    Cuneiform = Common,
+    Phoenician = Common,
+    PhagsPa = Common,
+    Sundanese = Common,
+    Lepcha = Common,
+    OlChiki = Common,
+    Vai = Common,
+    Saurashtra = Common,
+    KayahLi = Common,
+    Rejang = Common,
+    Lycian = Common,
+    Carian = Common,
+    Lydian = Common,
+    Cham = Common,
+    TaiTham = Common,
+    TaiViet = Common,
+    Avestan = Common,
+    EgyptianHieroglyphs = Common,
+    Samaritan = Common,
+    Lisu = Common,
+    Bamum = Common,
+    Javanese = Common,
+    MeeteiMayek = Common,
+    ImperialAramaic = Common,
+    OldSouthArabian = Common,
+    InscriptionalParthian = Common,
+    InscriptionalPahlavi = Common,
+    OldTurkic = Common,
+    Kaithi = Common,
+    Batak = Common,
+    Brahmi = Common,
+    Mandaic = Common,
+    Chakma = Common,
+    MeroiticCursive = Common,
+    MeroiticHieroglyphs = Common,
+    Miao = Common,
+    Sharada = Common,
+    SoraSompeng = Common,
+    Takri = Common
+};

-    enum GraphemeBreak {
-        GraphemeBreakOther,
-        GraphemeBreakCR,
-        GraphemeBreakLF,
-        GraphemeBreakControl,
-        GraphemeBreakExtend,
-        GraphemeBreakPrepend,
-        GraphemeBreakSpacingMark,
-        GraphemeBreakL,
-        GraphemeBreakV,
-        GraphemeBreakT,
-        GraphemeBreakLV,
-        GraphemeBreakLVT
-    };
+enum GraphemeBreak {
+    GraphemeBreakOther,
+    GraphemeBreakCR,
+    GraphemeBreakLF,
+    GraphemeBreakControl,
+    GraphemeBreakExtend,
+    GraphemeBreakPrepend,
+    GraphemeBreakSpacingMark,
+    GraphemeBreakL,
+    GraphemeBreakV,
+    GraphemeBreakT,
+    GraphemeBreakLV,
+    GraphemeBreakLVT
+};

+enum WordBreak {
+    WordBreakOther,
+    WordBreakCR,
+    WordBreakLF,
+    WordBreakNewline,
+    WordBreakFormat,
+    WordBreakKatakana,
+    WordBreakALetter,
+    WordBreakMidNumLet,
+    WordBreakMidLetter,
+    WordBreakMidNum,
+    WordBreakNumeric,
+    WordBreakExtendNumLet
+};

-    enum WordBreak {
-        WordBreakOther,
-        WordBreakCR,
-        WordBreakLF,
-        WordBreakNewline,
-        WordBreakFormat,
-        WordBreakKatakana,
-        WordBreakALetter,
-        WordBreakMidNumLet,
-        WordBreakMidLetter,
-        WordBreakMidNum,
-        WordBreakNumeric,
-        WordBreakExtendNumLet
-    };
+enum SentenceBreak {
+    SentenceBreakOther,
+    SentenceBreakCR,
+    SentenceBreakLF,
+    SentenceBreakSep,
+    SentenceBreakFormat,
+    SentenceBreakSp,
+    SentenceBreakLower,
+    SentenceBreakUpper,
+    SentenceBreakOLetter,
+    SentenceBreakNumeric,
+    SentenceBreakATerm,
+    SentenceBreakSContinue,
+    SentenceBreakSTerm,
+    SentenceBreakClose
+};

+// see http://www.unicode.org/reports/tr14/tr14-28.html
+// we don't use the XX and AI classes and map them to AL instead.
+enum LineBreakClass {
+    LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
+    LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
+    LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
+    LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
+    LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
+    LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_CB, LineBreak_SA,
+    LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK
+};

-    enum SentenceBreak {
-        SentenceBreakOther,
-        SentenceBreakCR,
-        SentenceBreakLF,
-        SentenceBreakSep,
-        SentenceBreakFormat,
-        SentenceBreakSp,
-        SentenceBreakLower,
-        SentenceBreakUpper,
-        SentenceBreakOLetter,
-        SentenceBreakNumeric,
-        SentenceBreakATerm,
-        SentenceBreakSContinue,
-        SentenceBreakSTerm,
-        SentenceBreakClose
-    };
+Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);
+inline GraphemeBreak graphemeBreakClass(QChar ch)
+{ return graphemeBreakClass(ch.unicode()); }

+Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);
+inline WordBreak wordBreakClass(QChar ch)
+{ return wordBreakClass(ch.unicode()); }

-    // see http://www.unicode.org/reports/tr14/tr14-28.html
-    // we don't use the XX and AI classes and map them to AL instead.
-    enum LineBreakClass {
-        LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
-        LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
-        LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
-        LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
-        LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
-        LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_CB, LineBreak_SA,
-        LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF, LineBreak_BK
-    };
+Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);
+inline SentenceBreak sentenceBreakClass(QChar ch)
+{ return sentenceBreakClass(ch.unicode()); }

+Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
+inline LineBreakClass lineBreakClass(QChar ch)
+{ return lineBreakClass(ch.unicode()); }

-    Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);
-    inline GraphemeBreak graphemeBreakClass(QChar ch)
-    { return graphemeBreakClass(ch.unicode()); }
-
-    Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);
-    inline WordBreak wordBreakClass(QChar ch)
-    { return wordBreakClass(ch.unicode()); }
-
-    Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);
-    inline SentenceBreak sentenceBreakClass(QChar ch)
-    { return sentenceBreakClass(ch.unicode()); }
-
-    Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);
-    inline LineBreakClass lineBreakClass(QChar ch)
-    { return lineBreakClass(ch.unicode()); }
-
-    Q_CORE_EXPORT Script QT_FASTCALL script(uint ucs4);
-    inline Script script(QChar ch)
-    { return script(ch.unicode()); }
+Q_CORE_EXPORT Script QT_FASTCALL script(uint ucs4);
+inline Script script(QChar ch)
+{ return script(ch.unicode()); }

 } // namespace QUnicodeTables

--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp