diff --git a/icu4c/source/layout/LayoutEngine.cpp b/icu4c/source/layout/LayoutEngine.cpp index b9dd50a38a..41e6ae8e5a 100644 --- a/icu4c/source/layout/LayoutEngine.cpp +++ b/icu4c/source/layout/LayoutEngine.cpp @@ -16,7 +16,7 @@ #include "IndicLayoutEngine.h" #include "KhmerLayoutEngine.h" #include "ThaiLayoutEngine.h" -//#include "TibetanLayoutEngine.h" +#include "TibetanLayoutEngine.h" #include "GXLayoutEngine.h" #include "ScriptAndLanguageTags.h" #include "CharSubstitutionFilter.h" @@ -478,11 +478,9 @@ LayoutEngine *LayoutEngine::layoutEngineFactory(const LEFontInstance *fontInstan break; -#if 0 case tibtScriptCode: result = new TibetanOpenTypeLayoutEngine(fontInstance, scriptCode, languageCode, typoFlags, gsubTable); break; -#endif case khmrScriptCode: result = new KhmerOpenTypeLayoutEngine(fontInstance, scriptCode, languageCode, typoFlags, gsubTable); diff --git a/icu4c/source/layout/Makefile.in b/icu4c/source/layout/Makefile.in index d5882f6c80..ebdc9cc1f0 100644 --- a/icu4c/source/layout/Makefile.in +++ b/icu4c/source/layout/Makefile.in @@ -119,6 +119,8 @@ SubstitutionLookups.o \ ValueRecords.o \ KhmerLayoutEngine.o \ KhmerReordering.o \ +TibetanLayoutEngine.o \ +TibetanReordering.o \ KernTable.o STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O)) diff --git a/icu4c/source/layout/TibetanLayoutEngine.cpp b/icu4c/source/layout/TibetanLayoutEngine.cpp new file mode 100644 index 0000000000..be97ab5baf --- /dev/null +++ b/icu4c/source/layout/TibetanLayoutEngine.cpp @@ -0,0 +1,87 @@ +/* + * + * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved + * + * Developed at DIT - Government of Bhutan + * + * Contact person: Pema Geyleg - + * + * This file is a modification of the ICU file KhmerReordering.cpp + * by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan + * A first module for Dzongkha was developed by Karunakar under Panlocalisation funding. 
+ * Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
+ *
+ */
+
+
+#include "OpenTypeLayoutEngine.h"
+#include "TibetanLayoutEngine.h"
+#include "LEGlyphStorage.h"
+#include "TibetanReordering.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TibetanOpenTypeLayoutEngine)
+
+// Constructor used when the GSUB table is already known.
+// Installs the Tibetan feature map and requests ordered feature application.
+TibetanOpenTypeLayoutEngine::TibetanOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 languageCode,
+                    le_int32 typoFlags, const GlyphSubstitutionTableHeader *gsubTable)
+    : OpenTypeLayoutEngine(fontInstance, scriptCode, languageCode, typoFlags, gsubTable)
+{
+    fFeatureMap   = TibetanReordering::getFeatureMap(fFeatureMapCount);
+    fFeatureOrder = TRUE;
+}
+
+// Constructor used when no GSUB table is supplied.
+// Installs the same feature map as the main constructor.
+TibetanOpenTypeLayoutEngine::TibetanOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 languageCode,
+                    le_int32 typoFlags)
+    : OpenTypeLayoutEngine(fontInstance, scriptCode, languageCode, typoFlags)
+{
+    fFeatureMap   = TibetanReordering::getFeatureMap(fFeatureMapCount);
+    fFeatureOrder = TRUE;
+}
+
+TibetanOpenTypeLayoutEngine::~TibetanOpenTypeLayoutEngine()
+{
+    // nothing to do
+}
+
+// Input: characters
+// Output: characters, char indices, tags
+// Returns: output character count (0 on any failure, with 'success' set to the error)
+//
+// On success 'outChars' points to a newly allocated array holding the reordered
+// characters. On failure 'outChars' is either untouched or reset to NULL, so it
+// never refers to freed memory.
+le_int32 TibetanOpenTypeLayoutEngine::characterProcessing(const LEUnicode chars[], le_int32 offset, le_int32 count, le_int32 max, le_bool rightToLeft,
+        LEUnicode *&outChars, LEGlyphStorage &glyphStorage, LEErrorCode &success)
+{
+    if (LE_FAILURE(success)) {
+        return 0;
+    }
+
+    if (chars == NULL || offset < 0 || count < 0 || max < 0 || offset >= max || offset + count > max) {
+        success = LE_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    // TibetanReordering::reorder() emits every input character once, plus at most
+    // one dotted circle per syllable, so 3 * count is a safe upper bound.
+    // TODO check if 2 is enough.
+    // Refuse counts for which the multiplication would overflow le_int32.
+    const le_int32 maxCount = 0x7FFFFFFF / 3;
+
+    if (count > maxCount) {
+        success = LE_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+
+    le_int32 worstCase = count * 3;
+
+    outChars = LE_NEW_ARRAY(LEUnicode, worstCase);
+
+    if (outChars == NULL) {
+        success = LE_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+
+    glyphStorage.allocateGlyphArray(worstCase, rightToLeft, success);
+    glyphStorage.allocateAuxData(success);
+
+    if (LE_FAILURE(success)) {
+        LE_DELETE_ARRAY(outChars);
+        outChars = NULL; // do not hand a dangling pointer back to the caller
+        return 0;
+    }
+
+    // NOTE: assumes this allocates featureTags...
+    // (probably better than doing the worst case stuff here...)
+    le_int32 outCharCount = TibetanReordering::reorder(&chars[offset], count, fScriptCode, outChars, glyphStorage);
+
+    glyphStorage.adoptGlyphCount(outCharCount);
+    return outCharCount;
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/layout/TibetanLayoutEngine.h b/icu4c/source/layout/TibetanLayoutEngine.h
new file mode 100644
index 0000000000..9deb4f0241
--- /dev/null
+++ b/icu4c/source/layout/TibetanLayoutEngine.h
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
+ *
+ * Developed at DIT - Government of Bhutan
+ *
+ * Contact person: Pema Geyleg -
+ *
+ * This file is a modification of the ICU file KhmerReordering.cpp
+ * by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan
+ * A first module for Dzongkha was developed by Karunakar under Panlocalisation funding.
+ * Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
+ *
+ */
+
+#ifndef __TIBETANLAYOUTENGINE_H
+#define __TIBETANLAYOUTENGINE_H
+
+// #include "LETypes.h"
+// #include "LEFontInstance.h"
+// #include "LEGlyphFilter.h"
+// #include "LayoutEngine.h"
+// #include "OpenTypeLayoutEngine.h"
+
+// #include "GlyphSubstitutionTables.h"
+// #include "GlyphDefinitionTables.h"
+// #include "GlyphPositioningTables.h"
+
+U_NAMESPACE_BEGIN
+
+// class MPreFixups;
+// class LEGlyphStorage;
+
+/**
+ * This class implements OpenType layout for Dzongkha and Tibetan OpenType fonts
+ *
+ * @internal
+ */
+class TibetanOpenTypeLayoutEngine : public OpenTypeLayoutEngine
+{
+public:
+    /**
+     * This is the main constructor. It constructs an instance of TibetanOpenTypeLayoutEngine for
+     * a particular font, script and language.
It takes the GSUB table as a parameter since
+     * LayoutEngine::layoutEngineFactory has to read the GSUB table to know that it has a
+     * Tibetan OpenType font.
+     *
+     * @param fontInstance - the font
+     * @param scriptCode - the script
+     * @param languageCode - the language
+     * @param typoFlags - flags forwarded unchanged to the OpenTypeLayoutEngine constructor
+     * @param gsubTable - the GSUB table
+     *
+     * @see LayoutEngine::layoutEngineFactory
+     * @see OpenTypeLayoutEngine
+     * @see ScriptAndLanguageTags.h for script and language codes
+     *
+     * @internal
+     */
+    TibetanOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 languageCode,
+                    le_int32 typoFlags, const GlyphSubstitutionTableHeader *gsubTable);
+
+    /**
+     * This constructor is used when the font requires a "canned" GSUB table which can't be known
+     * until after this constructor has been invoked.
+     *
+     * @param fontInstance - the font
+     * @param scriptCode - the script
+     * @param languageCode - the language
+     * @param typoFlags - flags forwarded unchanged to the OpenTypeLayoutEngine constructor
+     *
+     * @see OpenTypeLayoutEngine
+     * @see ScriptAndLanguageTags.h for script and language codes
+     *
+     * @internal
+     */
+    TibetanOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 languageCode,
+                    le_int32 typoFlags);
+
+    /**
+     * The destructor, virtual for correct polymorphic invocation.
+     *
+     * @internal
+     */
+    virtual ~TibetanOpenTypeLayoutEngine();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @draft ICU 3.6
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @draft ICU 3.6
+     */
+    static UClassID getStaticClassID();
+
+protected:
+
+    /**
+     * This method does Tibetan OpenType character processing. It assigns the OpenType feature
+     * tags to the characters, and may generate output characters which have been reordered.
+     * It may also insert dotted circles, resulting in more output characters than input characters.
+     *
+     * Input parameters:
+     * @param chars - the input character context
+     * @param offset - the index of the first character to process
+     * @param count - the number of characters to process
+     * @param max - the number of characters in the input context
+     * @param rightToLeft - TRUE if the characters are in a right to left directional run
+     * @param glyphStorage - the glyph storage object. The glyph and character index arrays will be set.
+     *                       the auxiliary data array will be set to the feature tags.
+     *
+     * Output parameters:
+     * @param outChars - on success, set to a newly allocated array holding the output characters
+     * @param success - set to an error code if the operation fails
+     *
+     * @return the output character count
+     *
+     * @internal
+     */
+    virtual le_int32 characterProcessing(const LEUnicode chars[], le_int32 offset, le_int32 count, le_int32 max, le_bool rightToLeft,
+            LEUnicode *&outChars, LEGlyphStorage &glyphStorage, LEErrorCode &success);
+
+};
+
+U_NAMESPACE_END
+#endif
+
diff --git a/icu4c/source/layout/TibetanReordering.cpp b/icu4c/source/layout/TibetanReordering.cpp
new file mode 100644
index 0000000000..ea5f78a17c
--- /dev/null
+++ b/icu4c/source/layout/TibetanReordering.cpp
@@ -0,0 +1,380 @@
+/*
+ *
+ * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
+ *
+ * Developed at DIT - Government of Bhutan
+ *
+ * Contact person: Pema Geyleg -
+ *
+ * This file is a modification of the ICU file KhmerReordering.cpp
+ * by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan
+ * A first module for Dzongkha was developed by Karunakar under Panlocalisation funding.
+ * Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
+ *
+ */
+
+//#include
+#include "LETypes.h"
+#include "OpenTypeTables.h"
+#include "TibetanReordering.h"
+#include "LEGlyphStorage.h"
+
+
+U_NAMESPACE_BEGIN
+
+// Characters that get referred to by name...
+enum
+{
+    C_DOTTED_CIRCLE = 0x25CC,   // written before a syllable that starts with a combining character
+    C_PRE_NUMBER_MARK = 0x0F3F, // written in front of a digit when the digit is followed by a pre-digit mark
+    };
+
+
+enum
+{
+    // simple classes, they are used in the statetable (in this file) to control the length of a syllable
+    // they are also used to know where a character should be placed (location in reference to the base character)
+    // and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
+    // indicate error in syllable construction
+    //
+    // Each value is a CC_* class number combined with the CF_* flags that apply to that class.
+    _xx = TibetanClassTable::CC_RESERVED,
+    _ba = TibetanClassTable::CC_BASE,
+    _sj = TibetanClassTable::CC_SUBJOINED | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
+    _tp = TibetanClassTable::CC_TSA_PHRU | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
+    _ac = TibetanClassTable::CC_A_CHUNG | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
+    _cs = TibetanClassTable::CC_COMP_SANSKRIT | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
+    _ha = TibetanClassTable::CC_HALANTA | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
+    _bv = TibetanClassTable::CC_BELOW_VOWEL | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
+    _av = TibetanClassTable::CC_ABOVE_VOWEL | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
+    _an = TibetanClassTable::CC_ANUSVARA | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
+    _cb = TibetanClassTable::CC_CANDRABINDU | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
+    _vs = TibetanClassTable::CC_VISARGA | TibetanClassTable::CF_DOTTED_CIRCLE| TibetanClassTable::CF_POS_AFTER,
+    _as = TibetanClassTable::CC_ABOVE_S_MARK | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
+    _bs = TibetanClassTable::CC_BELOW_S_MARK | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
+    _di = TibetanClassTable::CC_DIGIT | TibetanClassTable::CF_DIGIT,
+    _pd = TibetanClassTable::CC_PRE_DIGIT_MARK | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_PREDIGIT | TibetanClassTable::CF_POS_BEFORE ,
+    _bd = TibetanClassTable::CC_POST_BELOW_DIGIT_M | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_AFTER ,
+
+
+};
+
+
+// Character class tables
+//_xx Non Combining characters
+//_ba Base Consonants
+//_sj Subjoined consonants
+//_tp Tsa - phru
+//_ac A-chung, Vowel Lengthening mark
+//_cs Precomposed Sanskrit vowel + subjoined consonants
+//_ha Halanta/Virama
+//_bv Below vowel
+//_av above vowel
+//_an Anusvara
+//_cb Candrabindu
+//_vs Visarga/Post mark
+//_as Upper Stress marks
+//_bs Lower Stress marks
+//_di Digit
+//_pd Number pre combining, Needs reordering
+//_bd Other number combining marks
+
+// One entry per code point, 16 code points per row, starting at U+0F00.
+static const TibetanClassTable::CharClass tibetanCharClasses[] =
+{
+   // 0    1    2    3    4    5    6    7    8    9    a    b    c    d    e    f
+    _xx, _ba, _xx, _xx, _ba, _ba, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0F00 - 0F0F 0
+    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _bd, _bd, _xx, _xx, _xx, _xx, _xx, _xx, // 0F10 - 0F1F 1
+    _di, _di, _di, _di, _di, _di, _di, _di, _di, _di, _xx, _xx, _xx, _xx, _xx, _xx, // 0F20 - 0F2F 2
+    _xx, _xx, _xx, _xx, _xx, _bs, _xx, _bs, _xx, _tp, _xx, _xx, _xx, _xx, _bd, _pd, // 0F30 - 0F3F 3
+    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F40 - 0F4F 4
+    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F50 - 0F5F 5
+    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, _xx, // 0F60 - 0F6F 6
+    _xx, _ac, _av, _cs, _bv, _bv, _cs, _cs, _cs, _cs, _av, _av, _av, _av, _an, _vs, // 0F70 - 0F7F 7
+    _av, _cs, _cb, _cb, _ha, _xx, _as, _as, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, // 0F80 - 0F8F 8
+    _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0F90 - 0F9F 9
+    _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0FA0 - 0FAF a
+    _sj, _sj, _sj, _sj, _sj,
_sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, // 0FB0 - 0FBF b
+    _xx, _xx, _xx, _xx, _xx, _xx, _bs, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FC0 - 0FCF c
+    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx,// 0FD0 - 0FDF d
+    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FE0 - 0FEF e
+    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FF0 - 0FFF f
+};
+
+
+//
+// Tibetan Class Tables
+//
+
+//
+// The table above covers the code points 0F00 to 0FFF; that range is recorded here.
+// Characters in the range that do not combine are simply classified as _xx.
+static const TibetanClassTable tibetanClassTable = {0x0F00, 0x0FFF, tibetanCharClasses};
+
+
+// Look up the class of a character: the table entry when the character lies
+// inside [firstChar, lastChar], otherwise CC_RESERVED (_xx).
+TibetanClassTable::CharClass TibetanClassTable::getCharClass(LEUnicode ch) const
+{
+    if (ch >= firstChar && ch <= lastChar) {
+        return classTable[ch - firstChar];
+    }
+
+    return CC_RESERVED;
+}
+
+// Accessor for the single, file-static Tibetan class table.
+const TibetanClassTable *TibetanClassTable::getTibetanClassTable()
+{
+    return &tibetanClassTable;
+}
+
+
+
+// Small sink used by reorder(): appends characters to the output buffer and
+// records, for each one, its source character index and feature mask in the
+// glyph storage.
+class ReorderingOutput : public UMemory {
+private:
+    le_int32 fOutIndex;             // next free slot in fOutChars
+    LEUnicode *fOutChars;           // caller-owned output buffer
+
+    LEGlyphStorage &fGlyphStorage;  // receives char indices and aux data
+
+
+public:
+    ReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage)
+        : fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage)
+    {
+        // members are fully set up by the initializer list
+    }
+
+    ~ReorderingOutput()
+    {
+        // owns nothing, so there is nothing to release
+    }
+
+    // Append one character together with its source index and feature mask.
+    // Errors reported by the glyph storage are not propagated.
+    void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask featureMask)
+    {
+        LEErrorCode status = LE_NO_ERROR;
+
+        fOutChars[fOutIndex] = ch;
+
+        fGlyphStorage.setCharIndex(fOutIndex, charIndex, status);
+        fGlyphStorage.setAuxData(fOutIndex, featureMask, status);
+
+        fOutIndex++;
+    }
+
+    // Number of characters written so far.
+    le_int32 getOutputIndex()
+    {
+        return fOutIndex;
+    }
+};
+
+
+//TODO remove unused flags
+#define blwfFeatureTag LE_BLWF_FEATURE_TAG
+#define pstfFeatureTag LE_PSTF_FEATURE_TAG
+#define presFeatureTag LE_PRES_FEATURE_TAG
+#define blwsFeatureTag LE_BLWS_FEATURE_TAG
+#define abvsFeatureTag LE_ABVS_FEATURE_TAG
+#define pstsFeatureTag LE_PSTS_FEATURE_TAG
+
+#define blwmFeatureTag LE_BLWM_FEATURE_TAG
+#define abvmFeatureTag LE_ABVM_FEATURE_TAG
+#define distFeatureTag LE_DIST_FEATURE_TAG
+
+#define prefFeatureTag LE_PREF_FEATURE_TAG
+#define abvfFeatureTag LE_ABVF_FEATURE_TAG
+#define cligFeatureTag LE_CLIG_FEATURE_TAG
+#define mkmkFeatureTag LE_MKMK_FEATURE_TAG
+
+// Shaping features
+#define prefFeatureMask 0x80000000UL
+#define blwfFeatureMask 0x40000000UL
+#define abvfFeatureMask 0x20000000UL
+#define pstfFeatureMask 0x10000000UL
+#define presFeatureMask 0x08000000UL
+#define blwsFeatureMask 0x04000000UL
+#define abvsFeatureMask 0x02000000UL
+#define pstsFeatureMask 0x01000000UL
+#define cligFeatureMask 0x00800000UL
+
+// Positioning features
+#define distFeatureMask 0x00400000UL
+#define blwmFeatureMask 0x00200000UL
+#define abvmFeatureMask 0x00100000UL
+#define mkmkFeatureMask 0x00080000UL
+
+#define tagPref (prefFeatureMask | presFeatureMask | cligFeatureMask | distFeatureMask)
+#define tagAbvf (abvfFeatureMask | abvsFeatureMask | cligFeatureMask | distFeatureMask | abvmFeatureMask | mkmkFeatureMask)
+#define tagPstf (blwfFeatureMask | blwsFeatureMask | prefFeatureMask | presFeatureMask | pstfFeatureMask | pstsFeatureMask | cligFeatureMask | distFeatureMask | blwmFeatureMask)
+#define tagBlwf (blwfFeatureMask | blwsFeatureMask | cligFeatureMask | distFeatureMask
| blwmFeatureMask | mkmkFeatureMask)
+#define tagDefault (prefFeatureMask | blwfFeatureMask | presFeatureMask | blwsFeatureMask | cligFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask | mkmkFeatureMask)
+
+
+
+// These are in the order in which the features need to be applied
+// for correct processing
+static const FeatureMap featureMap[] =
+{
+    // Shaping features
+    {prefFeatureTag, prefFeatureMask},
+    {blwfFeatureTag, blwfFeatureMask},
+    {abvfFeatureTag, abvfFeatureMask},
+    {pstfFeatureTag, pstfFeatureMask},
+    {presFeatureTag, presFeatureMask},
+    {blwsFeatureTag, blwsFeatureMask},
+    {abvsFeatureTag, abvsFeatureMask},
+    {pstsFeatureTag, pstsFeatureMask},
+    {cligFeatureTag, cligFeatureMask},
+
+    // Positioning features
+    {distFeatureTag, distFeatureMask},
+    {blwmFeatureTag, blwmFeatureMask},
+    {abvmFeatureTag, abvmFeatureMask},
+    {mkmkFeatureTag, mkmkFeatureMask},
+};
+
+// Number of entries in featureMap, handed out by TibetanReordering::getFeatureMap()
+static const le_int32 featureMapCount = LE_ARRAY_SIZE(featureMap);
+
+// The stateTable is used to calculate the end (the length) of a well
+// formed Tibetan Syllable.
+//
+// The columns of each row are ordered exactly like the CharClassValues
+// enum of TibetanClassTable in TibetanReordering.h, so a character class
+// value can be used directly as the column index.
+//
+// Each row corresponds to a state, which does not necessarily need to be a type
+// of component... for example, state 2 is a base, which is always the first character
+// in the syllable, but the state could be produced by a consonant of any type when
+// it is the first character that is analysed (in ground state).
+//
+// An entry of -1 means the current character does not continue the syllable.
+//
+static const le_int8 tibetanStateTable[][TibetanClassTable::CC_COUNT] =
+{
+
+
+    //Dzongkha state table
+   //xx  ba  sj  tp  ac  cs  ha  bv  av  an  cb  vs  as  bs  di  pd  bd
+    { 1,  2,  4,  3,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, 20, 21, 21,}, // 0 - ground state
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 1 - exit state (or sign to the right of the syllable)
+    {-1, -1,  4,  3,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 2 - Base consonant
+    {-1, -1,  5, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 3 - Tsa phru after base
+    {-1, -1,  4,  6,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 4 - Subjoined consonant after base
+    {-1, -1,  5, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 5 - Subjoined consonant after tsa phru
+    {-1, -1, -1, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 6 - Tsa phru after subjoined consonant
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, // 7 - Pre Composed Sanskrit
+    {-1, -1, -1, -1, -1, -1, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 8 - A-chung
+    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, -1, 19, 19, -1, -1, -1,}, // 9 - Halanta
+    {-1, -1, -1, -1, -1, -1, -1, 11, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 10 - below vowel 1
+    {-1, -1, -1, -1, -1, -1, -1, 12, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 11 - below vowel 2
+    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 12 - below vowel 3
+    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 17, 17, 18, 19, 19, -1, -1, -1,}, // 13 - Anusvara before vowel
+    {-1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 17, 18, 19, 19, -1, -1, -1,}, // 14 - above vowel 1
+    {-1, -1, -1, -1, -1, -1, -1, -1, 16, 17, 17, 18, 19, 19, -1, -1, -1,}, // 15 - above vowel 2
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 17, 18, 19, 19, -1, -1, -1,}, // 16 - above vowel 3
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 19, 19, -1, -1, -1,}, // 17 - Anusvara or Candrabindu after vowel
+    {-1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, // 18 - Visarga
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 19 - stress mark
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21, 21,}, // 20 - digit
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 21 - digit mark
+
+
+};
+
+
+// Returns the feature map (tag -> mask pairs, in application order) and
+// stores its number of entries in 'count'.
+const FeatureMap *TibetanReordering::getFeatureMap(le_int32 &count)
+{
+    count = featureMapCount;
+
+    return featureMap;
+}
+
+
+// Given an input string of characters and a location in which to start looking
+// calculate, using the state table, which one is the last character of the syllable
+// that starts in the starting position.
+//
+// Returns the index one past the last character of that syllable; the result
+// never exceeds charCount.
+le_int32 TibetanReordering::findSyllable(const TibetanClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount)
+{
+    le_int32 cursor = prev;
+    le_int8 state = 0;
+
+    while (cursor < charCount) {
+        // strip the CF_* flags: only the class number indexes the state table
+        TibetanClassTable::CharClass charClass = (classTable->getCharClass(chars[cursor]) & TibetanClassTable::CF_CLASS_MASK);
+
+        state = tibetanStateTable[state][charClass];
+
+        if (state < 0) {
+            break;
+        }
+
+        cursor += 1;
+    }
+
+    return cursor;
+}
+
+
+// This is the real reordering function as applied to the Tibetan language
+//
+// chars        - the run to process (charCount characters)
+// charCount    - number of characters in 'chars'
+// (le_int32)   - script code, unused
+// outChars     - receives the output characters; the caller must provide room
+//                for every input character plus one dotted circle per syllable
+// glyphStorage - receives the source char index and feature mask of every
+//                output character
+//
+// Returns the number of characters written to outChars.
+le_int32 TibetanReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32,
+                                    LEUnicode *outChars, LEGlyphStorage &glyphStorage)
+{
+    const TibetanClassTable *classTable = TibetanClassTable::getTibetanClassTable();
+
+    ReorderingOutput output(outChars, glyphStorage);
+    TibetanClassTable::CharClass charClass;
+    le_int32 i, prev = 0;
+
+    // This loop only exits when we reach the end of a run, which may contain
+    // several syllables.
+    while (prev < charCount) {
+        le_int32 syllable = findSyllable(classTable, chars, prev, charCount);
+
+        // shall we add a dotted circle?
+        // If in the position in which the base should be (first char in the string) there is
+        // a character that has the Dotted circle flag (a character that cannot be a base)
+        // then write a dotted circle
+        if (classTable->getCharClass(chars[prev]) & TibetanClassTable::CF_DOTTED_CIRCLE) {
+            output.writeChar(C_DOTTED_CIRCLE, prev, tagDefault);
+        }
+
+        // copy the rest to output, inverting the pre-number mark if present after a digit.
+        for (i = prev; i < syllable; i += 1) {
+            charClass = classTable->getCharClass(chars[i]);
+
+            // If the present character is a number, and the next character is a pre-number combining mark
+            // then the two characters are reordered.
+            // The (i + 1 < syllable) test keeps the look-ahead inside the syllable: without it a
+            // digit at the very end of the run would make chars[i+1] read past the input. A
+            // pre-digit mark that follows a digit is always part of the digit's syllable
+            // (state 20 -> 21 in the state table), so no reordering case is lost.
+            if ((TibetanClassTable::CF_DIGIT & charClass)
+                && (i + 1 < syllable)
+                && (classTable->getCharClass(chars[i+1]) & TibetanClassTable::CF_PREDIGIT))
+            {
+                output.writeChar(C_PRE_NUMBER_MARK, i, tagPref);
+                output.writeChar(chars[i], i+1 , tagPref);
+                i += 1; // the pre-digit mark has been consumed as well
+            } else {
+                // pick the feature set from the position flag of the character
+                switch (charClass & TibetanClassTable::CF_POS_MASK) {
+
+                    case TibetanClassTable::CF_POS_ABOVE :
+                        output.writeChar(chars[i], i, tagAbvf);
+                        break;
+
+                    case TibetanClassTable::CF_POS_AFTER :
+                        output.writeChar(chars[i], i, tagPstf);
+                        break;
+
+                    case TibetanClassTable::CF_POS_BELOW :
+                        output.writeChar(chars[i], i, tagBlwf);
+                        break;
+
+                    default:
+                        // default - any other characters
+                        output.writeChar(chars[i], i, tagDefault);
+                        break;
+                } // switch
+            } // if
+        } // for
+
+        prev = syllable; // move the pointer to the start of next syllable
+    }
+
+    return output.getOutputIndex();
+}
+
+
+U_NAMESPACE_END
diff --git a/icu4c/source/layout/TibetanReordering.h b/icu4c/source/layout/TibetanReordering.h
new file mode 100644
index 0000000000..cafcf5646c
--- /dev/null
+++ b/icu4c/source/layout/TibetanReordering.h
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) Copyright IBM Corp.
1998-2005 - All Rights Reserved
+ *
+ * Developed at DIT - Government of Bhutan
+ *
+ * Contact person: Pema Geyleg -
+ *
+ * This file is a modification of the ICU file KhmerReordering.h
+ * by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan
+ * A first module for Dzongkha was developed by Karunakar under Panlocalisation funding.
+ * Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
+ *
+ */
+
+// Include guard: the macro defined must be the one tested, otherwise a second
+// inclusion of this header would redefine everything below.
+#ifndef __TIBETANREORDERING_H
+#define __TIBETANREORDERING_H
+
+/**
+ * \file
+ * \internal
+ */
+
+// #include "LETypes.h"
+// #include "OpenTypeTables.h"
+
+U_NAMESPACE_BEGIN
+
+class LEGlyphStorage;
+
+// Vocabulary
+// Base -> A consonant in its full (not subscript) form. It is the
+// center of the syllable, it can be surrounded by subjoined consonants, vowels,
+// signs... but there is only one base in a stack, it has to be coded as
+// the first character of the syllable. Included here are also groups of base + subjoined
+// which are represented by one single code point in unicode (e.g. 0F43). Also other characters that might take
+// subjoined consonants or other combining characters.
+// Subjoined -> Subjoined consonants and groups of subjoined consonants which have a single code-point
+// to represent the group (even if each subjoined consonant is represented independently
+// by another code-point)
+// Tsa Phru --> Tsa Phru character, Bhutanese people will always place it right after the base, but sometimes, due to
+// "normalization"
+// is placed after all the subjoined consonants, and it is also permitted there.
+// A Chung Vowel lengthening mark --> 0F71. It is placed after the base and any subjoined consonants but before any vowels
+// Precomposed Sanskrit vowels --> They are combinations of subjoined consonants + vowels that have been assigned
+// a given code-point (in spite of each single part of them having also a code-point).
+// They are avoided, and users are encouraged to use the combination of code-points that
+// represents the same sound instead of using these combined characters. This is included here
+// for compatibility with possible texts that use them (they are not in the Dzongkha keyboard).
+// Halanta -> The Halanta or Virama character 0F84 indicates that a consonant should not use its inherent vowel,
+// in spite of not having other vowels present. It is usually placed immediately after a base consonant,
+// but in some special cases it can also be placed after a subjoined consonant, so this is also
+// permitted in this algorithm. (Halanta is always displayed in Tibetan, not used as a connecting char)
+//
+// Subjoined vowels -> Dependent vowels (matras) placed below the base and below all subjoined consonants. There
+// might be as many as three subjoined vowels in a given stack (only one in general text, but up
+// to three for abbreviations, they have to be permitted).
+// Superscript vowels -> There are three superscript vowels, and they can be repeated or combined (up to three
+// times). They can combine with subjoined vowels, and are always coded after these.
+// Anusvara --> Nasalisation sign. Traditionally placed in absence of vowels, but also after vowels. In some
+// special cases it can be placed before a vowel, so this is also permitted
+// Candrabindu -> Forms of the Anusvara with different glyphs (and different in identity) which can be placed
+// without vowel or after the vowel, but never before. Cannot combine with Anusvara.
+// Stress marks -> Marks placed above or below a syllable, affecting the whole syllable.
They are combining
+// marks, so they have to be attached to a specific stack. They are used to emphasise a syllable.
+//
+// Digits -> Digits are not considered as non-combining characters because there are a few characters which
+// combine with them, so they have to be considered independently.
+// Digit combining marks -> dependent marks that combine with digits.
+//
+// TODO
+// There are a number of characters in the CJK block that are used in Tibetan script, two of these symbols
+// are used as bases for combining glyphs, and have not been encoded in Tibetan. As these characters are outside
+// of the tibetan block, they have not been treated in this program.
+
+
+struct TibetanClassTable // This list must include all types of components that can be used inside a syllable
+{
+    enum CharClassValues // order is important here! This order must be the same that is found in each horizontal
+                         // line in the statetable for Tibetan (file TibetanReordering.cpp). It assigns one number
+                         // to each type of character that has to be considered when analysing the order in which
+                         // characters can be placed
+    {
+        CC_RESERVED = 0, //Non Combining Characters
+        CC_BASE = 1, // Base Consonants, Base Consonants with Subjoined attached in code point, Sanskrit base marks
+        CC_SUBJOINED = 2, // Subjoined Consonants, combination of more than one Subjoined Consonant in the code point
+        CC_TSA_PHRU = 3, // Tsa-Phru character 0F39
+        CC_A_CHUNG = 4, // Vowel Lengthening a-chung mark 0F71
+        CC_COMP_SANSKRIT = 5, // Precomposed Sanskrit vowels including Subjoined characters and vowels
+        CC_HALANTA = 6, // Halanta Character 0F84
+        CC_BELOW_VOWEL = 7, // Subjoined vowels
+        CC_ABOVE_VOWEL = 8, // Superscript vowels
+        CC_ANUSVARA = 9, // Tibetan sign Rjes Su Nga Ro 0F7E
+        CC_CANDRABINDU = 10, // Tibetan sign Sna Ldan and Nyi Zla Naa Da 0F82, 0F83
+        CC_VISARGA = 11, // Tibetan sign Rnam Bcad (0F7F)
+        CC_ABOVE_S_MARK = 12, // Stress Marks placed above the text
+        CC_BELOW_S_MARK = 13, // Stress Marks placed below the text
+        CC_DIGIT = 14, // Dzongkha Digits
+        CC_PRE_DIGIT_MARK = 15, // Mark placed before the digit
+        CC_POST_BELOW_DIGIT_M = 16, // Mark placed below or after the digit
+        CC_COUNT = 17 // This is the number of character classes
+    };
+
+    enum CharClassFlags
+    {
+        CF_CLASS_MASK = 0x0000FFFF, // extracts the CharClassValues number from a CharClass
+
+        CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable
+        CF_DIGIT = 0x01000000, // flag to speed up comparison
+        CF_PREDIGIT = 0x02000000, // flag to detect pre-digit marks for reordering
+
+        // position flags
+        CF_POS_BEFORE = 0x00080000,
+        CF_POS_BELOW = 0x00040000,
+        CF_POS_ABOVE = 0x00020000,
+        CF_POS_AFTER = 0x00010000,
+        CF_POS_MASK = 0x000f0000 // extracts the position flags from a CharClass
+    };
+
+    // A class number (CharClassValues) combined with any CharClassFlags
+    typedef le_uint32 CharClass;
+
+    typedef le_int32 ScriptFlags;
+
+    LEUnicode firstChar; // for Tibetan this will become 0x0F00
+    LEUnicode lastChar; // and this 0x0FFF
+    const CharClass *classTable; // one entry per code point from firstChar to lastChar
+
+    // Returns the class of 'ch'; defined in TibetanReordering.cpp
+    CharClass getCharClass(LEUnicode ch) const;
+
+    // Returns the shared Tibetan class table; defined in TibetanReordering.cpp
+    static const TibetanClassTable *getTibetanClassTable();
+};
+
+
+class TibetanReordering /* not : public UObject because all methods are static */ {
+public:
+    // Writes the processed form of theChars to outChars and records char indices
+    // and feature masks in glyphStorage; returns the number of output characters.
+    static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
+                            LEUnicode *outChars, LEGlyphStorage &glyphStorage);
+
+    // Returns the Tibetan feature map and stores its entry count in 'count'.
+    static const FeatureMap *getFeatureMap(le_int32 &count);
+
+private:
+    // do not instantiate
+    TibetanReordering();
+
+    // Returns the index one past the end of the syllable that starts at 'prev'.
+    static le_int32 findSyllable(const TibetanClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
+
+};
+
+
+U_NAMESPACE_END
+#endif
diff --git a/icu4c/source/layout/layout.vcproj b/icu4c/source/layout/layout.vcproj index 9f8631077a..8c0af6f197 100644 --- a/icu4c/source/layout/layout.vcproj +++ b/icu4c/source/layout/layout.vcproj @@ -352,6 +352,12 @@ + + + + @@ -746,6 +752,12 @@ + + + +