ICU-4696 Allow vowel OO to take vowel sign U. Allow vowel sign OO to be followed by vowel sign U.

X-SVN-Rev: 18371
This commit is contained in:
Eric Mader 2005-07-28 21:27:12 +00:00
parent b8ba6dc98d
commit bf235f0291
3 changed files with 29 additions and 25 deletions

View File

@ -33,6 +33,7 @@ U_NAMESPACE_BEGIN
#define _sb (CC_STRESS_MARK | CF_POS_BELOW)
#define _iv (CC_INDEPENDENT_VOWEL)
#define _i2 (CC_INDEPENDENT_VOWEL_2)
#define _i3 (CC_INDEPENDENT_VOWEL_3)
#define _ct (CC_CONSONANT | CF_CONSONANT)
#define _cn (CC_CONSONANT_WITH_NUKTA | CF_CONSONANT)
#define _nu (CC_NUKTA)
@ -44,6 +45,7 @@ U_NAMESPACE_BEGIN
#define _lm (_dv | CF_LENGTH_MARK)
#define _l1 (CC_SPLIT_VOWEL_PIECE_1 | CF_POS_BEFORE)
#define _a1 (CC_SPLIT_VOWEL_PIECE_1 | CF_POS_ABOVE)
#define _b2 (CC_SPLIT_VOWEL_PIECE_2 | CF_POS_BELOW)
#define _r2 (CC_SPLIT_VOWEL_PIECE_2 | CF_POS_AFTER)
#define _m2 (CC_SPLIT_VOWEL_PIECE_2 | CF_LENGTH_MARK)
#define _m3 (CC_SPLIT_VOWEL_PIECE_3 | CF_LENGTH_MARK)
@ -100,10 +102,10 @@ static const IndicClassTable::CharClass bengCharClasses[] =
static const IndicClassTable::CharClass punjCharClasses[] =
{
_xx, _ma, _ma, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _xx, _iv, // 0A00 - 0A0F
_iv, _xx, _xx, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0A10 - 0A1F
_iv, _xx, _xx, _i3, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0A10 - 0A1F
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _bb, // 0A20 - 0A2F
_vt, _xx, _ct, _cn, _xx, _bb, _cn, _xx, _ct, _bb, _xx, _xx, _nu, _xx, _dr, _dl, // 0A30 - 0A3F
_dr, _db, _db, _xx, _xx, _xx, _xx, _da, _da, _xx, _xx, _da, _da, _vr, _xx, _xx, // 0A40 - 0A4F
_dr, _b2, _db, _xx, _xx, _xx, _xx, _da, _da, _xx, _xx, _a1, _da, _vr, _xx, _xx, // 0A40 - 0A4F
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _cn, _cn, _cn, _ct, _xx, _cn, _xx, // 0A50 - 0A5F
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0A60 - 0A6F
_ma, _ma, _xx, _xx, _xx // 0A70 - 0A74

View File

@ -336,19 +336,19 @@ static const LETag tagArray[] =
// Syllable state-transition table: one row per state (numbered in each row's
// trailing comment) and one column per character class (CC_COUNT columns, in
// the order given by the column-header comment). Each entry is the row number
// of the state to move to when a character of that class is seen.
// NOTE(review): -1 appears to mean "no transition from this state" - confirm
// against the code that walks this table.
static const le_int8 stateTable[][CC_COUNT] =
{
// xx vm sm iv i2 ct cn nu dv s1 s2 s3 vr zw
{ 1, 1, 1, 5, 8, 3, 2, 1, 5, 9, 5, 1, 1, 1}, // 0 - ground state
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
{-1, 6, 1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 2 - consonant with nukta
{-1, 6, 1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, -1}, // 3 - consonant
{-1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
{-1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - ZWJ, ZWNJ
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1} // 10 - second part of split vowel
// xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw
{ 1, 1, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 1, 1, 1}, // 0 - ground state
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 2 - consonant with nukta
{-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, -1}, // 3 - consonant
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - ZWJ, ZWNJ
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1}, // 10 - second part of split vowel
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1} // 11 - independent vowels (i3) that can take a dependent vowel (dv, s1, s2, s3) or a virama
};
const LETag *IndicReordering::getFeatureOrder()
@ -467,6 +467,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
break;
case CC_INDEPENDENT_VOWEL_2:
case CC_INDEPENDENT_VOWEL_3:
case CC_CONSONANT:
case CC_CONSONANT_WITH_NUKTA:
{

View File

@ -27,16 +27,17 @@ U_NAMESPACE_BEGIN
#define CC_STRESS_MARK 2U
#define CC_INDEPENDENT_VOWEL 3U
#define CC_INDEPENDENT_VOWEL_2 4U
#define CC_CONSONANT 5U
#define CC_CONSONANT_WITH_NUKTA 6U
#define CC_NUKTA 7U
#define CC_DEPENDENT_VOWEL 8U
#define CC_SPLIT_VOWEL_PIECE_1 9U
#define CC_SPLIT_VOWEL_PIECE_2 10U
#define CC_SPLIT_VOWEL_PIECE_3 11U
#define CC_VIRAMA 12U
#define CC_ZERO_WIDTH_MARK 13U
#define CC_COUNT 14U
#define CC_INDEPENDENT_VOWEL_3 5U
#define CC_CONSONANT 6U
#define CC_CONSONANT_WITH_NUKTA 7U
#define CC_NUKTA 8U
#define CC_DEPENDENT_VOWEL 9U
#define CC_SPLIT_VOWEL_PIECE_1 10U
#define CC_SPLIT_VOWEL_PIECE_2 11U
#define CC_SPLIT_VOWEL_PIECE_3 12U
#define CC_VIRAMA 13U
#define CC_ZERO_WIDTH_MARK 14U
#define CC_COUNT 15U
// Character class flags
#define CF_CLASS_MASK 0x0000FFFFU