ICU-4696 Allow vowel OO to take vowel sign U. Allow vowel sign OO to be followed by vowel sign U.
X-SVN-Rev: 18371
This commit is contained in:
parent
b8ba6dc98d
commit
bf235f0291
@ -33,6 +33,7 @@ U_NAMESPACE_BEGIN
|
||||
#define _sb (CC_STRESS_MARK | CF_POS_BELOW)
|
||||
#define _iv (CC_INDEPENDENT_VOWEL)
|
||||
#define _i2 (CC_INDEPENDENT_VOWEL_2)
|
||||
#define _i3 (CC_INDEPENDENT_VOWEL_3)
|
||||
#define _ct (CC_CONSONANT | CF_CONSONANT)
|
||||
#define _cn (CC_CONSONANT_WITH_NUKTA | CF_CONSONANT)
|
||||
#define _nu (CC_NUKTA)
|
||||
@ -44,6 +45,7 @@ U_NAMESPACE_BEGIN
|
||||
#define _lm (_dv | CF_LENGTH_MARK)
|
||||
#define _l1 (CC_SPLIT_VOWEL_PIECE_1 | CF_POS_BEFORE)
|
||||
#define _a1 (CC_SPLIT_VOWEL_PIECE_1 | CF_POS_ABOVE)
|
||||
#define _b2 (CC_SPLIT_VOWEL_PIECE_2 | CF_POS_BELOW)
|
||||
#define _r2 (CC_SPLIT_VOWEL_PIECE_2 | CF_POS_AFTER)
|
||||
#define _m2 (CC_SPLIT_VOWEL_PIECE_2 | CF_LENGTH_MARK)
|
||||
#define _m3 (CC_SPLIT_VOWEL_PIECE_3 | CF_LENGTH_MARK)
|
||||
@ -100,10 +102,10 @@ static const IndicClassTable::CharClass bengCharClasses[] =
|
||||
static const IndicClassTable::CharClass punjCharClasses[] =
|
||||
{
|
||||
_xx, _ma, _ma, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _xx, _iv, // 0A00 - 0A0F
|
||||
_iv, _xx, _xx, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0A10 - 0A1F
|
||||
_iv, _xx, _xx, _i3, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0A10 - 0A1F
|
||||
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _bb, // 0A20 - 0A2F
|
||||
_vt, _xx, _ct, _cn, _xx, _bb, _cn, _xx, _ct, _bb, _xx, _xx, _nu, _xx, _dr, _dl, // 0A30 - 0A3F
|
||||
_dr, _db, _db, _xx, _xx, _xx, _xx, _da, _da, _xx, _xx, _da, _da, _vr, _xx, _xx, // 0A40 - 0A4F
|
||||
_dr, _b2, _db, _xx, _xx, _xx, _xx, _da, _da, _xx, _xx, _a1, _da, _vr, _xx, _xx, // 0A40 - 0A4F
|
||||
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _cn, _cn, _cn, _ct, _xx, _cn, _xx, // 0A50 - 0A5F
|
||||
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0A60 - 0A6F
|
||||
_ma, _ma, _xx, _xx, _xx // 0A70 - 0A74
|
||||
|
@ -336,19 +336,19 @@ static const LETag tagArray[] =
|
||||
|
||||
static const le_int8 stateTable[][CC_COUNT] =
|
||||
{
|
||||
// xx vm sm iv i2 ct cn nu dv s1 s2 s3 vr zw
|
||||
{ 1, 1, 1, 5, 8, 3, 2, 1, 5, 9, 5, 1, 1, 1}, // 0 - ground state
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 2 - consonant with nukta
|
||||
{-1, 6, 1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, -1}, // 3 - consonant
|
||||
{-1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
|
||||
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
|
||||
{-1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - ZWJ, ZWNJ
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1} // 10 - second part of split vowel
|
||||
|
||||
// xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw
|
||||
{ 1, 1, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 1, 1, 1}, // 0 - ground state
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 2 - consonant with nukta
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, -1}, // 3 - consonant
|
||||
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
|
||||
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
|
||||
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - ZWJ, ZWNJ
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1}, // 10 - second part of split vowel
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1} // 11 - independent vowels that can take a dependent vowel sign or virama
|
||||
};
|
||||
|
||||
const LETag *IndicReordering::getFeatureOrder()
|
||||
@ -467,6 +467,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
||||
break;
|
||||
|
||||
case CC_INDEPENDENT_VOWEL_2:
|
||||
case CC_INDEPENDENT_VOWEL_3:
|
||||
case CC_CONSONANT:
|
||||
case CC_CONSONANT_WITH_NUKTA:
|
||||
{
|
||||
|
@ -27,16 +27,17 @@ U_NAMESPACE_BEGIN
|
||||
#define CC_STRESS_MARK 2U
|
||||
#define CC_INDEPENDENT_VOWEL 3U
|
||||
#define CC_INDEPENDENT_VOWEL_2 4U
|
||||
#define CC_CONSONANT 5U
|
||||
#define CC_CONSONANT_WITH_NUKTA 6U
|
||||
#define CC_NUKTA 7U
|
||||
#define CC_DEPENDENT_VOWEL 8U
|
||||
#define CC_SPLIT_VOWEL_PIECE_1 9U
|
||||
#define CC_SPLIT_VOWEL_PIECE_2 10U
|
||||
#define CC_SPLIT_VOWEL_PIECE_3 11U
|
||||
#define CC_VIRAMA 12U
|
||||
#define CC_ZERO_WIDTH_MARK 13U
|
||||
#define CC_COUNT 14U
|
||||
#define CC_INDEPENDENT_VOWEL_3 5U
|
||||
#define CC_CONSONANT 6U
|
||||
#define CC_CONSONANT_WITH_NUKTA 7U
|
||||
#define CC_NUKTA 8U
|
||||
#define CC_DEPENDENT_VOWEL 9U
|
||||
#define CC_SPLIT_VOWEL_PIECE_1 10U
|
||||
#define CC_SPLIT_VOWEL_PIECE_2 11U
|
||||
#define CC_SPLIT_VOWEL_PIECE_3 12U
|
||||
#define CC_VIRAMA 13U
|
||||
#define CC_ZERO_WIDTH_MARK 14U
|
||||
#define CC_COUNT 15U
|
||||
|
||||
// Character class flags
|
||||
#define CF_CLASS_MASK 0x0000FFFFU
|
||||
|
Loading…
Reference in New Issue
Block a user