ICU-5431 Modifications to allow pre-base consonant reordering for Malayalam
X-SVN-Rev: 26090
This commit is contained in:
parent
9100ba632e
commit
76edc36b6e
@ -66,6 +66,7 @@ U_NAMESPACE_BEGIN
|
|||||||
// special forms... (Bengali RA?)
|
// special forms... (Bengali RA?)
|
||||||
#define _bb (_ct | CF_BELOW_BASE)
|
#define _bb (_ct | CF_BELOW_BASE)
|
||||||
#define _pb (_ct | CF_POST_BASE)
|
#define _pb (_ct | CF_POST_BASE)
|
||||||
|
#define _fb (_ct | CF_PRE_BASE)
|
||||||
#define _vt (_bb | CF_VATTU)
|
#define _vt (_bb | CF_VATTU)
|
||||||
#define _rv (_vt | CF_REPH)
|
#define _rv (_vt | CF_REPH)
|
||||||
#define _rp (_pb | CF_REPH)
|
#define _rp (_pb | CF_REPH)
|
||||||
@ -195,7 +196,7 @@ static const IndicClassTable::CharClass mlymCharClasses[] =
|
|||||||
_xx, _xx, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, _iv, // 0D00 - 0D0F
|
_xx, _xx, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, _iv, // 0D00 - 0D0F
|
||||||
_iv, _xx, _iv, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0D10 - 0D1F
|
_iv, _xx, _iv, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0D10 - 0D1F
|
||||||
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _pb, // 0D20 - 0D2F
|
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _pb, // 0D20 - 0D2F
|
||||||
_pb, _cn, _bb, _ct, _ct, _pb, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _xx, _r2, _dr, // 0D30 - 0D3F
|
_fb, _fb, _bb, _ct, _ct, _pb, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _xx, _r2, _dr, // 0D30 - 0D3F
|
||||||
_dr, _dr, _dr, _dr, _xx, _xx, _l1, _l1, _dl, _xx, _s1, _s2, _s3, _vr, _xx, _xx, // 0D40 - 0D4F
|
_dr, _dr, _dr, _dr, _xx, _xx, _l1, _l1, _dl, _xx, _s1, _s2, _s3, _vr, _xx, _xx, // 0D40 - 0D4F
|
||||||
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _m2, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0D50 - 0D5F
|
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _m2, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0D50 - 0D5F
|
||||||
_iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx // 0D60 - 0D6F
|
_iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx // 0D60 - 0D6F
|
||||||
|
@ -125,6 +125,10 @@ private:
|
|||||||
le_int32 fSMIndex;
|
le_int32 fSMIndex;
|
||||||
FeatureMask fSMFeatures;
|
FeatureMask fSMFeatures;
|
||||||
|
|
||||||
|
LEUnicode fPreBaseConsonant;
|
||||||
|
LEUnicode fPreBaseVirama;
|
||||||
|
le_int32 fPBCIndex;
|
||||||
|
FeatureMask fPBCFeatures;
|
||||||
|
|
||||||
void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass)
|
void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass)
|
||||||
{
|
{
|
||||||
@ -171,7 +175,8 @@ public:
|
|||||||
fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0),
|
fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0),
|
||||||
fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups),
|
fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups),
|
||||||
fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
|
fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
|
||||||
fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0)
|
fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0),
|
||||||
|
fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0)
|
||||||
{
|
{
|
||||||
// nothing else to do...
|
// nothing else to do...
|
||||||
}
|
}
|
||||||
@ -190,6 +195,8 @@ public:
|
|||||||
|
|
||||||
fVMabove = fVMpost = 0;
|
fVMabove = fVMpost = 0;
|
||||||
fSMabove = fSMbelow = 0;
|
fSMabove = fSMbelow = 0;
|
||||||
|
|
||||||
|
fPreBaseConsonant = fPreBaseVirama = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures)
|
void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures)
|
||||||
@ -385,6 +392,14 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Record a pre-base consonant and its virama so that
 * writePreBaseConsonant() can emit them later.
 *
 * @param index       character index of the consonant; stored in fPBCIndex
 * @param PBConsonant the consonant character; stored in fPreBaseConsonant
 * @param PBVirama    the virama character; stored in fPreBaseVirama
 * @param features    feature mask later applied to both characters
 */
void notePreBaseConsonant(le_uint32 index, LEUnicode PBConsonant, LEUnicode PBVirama, FeatureMask features)
{
    fPreBaseConsonant = PBConsonant;
    fPreBaseVirama    = PBVirama;
    fPBCFeatures      = features;
    fPBCIndex         = index;
}
|
||||||
|
|
||||||
void noteBaseConsonant()
|
void noteBaseConsonant()
|
||||||
{
|
{
|
||||||
if (fMPreFixups != NULL && fMPreOutIndex >= 0) {
|
if (fMPreFixups != NULL && fMPreOutIndex >= 0) {
|
||||||
@ -464,6 +479,22 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void writePreBaseConsonant()
|
||||||
|
{
|
||||||
|
// The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam. However,
|
||||||
|
// it seems that almost none of the fonts for Malayalam are set up to handle this.
|
||||||
|
// So, we're going to force the issue here by using the rakar as defined with RA in most fonts.
|
||||||
|
|
||||||
|
if (fPreBaseConsonant == 0x0d31) { // RRA
|
||||||
|
fPreBaseConsonant = 0x0d30; // RA
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fPreBaseConsonant != 0) {
|
||||||
|
writeChar(fPreBaseConsonant, fPBCIndex, fPBCFeatures);
|
||||||
|
writeChar(fPreBaseVirama,fPBCIndex-1,fPBCFeatures);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
le_int32 getOutputIndex()
|
le_int32 getOutputIndex()
|
||||||
{
|
{
|
||||||
return fOutIndex;
|
return fOutIndex;
|
||||||
@ -722,6 +753,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||||||
lastConsonant -= 1;
|
lastConsonant -= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
IndicClassTable::CharClass charClass = CC_RESERVED;
|
IndicClassTable::CharClass charClass = CC_RESERVED;
|
||||||
IndicClassTable::CharClass nextClass = CC_RESERVED;
|
IndicClassTable::CharClass nextClass = CC_RESERVED;
|
||||||
le_int32 baseConsonant = lastConsonant;
|
le_int32 baseConsonant = lastConsonant;
|
||||||
@ -729,9 +761,11 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||||||
le_int32 postBaseLimit = classTable->scriptFlags & SF_POST_BASE_LIMIT_MASK;
|
le_int32 postBaseLimit = classTable->scriptFlags & SF_POST_BASE_LIMIT_MASK;
|
||||||
le_bool seenVattu = FALSE;
|
le_bool seenVattu = FALSE;
|
||||||
le_bool seenBelowBaseForm = FALSE;
|
le_bool seenBelowBaseForm = FALSE;
|
||||||
|
le_bool seenPreBaseForm = FALSE;
|
||||||
le_bool hasNukta = FALSE;
|
le_bool hasNukta = FALSE;
|
||||||
le_bool hasBelowBaseForm = FALSE;
|
le_bool hasBelowBaseForm = FALSE;
|
||||||
le_bool hasPostBaseForm = FALSE;
|
le_bool hasPostBaseForm = FALSE;
|
||||||
|
le_bool hasPreBaseForm = FALSE;
|
||||||
|
|
||||||
if (postBase < markStart && classTable->isNukta(chars[postBase])) {
|
if (postBase < markStart && classTable->isNukta(chars[postBase])) {
|
||||||
charClass = CC_NUKTA;
|
charClass = CC_NUKTA;
|
||||||
@ -745,14 +779,22 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||||||
|
|
||||||
hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta;
|
hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta;
|
||||||
hasPostBaseForm = IndicClassTable::hasPostBaseForm(charClass) && !hasNukta;
|
hasPostBaseForm = IndicClassTable::hasPostBaseForm(charClass) && !hasNukta;
|
||||||
|
hasPreBaseForm = IndicClassTable::hasPreBaseForm(charClass) && !hasNukta;
|
||||||
|
|
||||||
if (IndicClassTable::isConsonant(charClass)) {
|
if (IndicClassTable::isConsonant(charClass)) {
|
||||||
if (postBaseLimit == 0 || seenVattu ||
|
if (postBaseLimit == 0 || seenVattu ||
|
||||||
(baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) ||
|
(baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) ||
|
||||||
!(hasBelowBaseForm || hasPostBaseForm)) {
|
!(hasBelowBaseForm || hasPostBaseForm || hasPreBaseForm)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Note any pre-base consonants
|
||||||
|
if ( baseConsonant == lastConsonant && lastConsonant > 0 &&
|
||||||
|
hasPreBaseForm && classTable->isVirama(chars[baseConsonant - 1])) {
|
||||||
|
output.notePreBaseConsonant(lastConsonant,chars[lastConsonant],chars[lastConsonant-1],tagArray2);
|
||||||
|
seenPreBaseForm = TRUE;
|
||||||
|
|
||||||
|
}
|
||||||
// consonants with nuktas are never vattus
|
// consonants with nuktas are never vattus
|
||||||
seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta;
|
seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta;
|
||||||
|
|
||||||
@ -785,12 +827,14 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write any pre-base consonants
|
// write any pre-base consonants
|
||||||
|
output.writePreBaseConsonant();
|
||||||
|
|
||||||
le_bool supressVattu = TRUE;
|
le_bool supressVattu = TRUE;
|
||||||
|
|
||||||
for (i = baseLimit; i < baseConsonant; i += 1) {
|
for (i = baseLimit; i < baseConsonant; i += 1) {
|
||||||
LEUnicode ch = chars[i];
|
LEUnicode ch = chars[i];
|
||||||
// Don't put 'blwf' on first consonant.
|
// Don't put 'pstf' or 'blwf' on anything before the base consonant.
|
||||||
FeatureMask features = (i == baseLimit? tagArray2 : tagArray1);
|
FeatureMask features = tagArray1 & ~( pstfFeatureMask | blwfFeatureMask );
|
||||||
|
|
||||||
charClass = classTable->getCharClass(ch);
|
charClass = classTable->getCharClass(ch);
|
||||||
nextClass = classTable->getCharClass(chars[i + 1]);
|
nextClass = classTable->getCharClass(chars[i + 1]);
|
||||||
@ -841,7 +885,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write below-base consonants
|
// write below-base consonants
|
||||||
if (baseConsonant != lastConsonant) {
|
if (baseConsonant != lastConsonant && !seenPreBaseForm) {
|
||||||
for (i = bcSpan + 1; i < postBase; i += 1) {
|
for (i = bcSpan + 1; i < postBase; i += 1) {
|
||||||
output.writeChar(chars[i], i, tagArray1);
|
output.writeChar(chars[i], i, tagArray1);
|
||||||
}
|
}
|
||||||
@ -871,7 +915,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||||||
|
|
||||||
// write post-base consonants
|
// write post-base consonants
|
||||||
// FIXME: does this put the right tags on post-base consonants?
|
// FIXME: does this put the right tags on post-base consonants?
|
||||||
if (baseConsonant != lastConsonant) {
|
if (baseConsonant != lastConsonant && !seenPreBaseForm) {
|
||||||
if (postBase <= lastConsonant) {
|
if (postBase <= lastConsonant) {
|
||||||
for (i = postBase; i <= lastConsonant; i += 1) {
|
for (i = postBase; i <= lastConsonant; i += 1) {
|
||||||
output.writeChar(chars[i], i, tagArray3);
|
output.writeChar(chars[i], i, tagArray3);
|
||||||
@ -1139,7 +1183,7 @@ le_int32 IndicReordering::v2process(const LEUnicode *chars, le_int32 charCount,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void IndicReordering::getDynamicProperties( DynamicProperties */*dProps*/, const IndicClassTable *classTable ) {
|
void IndicReordering::getDynamicProperties( DynamicProperties *, const IndicClassTable *classTable ) {
|
||||||
|
|
||||||
|
|
||||||
LEUnicode currentChar;
|
LEUnicode currentChar;
|
||||||
|
@ -50,6 +50,7 @@ U_NAMESPACE_BEGIN
|
|||||||
#define CF_BELOW_BASE 0x10000000U
|
#define CF_BELOW_BASE 0x10000000U
|
||||||
#define CF_POST_BASE 0x08000000U
|
#define CF_POST_BASE 0x08000000U
|
||||||
#define CF_LENGTH_MARK 0x04000000U
|
#define CF_LENGTH_MARK 0x04000000U
|
||||||
|
#define CF_PRE_BASE 0x02000000U
|
||||||
|
|
||||||
#define CF_POS_BEFORE 0x00300000U
|
#define CF_POS_BEFORE 0x00300000U
|
||||||
#define CF_POS_BELOW 0x00200000U
|
#define CF_POS_BELOW 0x00200000U
|
||||||
@ -118,6 +119,7 @@ struct IndicClassTable
|
|||||||
inline le_bool hasPostBaseForm(LEUnicode ch) const;
|
inline le_bool hasPostBaseForm(LEUnicode ch) const;
|
||||||
inline le_bool hasBelowBaseForm(LEUnicode ch) const;
|
inline le_bool hasBelowBaseForm(LEUnicode ch) const;
|
||||||
inline le_bool hasAboveBaseForm(LEUnicode ch) const;
|
inline le_bool hasAboveBaseForm(LEUnicode ch) const;
|
||||||
|
inline le_bool hasPreBaseForm(LEUnicode ch) const;
|
||||||
|
|
||||||
inline static le_bool isVowelModifier(CharClass charClass);
|
inline static le_bool isVowelModifier(CharClass charClass);
|
||||||
inline static le_bool isStressMark(CharClass charClass);
|
inline static le_bool isStressMark(CharClass charClass);
|
||||||
@ -134,6 +136,7 @@ struct IndicClassTable
|
|||||||
inline static le_bool hasPostBaseForm(CharClass charClass);
|
inline static le_bool hasPostBaseForm(CharClass charClass);
|
||||||
inline static le_bool hasBelowBaseForm(CharClass charClass);
|
inline static le_bool hasBelowBaseForm(CharClass charClass);
|
||||||
inline static le_bool hasAboveBaseForm(CharClass charClass);
|
inline static le_bool hasAboveBaseForm(CharClass charClass);
|
||||||
|
inline static le_bool hasPreBaseForm(CharClass charClass);
|
||||||
|
|
||||||
static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
|
static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
|
||||||
};
|
};
|
||||||
@ -255,6 +258,11 @@ inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass)
|
|||||||
return (charClass & CF_POST_BASE) != 0;
|
return (charClass & CF_POST_BASE) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns whether the CF_PRE_BASE flag is set in the given character class.
inline le_bool IndicClassTable::hasPreBaseForm(CharClass charClass)
{
    return 0 != (charClass & CF_PRE_BASE);
}
|
||||||
|
|
||||||
inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
|
inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
|
||||||
{
|
{
|
||||||
return (charClass & CF_BELOW_BASE) != 0;
|
return (charClass & CF_BELOW_BASE) != 0;
|
||||||
@ -335,6 +343,11 @@ inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const
|
|||||||
return hasBelowBaseForm(getCharClass(ch));
|
return hasBelowBaseForm(getCharClass(ch));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Looks up the character class of ch in this table and reports whether
// that class has the pre-base form flag set.
inline le_bool IndicClassTable::hasPreBaseForm(LEUnicode ch) const
{
    const CharClass charClass = getCharClass(ch);

    return hasPreBaseForm(charClass);
}
|
||||||
|
|
||||||
inline le_bool IndicClassTable::hasAboveBaseForm(LEUnicode ch) const
|
inline le_bool IndicClassTable::hasAboveBaseForm(LEUnicode ch) const
|
||||||
{
|
{
|
||||||
return hasAboveBaseForm(getCharClass(ch));
|
return hasAboveBaseForm(getCharClass(ch));
|
||||||
|
Loading…
Reference in New Issue
Block a user