ICU-2427 support property Hangul_Syllable_Type
X-SVN-Rev: 11272
This commit is contained in:
parent
62d930de7e
commit
cb850817d8
@ -918,6 +918,9 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uchar_addPropertyStarts(USet *set) {
|
||||
UChar32 c;
|
||||
int32_t value, value2;
|
||||
|
||||
if(!HAVE_DATA) {
|
||||
return;
|
||||
}
|
||||
@ -980,6 +983,38 @@ uchar_addPropertyStarts(USet *set) {
|
||||
uset_add(set, ZWNJ); /* range ZWNJ..ZWJ */
|
||||
uset_add(set, ZWJ+1);
|
||||
|
||||
/* add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE */
|
||||
uset_add(set, 0x1100);
|
||||
value=U_HST_LEADING_JAMO;
|
||||
for(c=0x115a; c<=0x115f; ++c) {
|
||||
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
uset_add(set, c);
|
||||
}
|
||||
}
|
||||
|
||||
uset_add(set, 0x1160);
|
||||
value=U_HST_VOWEL_JAMO;
|
||||
for(c=0x11a3; c<=0x11a7; ++c) {
|
||||
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
uset_add(set, c);
|
||||
}
|
||||
}
|
||||
|
||||
uset_add(set, 0x11a8);
|
||||
value=U_HST_TRAILING_JAMO;
|
||||
for(c=0x11fa; c<=0x11ff; ++c) {
|
||||
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
uset_add(set, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Omit code points for u_charCellWidth() because
|
||||
* - it is deprecated and not a real Unicode property
|
||||
|
@ -294,6 +294,9 @@ typedef enum UProperty {
|
||||
/** Enumerated property Script.
|
||||
Same as uscript_getScript, returns UScriptCode values. @draft ICU 2.2 */
|
||||
UCHAR_SCRIPT,
|
||||
/** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
|
||||
Returns UHangulSyllableType values. @draft ICU 2.6 */
|
||||
UCHAR_HANGUL_SYLLABLE_TYPE,
|
||||
/** One more than the last constant for enumerated/integer Unicode properties. @draft ICU 2.2 */
|
||||
UCHAR_INT_LIMIT,
|
||||
|
||||
@ -1230,6 +1233,22 @@ typedef enum UNumericType {
|
||||
U_NT_COUNT
|
||||
} UNumericType;
|
||||
|
||||
/**
|
||||
* Hangul Syllable Type constants.
* These are the values that u_getIntPropertyValue() returns for
* UCHAR_HANGUL_SYLLABLE_TYPE; u_getIntPropertyMaxValue() reports
* U_HST_COUNT-1 as the largest of them.
|
||||
*
|
||||
* @see UCHAR_HANGUL_SYLLABLE_TYPE
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
typedef enum UHangulSyllableType {
|
||||
U_HST_NOT_APPLICABLE, /*[NA] value 0: code point is neither a Jamo nor a Hangul syllable */ /*See note !!*/
|
||||
U_HST_LEADING_JAMO, /*[L] leading Jamo (HANGUL CHOSEONG ...) */
|
||||
U_HST_VOWEL_JAMO, /*[V] vowel Jamo (HANGUL JUNGSEONG ...) */
|
||||
U_HST_TRAILING_JAMO, /*[T] trailing Jamo */
|
||||
U_HST_LV_SYLLABLE, /*[LV] Hangul syllable whose offset from HANGUL_BASE is a multiple of JAMO_T_COUNT */
|
||||
U_HST_LVT_SYLLABLE, /*[LVT] any other Hangul syllable */
|
||||
U_HST_COUNT /* one more than the highest constant above; not itself a property value */
|
||||
} UHangulSyllableType;
|
||||
|
||||
/**
|
||||
* Check a binary Unicode property for a code point.
|
||||
*
|
||||
|
@ -241,6 +241,35 @@ u_getIntPropertyValue(UChar32 c, UProperty which) {
|
||||
case UCHAR_SCRIPT:
|
||||
errorCode=U_ZERO_ERROR;
|
||||
return (int32_t)uscript_getScript(c, &errorCode);
|
||||
case UCHAR_HANGUL_SYLLABLE_TYPE:
|
||||
/* purely algorithmic; hardcode known characters, check for assigned new ones */
|
||||
if(c<JAMO_L_BASE) {
|
||||
/* NA */
|
||||
} else if(c<=0x11ff) {
|
||||
/* Jamo range */
|
||||
if(c<=0x115f) {
|
||||
/* Jamo L range, HANGUL CHOSEONG ... */
|
||||
if(c==0x115f || c<=0x1159 || u_charType(c)==U_OTHER_LETTER) {
|
||||
return U_HST_LEADING_JAMO;
|
||||
}
|
||||
} else if(c<=0x11a7) {
|
||||
/* Jamo V range, HANGUL JUNGSEONG ... */
|
||||
if(c<=0x11a2 || u_charType(c)==U_OTHER_LETTER) {
|
||||
return U_HST_VOWEL_JAMO;
|
||||
}
|
||||
} else {
|
||||
/* Jamo T range */
|
||||
if(c<=0x11f9 || u_charType(c)==U_OTHER_LETTER) {
|
||||
return U_HST_TRAILING_JAMO;
|
||||
}
|
||||
}
|
||||
} else if((c-=HANGUL_BASE)<0) {
|
||||
/* NA */
|
||||
} else if(c<HANGUL_COUNT) {
|
||||
/* Hangul syllable */
|
||||
return c%JAMO_T_COUNT==0 ? U_HST_LV_SYLLABLE : U_HST_LVT_SYLLABLE;
|
||||
}
|
||||
return 0; /* NA */
|
||||
default:
|
||||
return 0; /* undefined */
|
||||
}
|
||||
@ -297,6 +326,8 @@ u_getIntPropertyMaxValue(UProperty which) {
|
||||
case UCHAR_SCRIPT:
|
||||
max=uprv_getMaxValues(0)&UPROPS_SCRIPT_MASK;
|
||||
return max!=0 ? max : (int32_t)USCRIPT_CODE_LIMIT-1;
|
||||
case UCHAR_HANGUL_SYLLABLE_TYPE:
|
||||
return (int32_t)U_HST_COUNT-1;
|
||||
default:
|
||||
return -1; /* undefined */
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user