ICU-1785 implement unorm_isNFSkippable() and unorm_addPropertyStarts()

X-SVN-Rev: 10153
This commit is contained in:
Markus Scherer 2002-11-05 00:57:52 +00:00
parent 7f33a69caf
commit 6ebcafd22d

View File

@ -140,7 +140,7 @@ static const uint16_t *extraData=NULL,
*canonStartSets=NULL;
static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
static UBool formatVersion_2_1=FALSE;
static UBool formatVersion_2_1=FALSE, formatVersion_2_2=FALSE;
/* the Unicode version of the normalization data */
static UVersionInfo dataVersion={ 3, 1, 0, 0 };
@ -172,6 +172,12 @@ getFoldingNormOffset(uint32_t norm32) {
}
}
/* fcdTrie: the folding offset is the lead FCD value itself */
static int32_t U_CALLCONV
getFoldingFCDOffset(uint32_t data) {
return (int32_t)data;
}
/* auxTrie: the folding offset is in bits 9..0 of the 16-bit trie result */
static int32_t U_CALLCONV
getFoldingAuxOffset(uint32_t data) {
@ -231,6 +237,7 @@ loadNormData(UErrorCode &errorCode) {
pb+=p[_NORM_INDEX_TRIE_SIZE]+p[_NORM_INDEX_UCHAR_COUNT]*2+p[_NORM_INDEX_COMBINE_DATA_COUNT]*2;
utrie_unserialize(&_fcdTrie, pb, p[_NORM_INDEX_FCD_TRIE_SIZE], &errorCode);
_fcdTrie.getFoldingOffset=getFoldingFCDOffset;
if(p[_NORM_INDEX_FCD_TRIE_SIZE]!=0) {
pb+=p[_NORM_INDEX_FCD_TRIE_SIZE];
@ -263,6 +270,7 @@ loadNormData(UErrorCode &errorCode) {
extraData=(uint16_t *)((uint8_t *)(p+_NORM_INDEX_TOP)+indexes[_NORM_INDEX_TRIE_SIZE]);
combiningTable=extraData+indexes[_NORM_INDEX_UCHAR_COUNT];
formatVersion_2_1=formatVersion[0]>2 || (formatVersion[0]==2 && formatVersion[1]>=1);
formatVersion_2_2=formatVersion[0]>2 || (formatVersion[0]==2 && formatVersion[1]>=2);
if(formatVersion_2_1) {
canonStartSets=combiningTable+
indexes[_NORM_INDEX_COMBINE_DATA_COUNT]+
@ -752,6 +760,108 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p
}
}
/* Is c an NF<mode>-skippable code point? See unormimp.h. */
U_CAPI UBool U_EXPORT2
unorm_isNFSkippable(UChar32 c, UNormalizationMode mode) {
UErrorCode errorCode;
uint32_t norm32, mask;
uint16_t aux, fcd;
errorCode=U_ZERO_ERROR;
if(!_haveData(errorCode)) {
return FALSE;
}
/* handle trivial cases; set the comparison mask for the normal ones */
switch(mode) {
case UNORM_NONE:
return TRUE;
case UNORM_NFD:
mask=_NORM_CC_MASK|_NORM_QC_NFD;
break;
case UNORM_NFKD:
mask=_NORM_CC_MASK|_NORM_QC_NFKD;
break;
case UNORM_NFC:
/* case UNORM_FCC: */
mask=_NORM_CC_MASK|_NORM_COMBINES_ANY|(_NORM_QC_NFC&_NORM_QC_ANY_NO);
break;
case UNORM_NFKC:
mask=_NORM_CC_MASK|_NORM_COMBINES_ANY|(_NORM_QC_NFKC&_NORM_QC_ANY_NO);
break;
case UNORM_FCD:
/* FCD: skippable if lead cc==0 and trail cc<=1 */
UTRIE_GET16(&fcdTrie, c, fcd);
return fcd<=1;
default:
return FALSE;
}
/* check conditions (a)..(e), see unormimp.h */
UTRIE_GET32(&normTrie, c, norm32);
if((norm32&mask)!=0) {
return FALSE; /* fails (a)..(e), not skippable */
}
if(mode<UNORM_NFC) {
return TRUE; /* NF*D, passed (a)..(c), is skippable */
}
/* NF*C/FCC, passed (a)..(e) */
if((norm32&_NORM_QC_NFD)==0) {
return TRUE; /* no canonical decomposition, is skippable */
}
/* check Hangul syllables algorithmically */
if(isNorm32HangulOrJamo(norm32)) {
/* Jamo passed (a)..(e) above, must be Hangul */
return !isHangulWithoutJamoT((UChar)c); /* LVT are skippable, LV are not */
}
/* if(mode<=UNORM_NFKC) { -- enable when implementing FCC */
/* NF*C, test (f) flag */
if(!formatVersion_2_2) {
return FALSE; /* no (f) data, say not skippable to be safe */
}
UTRIE_GET16(&auxTrie, c, aux);
return (aux&_NORM_AUX_NFC_SKIP_F_MASK)==0; /* TRUE=skippable if the (f) flag is not set */
/* } else { FCC, test fcd<=1 instead of the above } */
}
static UBool U_CALLCONV
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
/* add the start code point to the USet */
uset_add((USet *)context, start);
return TRUE;
}
U_CAPI void U_EXPORT2
unorm_addPropertyStarts(USet *set) {
UErrorCode errorCode;
UChar c;
errorCode=U_ZERO_ERROR;
if(!_haveData(errorCode)) {
return;
}
/* add the start code point of each same-value range of each trie */
utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, set);
utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
if(formatVersion_2_1) {
utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, set);
}
/* add Hangul LV syllables and LV+1 because of skippables */
for(c=HANGUL_BASE; c<HANGUL_BASE+HANGUL_COUNT; c+=JAMO_T_COUNT) {
uset_add(set, c);
uset_add(set, c+1);
}
uset_add(set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
}
/* reorder UTF-16 in-place -------------------------------------------------- */
/*