ICU-96 fixed jamo special handling, made building more robust
X-SVN-Rev: 4274
This commit is contained in:
parent
5735b8a078
commit
5bbb91861f
@ -245,8 +245,8 @@ ucol_openRules( const UChar *rules,
|
||||
ucol_setOptionsFromHeader(result, src.image, status);
|
||||
result->hasRealData = FALSE;
|
||||
}
|
||||
result->dataInfo.dataVersion[0] = UCOL_BUILDER_VERSION;
|
||||
if(U_SUCCESS(*status)) {
|
||||
result->dataInfo.dataVersion[0] = UCOL_BUILDER_VERSION;
|
||||
result->rules = (UChar *)uprv_malloc((u_strlen(rules)+1)*sizeof(UChar));
|
||||
u_strcpy((UChar *)result->rules, rules);
|
||||
result->freeRulesOnClose = TRUE;
|
||||
@ -489,6 +489,8 @@ uint32_t ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) {
|
||||
return order;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* bogus code, based on the wrong assumption */
|
||||
void getSpecialJamo(const UCollator *coll, uint32_t CE, uint32_t **buffer) {
|
||||
for(;;) {
|
||||
uint32_t tag = getCETag(CE);
|
||||
@ -535,6 +537,8 @@ void ucol_getJamoCEs(const UCollator *coll, UChar ch, uint32_t **buffer) {
|
||||
*(*buffer++) = order;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* This function tries to get a CE from UCA, which should be always around */
|
||||
/* UChar is passed in in order to speed things up */
|
||||
/* here is also the generation of implicit CEs */
|
||||
@ -592,12 +596,39 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
|
||||
return ucmp32_get(UCA->mapping, L); // return first one
|
||||
|
||||
} else { // Jamo is Special
|
||||
collIterate jamos;
|
||||
UChar jamoString[3];
|
||||
uint32_t CE = UCOL_NOT_FOUND;
|
||||
const UCollator *collator = collationSource->coll;
|
||||
jamoString[0] = L;
|
||||
jamoString[1] = V;
|
||||
if (T != TBase) {
|
||||
jamoString[2] = T;
|
||||
init_collIterate(collator, jamoString, 3, &jamos, TRUE);
|
||||
} else {
|
||||
init_collIterate(collator, jamoString, 2, &jamos, TRUE);
|
||||
}
|
||||
|
||||
CE = ucol_getNextCE(collator, &jamos, status);
|
||||
|
||||
while(CE != UCOL_NO_MORE_CES) {
|
||||
*(collationSource->CEpos++) = CE;
|
||||
CE = ucol_getNextCE(collator, &jamos, status);
|
||||
}
|
||||
return *(collationSource->toReturn++);
|
||||
|
||||
/* Code and pseudocode below is bogus - we didn't take into */
|
||||
/* account that any combo of L,V,T could be */
|
||||
/* in fact a contraction - we cannot look at them separately */
|
||||
|
||||
/*
|
||||
ucol_getJamoCEs(collationSource->coll, L, &collationSource->CEpos);
|
||||
ucol_getJamoCEs(collationSource->coll, V, &collationSource->CEpos);
|
||||
if (T != TBase) {
|
||||
ucol_getJamoCEs(collationSource->coll, T, &collationSource->CEpos);
|
||||
}
|
||||
return *(collationSource->toReturn++);
|
||||
*/
|
||||
/*
|
||||
// do recursive processing of L, V, and T with fetchCE (but T only if not equal to TBase!!)
|
||||
// Since fetchCE returns a CE, and (potentially) stuffs items into the ce buffer,
|
||||
@ -718,6 +749,30 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
collationSource->toReturn = collationSource->CEpos - 1;
|
||||
return *(collationSource->toReturn);
|
||||
} else {
|
||||
collIterate jamos;
|
||||
UChar jamoString[3];
|
||||
uint32_t CE = UCOL_NOT_FOUND;
|
||||
const UCollator *collator = collationSource->coll;
|
||||
jamoString[0] = L;
|
||||
jamoString[1] = V;
|
||||
if (T != TBase) {
|
||||
jamoString[2] = T;
|
||||
init_collIterate(collator, jamoString, 3, &jamos, TRUE);
|
||||
} else {
|
||||
init_collIterate(collator, jamoString, 2, &jamos, TRUE);
|
||||
}
|
||||
|
||||
CE = ucol_getNextCE(collator, &jamos, status);
|
||||
|
||||
while(CE != UCOL_NO_MORE_CES) {
|
||||
*(collationSource->CEpos++) = CE;
|
||||
CE = ucol_getNextCE(collator, &jamos, status);
|
||||
}
|
||||
collationSource->toReturn = collationSource->CEpos - 1;
|
||||
return *(collationSource->toReturn);
|
||||
|
||||
/*return *(collationSource->toReturn++);*/
|
||||
/*
|
||||
ucol_getJamoCEs(collationSource->coll, L, &collationSource->CEpos);
|
||||
ucol_getJamoCEs(collationSource->coll, V, &collationSource->CEpos);
|
||||
if (T != TBase) {
|
||||
@ -725,6 +780,7 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
}
|
||||
collationSource->toReturn = collationSource->CEpos - 1;
|
||||
return *(collationSource->toReturn);
|
||||
*/
|
||||
/*
|
||||
Jamo is Special
|
||||
do recursive processing of L, V, and T with fetchCE (but T only if not
|
||||
@ -2687,6 +2743,37 @@ static UBool ucol_unsafeCP(UChar c, const UCollator *coll) {
|
||||
return ((htbyte >> (hash & 7)) & 1) == 1;
|
||||
}
|
||||
|
||||
/* This internal API checks whether a character is tailored or not */
|
||||
U_CAPI UBool isTailored(const UCollator *coll, const UChar u, UErrorCode *status) {
|
||||
uint32_t CE = UCOL_NOT_FOUND;
|
||||
const UChar *ContractionStart = NULL;
|
||||
if(U_SUCCESS(*status) && coll != NULL) {
|
||||
if(coll == UCA) {
|
||||
return FALSE;
|
||||
} else if(u < 0x100) { /* latin-1 */
|
||||
CE = coll->latinOneMapping[u];
|
||||
if(CE == UCA->latinOneMapping[u]) {
|
||||
return FALSE;
|
||||
}
|
||||
} else { /* regular */
|
||||
CE = ucmp32_get(coll->mapping, u);
|
||||
}
|
||||
|
||||
if(isContraction(CE)) {
|
||||
ContractionStart = (UChar *)coll->image+getContractOffset(CE);
|
||||
CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex));
|
||||
}
|
||||
|
||||
if(CE == UCOL_NOT_FOUND) {
|
||||
return FALSE;
|
||||
} else {
|
||||
return TRUE;
|
||||
}
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Following are the string compare functions */
|
||||
@ -4029,12 +4116,35 @@ uint32_t ucol_getIncrementalUCA(UChar ch, incrementalContext *collationSource, U
|
||||
return ucmp32_get(UCA->mapping, L); // return first one
|
||||
|
||||
} else { // Jamo is Special
|
||||
collIterate jamos;
|
||||
UChar jamoString[3];
|
||||
uint32_t CE = UCOL_NOT_FOUND;
|
||||
const UCollator *collator = collationSource->coll;
|
||||
jamoString[0] = L;
|
||||
jamoString[1] = V;
|
||||
if (T != TBase) {
|
||||
jamoString[2] = T;
|
||||
init_collIterate(collator, jamoString, 3, &jamos, TRUE);
|
||||
} else {
|
||||
init_collIterate(collator, jamoString, 2, &jamos, TRUE);
|
||||
}
|
||||
|
||||
CE = ucol_getNextCE(collator, &jamos, status);
|
||||
|
||||
while(CE != UCOL_NO_MORE_CES) {
|
||||
*(collationSource->CEpos++) = CE;
|
||||
CE = ucol_getNextCE(collator, &jamos, status);
|
||||
}
|
||||
return *(collationSource->toReturn++);
|
||||
|
||||
/*
|
||||
ucol_getJamoCEs(collationSource->coll, L, &collationSource->CEpos);
|
||||
ucol_getJamoCEs(collationSource->coll, V, &collationSource->CEpos);
|
||||
if (T != TBase) {
|
||||
ucol_getJamoCEs(collationSource->coll, T, &collationSource->CEpos);
|
||||
}
|
||||
return *(collationSource->toReturn++);
|
||||
*/
|
||||
|
||||
/*
|
||||
// do recursive processing of L, V, and T with fetchCE (but T only if not equal to TBase!!)
|
||||
|
@ -573,6 +573,14 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
|
||||
el.isThai = FALSE;
|
||||
}
|
||||
|
||||
if(src->UCA != NULL) {
|
||||
for(i = 0; i<el.cSize; i++) {
|
||||
if(UCOL_ISJAMO(el.cPoints[i])) {
|
||||
t->image->jamoSpecial = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* we also need a case bit here, and we'll fish it out from the UCA for the first codepoint */
|
||||
uint32_t caseCE = ucol_getFirstCE(src->UCA, el.cPoints[0], status);
|
||||
if((caseCE & 0x40) != 0) {
|
||||
@ -769,6 +777,9 @@ uint32_t ucol_getDynamicCEs(UColTokenParser *src, tempUCATable *t, UChar *decomp
|
||||
|
||||
UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *status) {
|
||||
uint32_t i = 0;
|
||||
if(U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
/*
|
||||
2. Eliminate the negative lists by doing the following for each non-null negative list:
|
||||
o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE,
|
||||
@ -814,7 +825,9 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
|
||||
/* now we need to generate the CEs */
|
||||
/* We stuff the initial value in the buffers, and increase the appropriate buffer */
|
||||
/* According to strength */
|
||||
ucol_initBuffers(&src->lh[i], tailored, status);
|
||||
if(U_SUCCESS(*status)) {
|
||||
ucol_initBuffers(&src->lh[i], tailored, status);
|
||||
}
|
||||
}
|
||||
|
||||
tempUCATable *t = uprv_uca_initTempTable(src->image, src->UCA, status);
|
||||
@ -827,7 +840,9 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
|
||||
/* now we need to generate the CEs */
|
||||
/* We stuff the initial value in the buffers, and increase the appropriate buffer */
|
||||
/* According to strength */
|
||||
ucol_createElements(src, t, &src->lh[i], tailored, status);
|
||||
if(U_SUCCESS(*status)) {
|
||||
ucol_createElements(src, t, &src->lh[i], tailored, status);
|
||||
}
|
||||
}
|
||||
|
||||
UCATableHeader *myData = NULL;
|
||||
@ -842,21 +857,23 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
|
||||
uint32_t compCE[256];
|
||||
uint32_t compRes = 0;
|
||||
|
||||
/* produce canonical closure */
|
||||
for(u = 0; u < 0xFFFF; u++) {
|
||||
if((noOfDec = unorm_normalize(&u, 1, UNORM_NFD, 0, decomp, 256, status)) > 1
|
||||
|| (noOfDec == 1 && *decomp != (UChar)u))
|
||||
/*if((noOfDec = uprv_ucol_decompose ((UChar)u, decomp)) > 1 || (noOfDec == 1 && *decomp != (UChar)u))*/
|
||||
{
|
||||
compRes = ucol_getDynamicCEs(src, t, (UChar *)&u, 1, compCE, 256, status);
|
||||
el.noOfCEs = ucol_getDynamicCEs(src, t, decomp, noOfDec, el.CEs, 128, status);
|
||||
if(U_SUCCESS(*status)) {
|
||||
/* produce canonical closure */
|
||||
for(u = 0; u < 0xFFFF; u++) {
|
||||
if((noOfDec = unorm_normalize(&u, 1, UNORM_NFD, 0, decomp, 256, status)) > 1
|
||||
|| (noOfDec == 1 && *decomp != (UChar)u))
|
||||
/*if((noOfDec = uprv_ucol_decompose ((UChar)u, decomp)) > 1 || (noOfDec == 1 && *decomp != (UChar)u))*/
|
||||
{
|
||||
compRes = ucol_getDynamicCEs(src, t, (UChar *)&u, 1, compCE, 256, status);
|
||||
el.noOfCEs = ucol_getDynamicCEs(src, t, decomp, noOfDec, el.CEs, 128, status);
|
||||
|
||||
if((compRes != el.noOfCEs) || (uprv_memcmp(compCE, el.CEs, compRes*sizeof(uint32_t)) != 0)) {
|
||||
el.uchars[0] = (UChar)u;
|
||||
el.cPoints = el.uchars;
|
||||
el.cSize = 1;
|
||||
if((compRes != el.noOfCEs) || (uprv_memcmp(compCE, el.CEs, compRes*sizeof(uint32_t)) != 0)) {
|
||||
el.uchars[0] = (UChar)u;
|
||||
el.cPoints = el.uchars;
|
||||
el.cSize = 1;
|
||||
|
||||
uprv_uca_addAnElement(t, &el, status);
|
||||
uprv_uca_addAnElement(t, &el, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -864,27 +881,29 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
|
||||
/* still need to produce compatibility closure */
|
||||
|
||||
/* add latin-1 stuff */
|
||||
for(u = 0; u<0x100; u++) {
|
||||
if((CE = ucmp32_get(t->mapping, u)) == UCOL_NOT_FOUND /*) {*/
|
||||
/* this test is for contractions that are missing the starting element. Looks like latin-1 should be done before assembling */
|
||||
/* the table, even if it results in more false closure elements */
|
||||
|| ((isContraction(CE)) &&
|
||||
(uprv_cnttab_getCE(t->contractions, CE, 0, TRUE, status) == UCOL_NOT_FOUND))
|
||||
) {
|
||||
decomp[0] = (UChar)u;
|
||||
el.uchars[0] = (UChar)u;
|
||||
el.cPoints = el.uchars;
|
||||
el.cSize = 1;
|
||||
el.noOfCEs = 0;
|
||||
init_collIterate(src->UCA, decomp, 1, &colIt, TRUE);
|
||||
while(CE != UCOL_NO_MORE_CES) {
|
||||
CE = ucol_getNextCE(src->UCA, &colIt, status);
|
||||
/*UCOL_GETNEXTCE(CE, temp, colIt, status);*/
|
||||
if(CE != UCOL_NO_MORE_CES) {
|
||||
el.CEs[el.noOfCEs++] = CE;
|
||||
if(U_SUCCESS(*status)) {
|
||||
for(u = 0; u<0x100; u++) {
|
||||
if((CE = ucmp32_get(t->mapping, u)) == UCOL_NOT_FOUND /*) {*/
|
||||
/* this test is for contractions that are missing the starting element. Looks like latin-1 should be done before assembling */
|
||||
/* the table, even if it results in more false closure elements */
|
||||
|| ((isContraction(CE)) &&
|
||||
(uprv_cnttab_getCE(t->contractions, CE, 0, TRUE, status) == UCOL_NOT_FOUND))
|
||||
) {
|
||||
decomp[0] = (UChar)u;
|
||||
el.uchars[0] = (UChar)u;
|
||||
el.cPoints = el.uchars;
|
||||
el.cSize = 1;
|
||||
el.noOfCEs = 0;
|
||||
init_collIterate(src->UCA, decomp, 1, &colIt, TRUE);
|
||||
while(CE != UCOL_NO_MORE_CES) {
|
||||
CE = ucol_getNextCE(src->UCA, &colIt, status);
|
||||
/*UCOL_GETNEXTCE(CE, temp, colIt, status);*/
|
||||
if(CE != UCOL_NO_MORE_CES) {
|
||||
el.CEs[el.noOfCEs++] = CE;
|
||||
}
|
||||
}
|
||||
uprv_uca_addAnElement(t, &el, status);
|
||||
}
|
||||
uprv_uca_addAnElement(t, &el, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -550,6 +550,7 @@ int32_t ucol_getIncrementalSpecialCE(const UCollator *coll, uint32_t CE, increme
|
||||
void ucol_updateInternalState(UCollator *coll);
|
||||
uint32_t ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status);
|
||||
U_CAPI char U_EXPORT2 *ucol_sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len);
|
||||
U_CAPI UBool isTailored(const UCollator *coll, const UChar u, UErrorCode *status);
|
||||
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user