ICU-96 fix for the multiple level contraction bug noted by Syn Wee. Other CE getting procedures that deal with contractions should be also checked

X-SVN-Rev: 3994
This commit is contained in:
Vladimir Weinstein 2001-03-09 07:18:33 +00:00
parent df7656578e
commit cc0d1a23ba
2 changed files with 47 additions and 12 deletions

View File

@ -869,6 +869,7 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
/* It is called by both getNextCE and getNextUCA */
uint32_t getSpecialCE(const UCollator *coll, uint32_t CE, collIterate *source, UErrorCode *status) {
uint32_t i = 0; /* general counter */
uint32_t firstFound = UCOL_NOT_FOUND;
//uint32_t CE = *source->CEpos;
for (;;) {
const uint32_t *CEOffset = NULL;
@ -937,15 +938,31 @@ uint32_t getSpecialCE(const UCollator *coll, uint32_t CE, collIterate *source, U
UCharOffset++;
}
if(schar != tchar) { /* we didn't find the correct codepoint. We can use either the first or the last CE */
if(tchar != 0xFFFF) {
UCharOffset = ContractionStart; /* We're not at the end, bailed out in the middle. Better use starting CE */
}
UCharOffset = ContractionStart; /* We're not at the end, bailed out in the middle. Better use starting CE */
source->pos--; /* Spit out the last char of the string, wasn't tasty enough */
}
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
#if 0
/* old code, with problem */
if(!isContraction(CE)) {
break;
}
#endif
/* there is a bug here which will make us look bad if we have multiple level contraction */
/* that fails after level 1 */
if(CE == UCOL_NOT_FOUND) {
if(firstFound != UCOL_NOT_FOUND) {
CE = firstFound;
firstFound = UCOL_NOT_FOUND;
break;
}
} else if(isContraction(CE)) { /* fix for the bug. Other places need to be checked */
/* this is contraction, and we will continue. However, we can fail along the */
/* the road, which means that we have part of contraction correct */
firstFound = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));
} else {
break;
}
}
break;
case EXPANSION_TAG:

View File

@ -789,7 +789,7 @@ int32_t uprv_ucol_decompose (UChar curChar, UChar *result) {
uint32_t ucol_getDynamicCEs(UColTokenParser *src, tempUCATable *t, UChar *decomp, uint32_t noOfDec, uint32_t *result, uint32_t resultSize, UErrorCode *status) {
uint32_t j = 0, i = 0;
uint32_t CE = 0;
uint32_t CE = 0, firstFound = UCOL_NOT_FOUND;
uint32_t resLen = 0;
collIterate colIt;
UBool lastNotFound = FALSE;
@ -799,13 +799,19 @@ uint32_t ucol_getDynamicCEs(UColTokenParser *src, tempUCATable *t, UChar *decomp
CE = ucmp32_get(t->mapping, decomp[j]);
if(CE == UCOL_NOT_FOUND || lastNotFound) { /* get it from the UCA */
lastNotFound = FALSE;
init_collIterate(src->UCA, decomp+j, 1, &colIt, TRUE);
while(CE != UCOL_NO_MORE_CES) {
CE = ucol_getNextCE(src->UCA, &colIt, status);
if(CE != UCOL_NO_MORE_CES) {
result[resLen++] = CE;
if(firstFound == UCOL_NOT_FOUND) {
init_collIterate(src->UCA, decomp+j, 1, &colIt, TRUE);
while(CE != UCOL_NO_MORE_CES) {
CE = ucol_getNextCE(src->UCA, &colIt, status);
if(CE != UCOL_NO_MORE_CES) {
result[resLen++] = CE;
}
}
}
} else { /* there was some stuff found in contraction */
result[resLen++] = firstFound;
continue;
}
} else if(CE < UCOL_NOT_FOUND) { /*normal CE */
result[resLen++] = CE;
} else { /* special CE, contraction, expansion or Thai */
@ -815,7 +821,7 @@ uint32_t ucol_getDynamicCEs(UColTokenParser *src, tempUCATable *t, UChar *decomp
uint32_t *CEOffset = t->expansions->CEs+(getExpansionOffset(CE) - (paddedsize(sizeof(UCATableHeader))>>2)); /* find the offset to expansion table */
uint32_t size = getExpansionCount(CE);
if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
for(i = 1; i<size; i++) {
for(i = 0; i<size; i++) {
result[resLen++] = *CEOffset++;
}
} else { /* else, we do */
@ -843,7 +849,19 @@ uint32_t ucol_getDynamicCEs(UColTokenParser *src, tempUCATable *t, UChar *decomp
j--;
break;
} else if(CE > UCOL_NOT_FOUND) {
continue;
if((tag = getCETag(CE)) == CONTRACTION_TAG) {
/* this is tricky - we're not closed, so for Japanese, */
/* we want to record the first success */
/* i.e. 0x30D0 decomposes to 0x30CF 0x3099 */
/* 0x30CF is contraction in table */
/* there are no 0x30CF 0x3099 in table, but there are */
/* longer contractions. If we don't note that we're already */
/* had something, we'll return not found and pick the wrong */
/* guys from UCA. I think getComplicatedCE needs to be checked */
/* for this type of error */
firstFound = ctb->CEs[0];
}
continue;
} else {
result[resLen++] = CE;
break;