ICU-96 more collation stuff

X-SVN-Rev: 3402
This commit is contained in:
Vladimir Weinstein 2001-01-10 00:52:06 +00:00
parent 6e4cb32a81
commit 3eee3d6dcf
2 changed files with 39 additions and 25 deletions

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f351caf9c3fa730c95dfe8cdf703657753af6741c88c5a5f16553b6dcc7ea2a
size 86832
oid sha256:82cd751338d1267c1b2296b29dc55046a7431968210fbf26cc572b0d1bde6b3f
size 80240

View File

@ -146,9 +146,6 @@ ucol_openNew( const char *loc,
ucol_initUCA(status);
/* Open to discussion - */
/* Do we want to cache these objects? */
/* Do we need a name or other stuff? */
UCollatorNew *result = NULL;
UResourceBundle *b = ures_open(NULL, loc, status);
UResourceBundle *binary = ures_getByKey(b, "%%CollationNew", NULL, status);
@ -189,6 +186,7 @@ uint32_t ucol_getNextCENew(const UCollatorNew *coll, collIterate *collationSourc
order = UCOL_NULLORDER; /* if so, we won't play any more */
} else if (collationSource->CEpos > collationSource->toReturn) { /* Are there any CEs from previous expansions? */
order = *(collationSource->toReturn++); /* if so, return them */
collationSource->pos--;
} else { /* This is the real business now */
UChar ch = *collationSource->pos;
collationSource->CEpos = collationSource->toReturn = collationSource->CEs;
@ -201,7 +199,7 @@ uint32_t ucol_getNextCENew(const UCollatorNew *coll, collIterate *collationSourc
*(collationSource->CEpos) = order; /* prepare the buffer */
order = getSpecialCENew(coll, collationSource, status); /* and try to get the special CE */
if(order == UCOL_NOT_FOUND) { /* We couldn't find a good CE in the tailoring */
ucol_getNextUCA(ch, collationSource, status);
order = ucol_getNextUCA(ch, collationSource, status);
}
}
}
@ -222,6 +220,9 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
}
if(order == UCOL_NOT_FOUND) { /* This is where we have to resort to algorithmical generation */
/* Make up an artifical CE from code point as per UCA */
order = 0xD08004F1;
order |= (ch & 0xF000)<<12;
order |= (ch & 0x0FFF)<<11;
}
return order; /* return the CE */
}
@ -291,15 +292,15 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo
break;
}
schar = *(++source->pos);
while(schar > (tchar = *(UCharOffset++))) ; /* since the contraction codepoints should be ordered, we skip all that are smaller */
while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
UCharOffset++;
}
if(schar != tchar) { /* we didn't find the correct codepoint. We can use either the first or the last CE */
if(tchar == 0xFFFF) {
UCharOffset--; /* We moved one after the 0xFFFF, so we better back up. We're gonna use the last CE*/
} else {
if(tchar != 0xFFFF) {
UCharOffset = ContractionStart; /* We're not at the end, bailed out in the middle. Better use starting CE */
}
source->pos--; /* Spit out the last char of the string, wasn't tasty enough */
}
}
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
if(!isContraction(CE)) {
/* Maybe not */
@ -312,7 +313,7 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo
/* This should handle expansion. */
/* NOTE: we can encounter both continuations and expansions in an expansion! */
/* I have to decide where continuations are going to be dealt with */
CEOffset = coll->expansion+getExpansionOffset(CE); /* find the offset to expansion table */
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
size = getExpansionCount(CE);
CE = *CEOffset++;
if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
@ -324,7 +325,8 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo
*(source->CEpos++) = *CEOffset++;
}
}
source->toReturn++;
/*source->toReturn++;*/
return CE;
break;
case CHARSET_TAG:
/* probably after 1.8 */
@ -332,6 +334,7 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo
break;
default:
*status = U_INTERNAL_PROGRAM_ERROR;
CE=0;
break;
}
if (CE <= UCOL_NOT_FOUND) break;
@ -588,6 +591,7 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
uint32_t order = 0;
uint32_t ce = 0;
uint8_t carry = 0;
uint8_t primary1 = 0;
uint8_t primary2 = 0;
uint8_t primary3 = 0;
@ -602,8 +606,8 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
for(;;) {
for(i=prevBuffSize; i<minBufferSize; ++i) {
/*order = ucol_getNextCENew(coll, &s, status);*/
UCOL_GETNEXTCENEW(order, coll, s, status);
order = ucol_getNextCENew(coll, &s, status);
/*UCOL_GETNEXTCENEW(order, coll, s, status);*/
if(order == UCOL_NULLORDER) {
finished = TRUE;
@ -615,10 +619,25 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
tertiary = (order & UCOL_TERTIARYORDERMASK);
secondary = (order >>= 8) & 0xFF;
primary3 = 0; /* the third primary */
primary2 = (order >>= 8) & 0xFF;;
primary1 = order >>= 8;
if(!isContinuation(ce)) {
primary3 = 0; /* the third primary */
primary2 = (order >>= 8) & 0xFF;
primary1 = order >>= 8;
if(upperFirst) {
/* Upper cases have this bit turned on, so that they always come after the lower cases */
/* if we want to reverse this situation, we'll flip this bit */
tertiary ^= 0x80;
}
} else {
primary3 = 0;
if(carry != 0) {
carry = (order >>= 8) & 0xF;
primary1 = (order >>= 4) & 0xFF;
} else {
primary2 = (order >>= 8) & 0xFF;
primary1 = ((order >>= 8) & 0xF) | carry<<4;
}
}
if((tertiary & 0xF0) == 0xF0) { /* This indicates a long primary (11110000) */
/* Note: long primary can appear both as a normal CE or as a continuation CE (not that it matters much) */
primary3 = secondary;
@ -626,11 +645,6 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
tertiary = UNMARKED;
}
if(upperFirst && !(isContinuation(ce))) {
/* Upper cases have this bit turned on, so that they always come after the lower cases */
/* if we want to reverse this situation, we'll flip this bit */
tertiary ^= 0x80;
}
/* In the code below, every increase in any of buffers is followed by the increase to */
/* sortKeySize - this might look tedious, but it is needed so that we can find out if */
@ -657,7 +671,7 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
*primaries++ = primary2; /* second part */
sortKeySize++;
if(primary3 != UCOL_NEW_IGNORABLE) {
*primaries++ = primary2; /* third part */
*primaries++ = primary3; /* third part */
sortKeySize++;
}
}