ICU-96 strcoll redesigned

X-SVN-Rev: 3468
This commit is contained in:
Vladimir Weinstein 2001-01-24 00:35:19 +00:00
parent 5f5c348447
commit 43827ea5e2

View File

@ -323,7 +323,8 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
return 0; /* completely ignorable */
}
/* Make up an artificial CE from code point as per UCA */
order = 0xD08004C3 | (ch & 0xF000) << 12 | (ch & 0x0FFF) << 11;
order = 0xD08003C3 | (ch & 0xF000) << 12 | (ch & 0x0FE0) << 11;
*(collationSource->CEpos++) = 0x04000080 | (ch & 0x001F) << 27;
}
}
return order; /* return the CE */
@ -372,12 +373,12 @@ uint32_t getSpecialCE(const UCollator *coll, collIterate *source, UErrorCode *st
*(targetCopy++) = *(sourceCopy++);
}
}
source->pos = source->writableBuffer;
source->pos = source->writableBuffer-1;
source->len = targetCopy;
source->CEpos = source->toReturn = source->CEs;
CE = UCOL_IGNORABLE;
} else { /* we have already played with the string, so treat Thai as a length one expansion */
CEOffset = coll->expansion+getExpansionOffset(CE); /* find the offset to expansion table */
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
CE = *CEOffset++;
}
break;
@ -1563,8 +1564,6 @@ ucol_strcoll( const UCollator *coll,
}
UColAttributeValue strength = coll->strength;
uint32_t sOrder=UCOL_NULLORDER, tOrder=UCOL_NULLORDER;
uint32_t pSOrder, pTOrder;
UBool gets = TRUE, gett = TRUE;
UBool initialCheckSecTer = (strength >= UCOL_SECONDARY);
@ -1581,87 +1580,131 @@ ucol_strcoll( const UCollator *coll,
uint32_t *sCEs = sCEsArray, *tCEs = tCEsArray;
uint32_t *sCEend = sCEs+512, *tCEend = tCEs+512;
uint8_t LVT = shifted*variableMax;
uint8_t LVT = shifted*(variableMax<<24);
if(!isFrenchSec) {
for(;;)
{
UBool stopS = FALSE, stopT = FALSE;
uint32_t sOrder=0, tOrder=0;
for(;;) {
if(sCEs == sCEend || tCEs == tCEend) {
return ucol_compareUsingSortKeys(coll, source, sourceLength, target, targetLength);
}
/* Get the next collation element in each of the strings, unless */
/* we've been requested to skip it. */
if (gets)
{
/*UCOL_GETNEXTCE(sOrder, coll, sColl, &status);*/
sOrder = ucol_getNextCE(coll, &sColl, &status);
*(sCEs++) = sOrder;
/* Get the next collation element in each of the strings, unless */
/* we've been requested to skip it. */
while(sOrder <= LVT && (sOrder & 0xF) != 1) {
/*UCOL_GETNEXTCE(sOrder, coll, sColl, &status);*/
sOrder = ucol_getNextCE(coll, &sColl, &status);
if (sOrder == UCOL_NULLORDER) {
*(sCEs++) = 0x0101;
sOrder = 1;
} else {
*(sCEs++) = sOrder;
sOrder &= 0xFFFF0000;
}
gets = TRUE;
}
if (gett)
{
/*UCOL_GETNEXTCE(tOrder, coll, tColl, &status);*/
tOrder = ucol_getNextCE(coll, &tColl, &status);
*(tCEs++) = tOrder;
}
gett = TRUE;
while(tOrder <= LVT && tOrder != 1) {
/*UCOL_GETNEXTCE(tOrder, coll, tColl, &status);*/
tOrder = ucol_getNextCE(coll, &tColl, &status);
if (tOrder == UCOL_NULLORDER) {
*(tCEs++) = 0x0101;
tOrder = 1;
} else {
*(tCEs++) = tOrder;
tOrder &= 0xFFFF0000;
}
}
/* If we've hit the end of one of the strings, jump out of the loop */
if ((sOrder == UCOL_NULLORDER)||
(tOrder == UCOL_NULLORDER)) {
if(sOrder == tOrder) {
if(sOrder == 1) {
break;
} else { /* probably some more processing */
sOrder &= 0xFFFFFFBF;
tOrder &= 0xFFFFFFBF;
}
/* If there's no difference at this position, we can skip to the */
/* next one. */
if (sOrder == tOrder)
{
continue;
}
if (sOrder == UCOL_IGNORABLE)
{
/* The entire source element is ignorable. */
/* Skip to the next source element, but don't fetch another target element. */
gett = FALSE;
continue;
}
if (tOrder == UCOL_IGNORABLE)
{
gets = FALSE;
continue;
}
/* Compare primary differences first. */
pSOrder = UCOL_PRIMARYORDER(sOrder);
pTOrder = UCOL_PRIMARYORDER(tOrder);
if (pSOrder != pTOrder)
{
/* we need to get the shifted thing in here also */
/* The source and target elements aren't ignorable, but it's still possible */
/* for the primary component of one of the elements to be ignorable.... */
if (pSOrder <= LVT) { /* primary order in source is ignorable */
gett = FALSE;
} else if (pTOrder <= LVT) {
gets = FALSE;
} else {
/* Neither of the orders is ignorable, and we already know that the primary */
/* orders are different because of the (pSOrder != pTOrder) test above. */
/* Record the difference and stop the comparison. */
if (pSOrder < pTOrder)
{
return UCOL_LESS; /* (strength is PRIMARY) */
}
return UCOL_GREATER; /* (strength is PRIMARY) */
sOrder = 0; tOrder = 0;
continue;
}
} else { /* else of if ( pSOrder != pTOrder )*/
} else if(sOrder < tOrder) {
return UCOL_LESS;
} else {
return UCOL_GREATER;
}
} /* no primary difference... do the rest from the buffers */
/* Now we are going to re-examine the collected CEs. */
sCEend = sCEs;
tCEend = tCEs;
uint32_t secS = 0, secT = 0;
if(checkSecTer) {
if(!isFrenchSec) { /* normal */
sCEs = sCEsArray;
tCEs = tCEsArray;
for(;;) {
while (secS == 0 && secS != 0x0100) {
secS = *(sCEs++) & 0xFF00;
}
while(secT == 0 && secT != 0x0100) {
secT = *(tCEs++) & 0xFF00;
}
if(secS == secT) {
if(secS == 0x0100) {
break;
} else {
secS = 0; secT = 0;
continue;
}
} else if(secS < secT) {
return UCOL_LESS;
} else {
return UCOL_GREATER;
}
}
} else { /* do the French */
}
}
secS = 0;
secT = 0;
if(checkTertiary) {
sCEs = sCEsArray;
tCEs = tCEsArray;
for(;;) {
while(secS == 0 && secS != 1) {
secS = *(sCEs++) & 0x3F;
}
while(secT == 0 && secT != 1) {
secT = *(tCEs++) & 0x3F;
}
if(secS == secT) {
if(secS == 1) {
break;
} else {
secS = 0; secT = 0;
continue;
}
} else if(secS < secT) {
return UCOL_LESS;
} else {
return UCOL_GREATER;
}
}
}
if(checkQuad) {
}
#if 0
else { /* else of if ( pSOrder != pTOrder )*/
/* primary order is the same, but complete order is different. So there*/
/* are no base elements at this point, only ignorables (Since the strings are*/
/* normalized) */
@ -2064,6 +2107,7 @@ ucol_strcoll( const UCollator *coll,
result = UCOL_GREATER;
}
}
#endif
return result;
}