From 3eee3d6dcf4004867f4489df55c6272f04841d7e Mon Sep 17 00:00:00 2001 From: Vladimir Weinstein Date: Wed, 10 Jan 2001 00:52:06 +0000 Subject: [PATCH] ICU-96 more collation stuff X-SVN-Rev: 3402 --- icu4c/source/data/UCATable.dat | 4 +-- icu4c/source/i18n/ucol.cpp | 60 +++++++++++++++++++++------------- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/icu4c/source/data/UCATable.dat b/icu4c/source/data/UCATable.dat index 84e0938355..32d7462d82 100644 --- a/icu4c/source/data/UCATable.dat +++ b/icu4c/source/data/UCATable.dat @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f351caf9c3fa730c95dfe8cdf703657753af6741c88c5a5f16553b6dcc7ea2a -size 86832 +oid sha256:82cd751338d1267c1b2296b29dc55046a7431968210fbf26cc572b0d1bde6b3f +size 80240 diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index 8d453e9c53..51eb3f8679 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -146,9 +146,6 @@ ucol_openNew( const char *loc, ucol_initUCA(status); - /* Open to discussion - */ - /* Do we want to cache these objects? */ - /* Do we need a name or other stuff? */ UCollatorNew *result = NULL; UResourceBundle *b = ures_open(NULL, loc, status); UResourceBundle *binary = ures_getByKey(b, "%%CollationNew", NULL, status); @@ -189,6 +186,7 @@ uint32_t ucol_getNextCENew(const UCollatorNew *coll, collIterate *collationSourc order = UCOL_NULLORDER; /* if so, we won't play any more */ } else if (collationSource->CEpos > collationSource->toReturn) { /* Are there any CEs from previous expansions? */ order = *(collationSource->toReturn++); /* if so, return them */ + collationSource->pos--; } else { /* This is the real business now */ UChar ch = *collationSource->pos; collationSource->CEpos = collationSource->toReturn = collationSource->CEs; @@ -201,7 +199,7 @@ uint32_t ucol_getNextCENew(const UCollatorNew *coll, collIterate *collationSourc *(collationSource->CEpos) = order; /* prepare the buffer */ order = getSpecialCENew(coll, collationSource, status); /* and try to get the special CE */ if(order == UCOL_NOT_FOUND) { /* We couldn't find a good CE in the tailoring */ - ucol_getNextUCA(ch, collationSource, status); + order = ucol_getNextUCA(ch, collationSource, status); } } } @@ -222,6 +220,9 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta } if(order == UCOL_NOT_FOUND) { /* This is where we have to resort to algorithmical generation */ /* Make up an artifical CE from code point as per UCA */ + order = 0xD08004F1; + order |= (ch & 0xF000)<<12; + order |= (ch & 0x0FFF)<<11; } return order; /* return the CE */ } @@ -291,15 +292,15 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo break; } schar = *(++source->pos); - while(schar > (tchar = *(UCharOffset++))) ; /* since the contraction codepoints should be ordered, we skip all that are smaller */ + while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ + UCharOffset++; + } if(schar != tchar) { /* we didn't find the correct codepoint. We can use either the first or the last CE */ - if(tchar == 0xFFFF) { - UCharOffset--; /* We moved one after the 0xFFFF, so we better back up. We're gonna use the last CE*/ - } else { + if(tchar != 0xFFFF) { UCharOffset = ContractionStart; /* We're not at the end, bailed out in the middle. Better use starting CE */ } source->pos--; /* Spit out the last char of the string, wasn't tasty enough */ - } + } CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); if(!isContraction(CE)) { /* Maybe not */ @@ -312,7 +313,7 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo /* This should handle expansion. */ /* NOTE: we can encounter both continuations and expansions in an expansion! */ /* I have to decide where continuations are going to be dealt with */ - CEOffset = coll->expansion+getExpansionOffset(CE); /* find the offset to expansion table */ + CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */ size = getExpansionCount(CE); CE = *CEOffset++; if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */ @@ -324,7 +325,8 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo *(source->CEpos++) = *CEOffset++; } } - source->toReturn++; + /*source->toReturn++;*/ + return CE; break; case CHARSET_TAG: /* probably after 1.8 */ @@ -332,6 +334,7 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo break; default: *status = U_INTERNAL_PROGRAM_ERROR; + CE=0; break; } if (CE <= UCOL_NOT_FOUND) break; @@ -588,6 +591,7 @@ ucol_calcSortKeyNew(const UCollatorNew *coll, uint32_t order = 0; uint32_t ce = 0; + uint8_t carry = 0; uint8_t primary1 = 0; uint8_t primary2 = 0; uint8_t primary3 = 0; @@ -602,8 +606,8 @@ ucol_calcSortKeyNew(const UCollatorNew *coll, for(;;) { for(i=prevBuffSize; i>= 8) & 0xFF; - primary3 = 0; /* the third primary */ - primary2 = (order >>= 8) & 0xFF;; - primary1 = order >>= 8; - + if(!isContinuation(ce)) { + primary3 = 0; /* the third primary */ + primary2 = (order >>= 8) & 0xFF; + primary1 = order >>= 8; + if(upperFirst) { + /* Upper cases have this bit turned on, so that they always come after the lower cases */ + /* if we want to reverse this situation, we'll flip this bit */ + tertiary ^= 0x80; + } + } else { + primary3 = 0; + if(carry != 0) { + carry = (order >>= 8) & 0xF; + primary1 = (order >>= 4) & 0xFF; + } else { + primary2 = (order >>= 8) & 0xFF; + primary1 = ((order >>= 8) & 0xF) | carry<<4; + } + } if((tertiary & 0xF0) == 0xF0) { /* This indicates a long primary (11110000) */ /* Note: long primary can appear both as a normal CE or as a continuation CE (not that it matters much) */ primary3 = secondary; @@ -626,11 +645,6 @@ ucol_calcSortKeyNew(const UCollatorNew *coll, tertiary = UNMARKED; } - if(upperFirst && !(isContinuation(ce))) { - /* Upper cases have this bit turned on, so that they always come after the lower cases */ - /* if we want to reverse this situation, we'll flip this bit */ - tertiary ^= 0x80; - } /* In the code below, every increase in any of buffers is followed by the increase to */ /* sortKeySize - this might look tedious, but it is needed so that we can find out if */ @@ -657,7 +671,7 @@ ucol_calcSortKeyNew(const UCollatorNew *coll, *primaries++ = primary2; /* second part */ sortKeySize++; if(primary3 != UCOL_NEW_IGNORABLE) { - *primaries++ = primary2; /* third part */ + *primaries++ = primary3; /* third part */ sortKeySize++; } }