ICU-96 more collation
X-SVN-Rev: 3559
This commit is contained in:
parent
7124fe04ef
commit
020916d4ab
@ -78,7 +78,7 @@ isAcceptableInvUCA(void *context,
|
||||
|
||||
uint32_t ucol_inv_findCE(uint32_t CE, uint32_t SecondCE, UErrorCode *status) {
|
||||
uint32_t bottom = 0, top = invUCA->tableSize;
|
||||
uint32_t i = (top-bottom)/2;
|
||||
uint32_t i;
|
||||
uint32_t first = 0;
|
||||
uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
|
||||
|
||||
@ -86,7 +86,8 @@ uint32_t ucol_inv_findCE(uint32_t CE, uint32_t SecondCE, UErrorCode *status) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
while(first != CE && top != bottom) {
|
||||
while(bottom < top-1) {
|
||||
i = (top+bottom)/2;
|
||||
first = *(CETable+3*i);
|
||||
if(first > CE) {
|
||||
top = i;
|
||||
@ -95,7 +96,6 @@ uint32_t ucol_inv_findCE(uint32_t CE, uint32_t SecondCE, UErrorCode *status) {
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
i = (top-bottom)/2 + bottom;
|
||||
}
|
||||
|
||||
if(first == CE) {
|
||||
@ -112,7 +112,7 @@ static uint32_t strengthMask[3] = {
|
||||
0xFFFFFFFF
|
||||
};
|
||||
|
||||
uint32_t ucol_inv_getPrevious(uint32_t CE, uint32_t SecondCE, UColAttributeValue strength, UErrorCode *status) {
|
||||
uint32_t ucol_inv_getPrevious(uint32_t CE, uint32_t SecondCE, uint32_t strength, UErrorCode *status) {
|
||||
|
||||
uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
|
||||
uint32_t previousCE;
|
||||
@ -134,7 +134,7 @@ uint32_t ucol_inv_getPrevious(uint32_t CE, uint32_t SecondCE, UColAttributeValue
|
||||
return previousCE;
|
||||
}
|
||||
|
||||
uint32_t ucol_inv_getNext(uint32_t CE, uint32_t SecondCE, UColAttributeValue strength, UErrorCode *status) {
|
||||
uint32_t ucol_inv_getNext(uint32_t CE, uint32_t SecondCE, uint32_t strength, UErrorCode *status) {
|
||||
uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
|
||||
uint32_t nextCE;
|
||||
uint32_t iCE;
|
||||
@ -150,7 +150,7 @@ uint32_t ucol_inv_getNext(uint32_t CE, uint32_t SecondCE, UColAttributeValue str
|
||||
nextCE = CE;
|
||||
|
||||
while(nextCE == CE) {
|
||||
nextCE = (*(CETable+3*(--iCE))) & strengthMask[strength];
|
||||
nextCE = (*(CETable+3*(++iCE))) & strengthMask[strength];
|
||||
}
|
||||
return nextCE;
|
||||
}
|
||||
@ -205,7 +205,8 @@ ucol_close(UCollator *coll)
|
||||
}
|
||||
}
|
||||
|
||||
UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, int32_t resLen, UErrorCode *status) {
|
||||
UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, uint32_t *resLen, UErrorCode *status) {
|
||||
int32_t i = 0;
|
||||
/*
|
||||
2. Eliminate the negative lists by doing the following for each non-null negative list:
|
||||
o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE,
|
||||
@ -224,6 +225,10 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, int32_t resLen
|
||||
tailoring has & x < z...
|
||||
? Then we change the tailoring to & x <<< X << x' <<< X' < z ...
|
||||
*/
|
||||
/* It is possible that this part should be done even while constructing list */
|
||||
/* The problem is that it is unknown what is going to be the strongest weight */
|
||||
/* So we might as well do it here */
|
||||
|
||||
/*
|
||||
o Allocate CEs for each token in the list, based on the total number N of the
|
||||
largest level difference, and the gap G between baseCE and nextCE at that
|
||||
@ -241,6 +246,23 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, int32_t resLen
|
||||
boundaries except where there is only a single-byte primary. That is to
|
||||
ensure that the script reordering will continue to work.
|
||||
*/
|
||||
for(i = 0; i<src->resultLen; i++) {
|
||||
src->lh[i].nextCE = ucol_inv_getNext(src->lh[i].baseCE, 0,
|
||||
src->lh[i].strongest[UCOL_TOK_POLARITY_POSITIVE], status);
|
||||
/* now we need to generate the CEs */
|
||||
/* I'd really like to get them in a UCAElements structure very soon */
|
||||
UColToken *t = src->lh[i].first[UCOL_TOK_POLARITY_POSITIVE];
|
||||
uint32_t strongest = 0;
|
||||
/* Count the strongest */
|
||||
while(t->next != NULL) {
|
||||
if(t->strength == src->lh[i].strongest[UCOL_TOK_POLARITY_POSITIVE]) {
|
||||
strongest++;
|
||||
}
|
||||
t = t->next;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
*status = U_UNSUPPORTED_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
@ -252,7 +274,9 @@ ucol_openRules( const UChar *rules,
|
||||
UCollationStrength strength,
|
||||
UErrorCode *status)
|
||||
{
|
||||
int32_t resLen = 0;
|
||||
uint32_t resLen = 0;
|
||||
uint32_t listLen = 0;
|
||||
UColTokenParser src;
|
||||
|
||||
ucol_initUCA(status);
|
||||
ucol_initInverseUCA(status);
|
||||
@ -283,8 +307,6 @@ ucol_openRules( const UChar *rules,
|
||||
|
||||
/* do we need to normalize the string beforehand? */
|
||||
|
||||
uint32_t listLen = 0;
|
||||
UColTokenParser src;
|
||||
src.source = rules;
|
||||
src.current = rules;
|
||||
src.end = rules+rulesLength;
|
||||
@ -298,7 +320,7 @@ ucol_openRules( const UChar *rules,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
UCATableHeader *table = ucol_assembleTailoringTable(&src, resLen, status);
|
||||
UCATableHeader *table = ucol_assembleTailoringTable(&src, &resLen, status);
|
||||
UCollator *result = ucol_initCollator(table,0,status);
|
||||
|
||||
if(U_SUCCESS(*status)) {
|
||||
|
@ -417,6 +417,15 @@ uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UErrorCode *status) {
|
||||
Create new list, create new sourceToken, make the baseCE from source, put
|
||||
the sourceToken in ListHeader of the new list */
|
||||
if(sourceToken == NULL) {
|
||||
|
||||
/*
|
||||
3. The rule for "& abcdefg < xyz" is a bit tricky. What it turns into is:
|
||||
|
||||
a. Find the longest sequence in "abcdefg" that is in UCA *OR* in the
|
||||
tailoring so far. Suppose that is "abcd".
|
||||
b. Then treat this rule as equivalent to:
|
||||
"& abcd < xyz / efg"
|
||||
*/
|
||||
if(newCharsLen > 1) {
|
||||
key.source = 0x01000000 | charsOffset;
|
||||
sourceToken = (UColToken *)uhash_get(uchars2tokens, &key);
|
||||
@ -482,6 +491,7 @@ uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UErrorCode *status) {
|
||||
}
|
||||
|
||||
src->lh = ListList;
|
||||
src->resultLen = listPosition;
|
||||
|
||||
return listPosition;
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ typedef struct {
|
||||
|
||||
|
||||
#define ucol_tok_isSpecialChar(ch) \
|
||||
(((((ch) <= 0x002F) && ((ch) >= 0x0020))|| \
|
||||
(((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
|
||||
(((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
|
||||
(((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
|
||||
(((ch) <= 0x007E) && ((ch) >= 0x007B))))
|
||||
@ -61,4 +61,5 @@ typedef struct {
|
||||
U_CFUNC UColToken *ucol_tok_parse_next_token(UColTokenParser *src, UErrorCode *status);
|
||||
U_CFUNC UColToken *ucol_tok_open();
|
||||
U_CFUNC uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UErrorCode *status);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user