ICU-96 more changes to collation
X-SVN-Rev: 3706
This commit is contained in:
parent
3d6b4ba255
commit
4accd4cd4d
@ -18,17 +18,16 @@
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/udata.h"
|
||||
|
||||
#include "cpputils.h"
|
||||
#include "cstring.h"
|
||||
#include "ucmp32.h"
|
||||
#include "umutex.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "ucmp32.h"
|
||||
#include "tcoldata.h"
|
||||
#include "tables.h"
|
||||
|
||||
#include "unicode/udata.h"
|
||||
#include "umutex.h"
|
||||
|
||||
static UCollator* UCA = NULL;
|
||||
static const InverseTableHeader* invUCA = NULL;
|
||||
@ -122,13 +121,13 @@ int32_t ucol_inv_findCE(uint32_t CE, uint32_t SecondCE) {
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t strengthMask[3] = {
|
||||
static uint32_t strengthMask[UCOL_CE_STRENGTH_LIMIT] = {
|
||||
0xFFFF0000,
|
||||
0xFFFFFF00,
|
||||
0xFFFFFFFF
|
||||
};
|
||||
|
||||
static uint32_t strengthShift[3] = {
|
||||
static uint32_t strengthShift[UCOL_CE_STRENGTH_LIMIT] = {
|
||||
16,
|
||||
8,
|
||||
0
|
||||
@ -223,19 +222,23 @@ U_CFUNC void ucol_inv_getGapPositions(UColTokListHeader *lh) {
|
||||
}
|
||||
|
||||
for(;;) {
|
||||
if((lh->pos[tokStrength] = ucol_inv_getNext(lh, tokStrength)) >= 0) {
|
||||
lh->fStrToken[tokStrength] = tok;
|
||||
} else {
|
||||
/* Error */
|
||||
fprintf(stderr, "Error! couldn't find the CE!\n");
|
||||
if(tokStrength < UCOL_CE_STRENGTH_LIMIT) {
|
||||
if((lh->pos[tokStrength] = ucol_inv_getNext(lh, tokStrength)) >= 0) {
|
||||
lh->fStrToken[tokStrength] = tok;
|
||||
} else {
|
||||
/* Error */
|
||||
fprintf(stderr, "Error! couldn't find the CE!\n");
|
||||
}
|
||||
}
|
||||
|
||||
while(tok != NULL && tok->strength >= tokStrength) {
|
||||
lh->lStrToken[tokStrength] = tok;
|
||||
if(tokStrength < UCOL_CE_STRENGTH_LIMIT) {
|
||||
lh->lStrToken[tokStrength] = tok;
|
||||
}
|
||||
tok = tok->next;
|
||||
}
|
||||
|
||||
if(tokStrength < 2) {
|
||||
if(tokStrength < UCOL_CE_STRENGTH_LIMIT-1) {
|
||||
/* check if previous interval is the same and merge the intervals if it is so */
|
||||
if(lh->pos[tokStrength] == lh->pos[tokStrength+1]) {
|
||||
lh->fStrToken[tokStrength] = lh->fStrToken[tokStrength+1];
|
||||
@ -311,6 +314,7 @@ ucol_open( const char *loc,
|
||||
u_versionFromString(trVInfo,tVer );
|
||||
result->trVersion=(uint8_t)trVInfo[0];
|
||||
}
|
||||
ures_close(resB);
|
||||
ures_close(binary);
|
||||
|
||||
return result;
|
||||
@ -457,16 +461,16 @@ U_CFUNC void ucol_doCE(uint32_t *CEparts, UColToken *tok) {
|
||||
}
|
||||
|
||||
U_CFUNC void ucol_initBuffers(UColTokListHeader *lh, bufs *b, UErrorCode *status) {
|
||||
ucolCEGenerator Gens[3];
|
||||
|
||||
uint32_t CEparts[0x10];
|
||||
ucolCEGenerator Gens[UCOL_CE_STRENGTH_LIMIT];
|
||||
uint32_t CEparts[UCOL_CE_STRENGTH_LIMIT];
|
||||
|
||||
uint32_t i = 0;
|
||||
|
||||
UColToken *tok = lh->last[UCOL_TOK_POLARITY_POSITIVE];
|
||||
uint32_t t[3];
|
||||
uint32_t t[UCOL_STRENGTH_LIMIT];
|
||||
|
||||
for(i=0; i<0x10; i++) {
|
||||
for(i=0; i<UCOL_STRENGTH_LIMIT; i++) {
|
||||
t[i] = 0;
|
||||
}
|
||||
|
||||
@ -513,13 +517,29 @@ U_CFUNC void ucol_initBuffers(UColTokListHeader *lh, bufs *b, UErrorCode *status
|
||||
fprintf(stderr, "gapsHi[%i] [%08X %08X %08X]\n", j, lh->gapsHi[j*3], lh->gapsHi[j*3+1], lh->gapsHi[j*3+2]);
|
||||
}
|
||||
|
||||
|
||||
/* I strongly believe that this code can be refactored and simplified. */
|
||||
/* have to do CE generation now, so let this soak a little bit */
|
||||
|
||||
tok = lh->first[UCOL_TOK_POLARITY_POSITIVE];
|
||||
uint32_t fStrength = tok->strength;
|
||||
|
||||
/* Treat starting identicals */
|
||||
/* &0 = nula = zero */
|
||||
if(tok != NULL && fStrength == UCOL_IDENTICAL) {
|
||||
CEparts[0] = (lh->baseCE & UCOL_PRIMARYMASK) | (lh->baseContCE & UCOL_PRIMARYMASK) >> 16;
|
||||
CEparts[1] = (lh->baseCE & UCOL_SECONDARYMASK) << 16 | (lh->baseContCE & UCOL_SECONDARYMASK) << 8;
|
||||
CEparts[2] = (UCOL_TERTIARYORDER(lh->baseCE)) << 24 | (UCOL_TERTIARYORDER(lh->baseContCE)) << 16;
|
||||
|
||||
if(tok != NULL && fStrength == 2) { /* starting with tertiary */
|
||||
while(tok != NULL && tok->strength == UCOL_IDENTICAL) {
|
||||
ucol_doCE(CEparts, tok);
|
||||
tok = tok->next;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(tok != NULL && tok->strength == UCOL_TERTIARY) { /* starting with tertiary */
|
||||
fStrength = tok->strength;
|
||||
if(lh->pos[fStrength] == -1) {
|
||||
while(lh->pos[fStrength] == -1 && fStrength > 0) {
|
||||
fStrength--;
|
||||
@ -529,26 +549,23 @@ U_CFUNC void ucol_initBuffers(UColTokListHeader *lh, bufs *b, UErrorCode *status
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
CEparts[0] = lh->gapsLo[fStrength*3];
|
||||
CEparts[1] = lh->gapsLo[fStrength*3+1];
|
||||
CEparts[2] = ucol_getCEGenerator(&Gens[2], lh->gapsLo[fStrength*3+2], lh->gapsHi[fStrength*3+2], tok->toInsert);
|
||||
|
||||
ucol_doCE(CEparts, tok);
|
||||
CEparts[2] = ucol_getCEGenerator(&Gens[2], lh->gapsLo[fStrength*3+2], lh->gapsHi[fStrength*3+2], tok->toInsert);
|
||||
|
||||
while(tok != NULL && tok->strength == 2) {
|
||||
while(tok != NULL && tok->strength >= UCOL_TERTIARY) {
|
||||
ucol_doCE(CEparts, tok);
|
||||
tok = tok->next;
|
||||
/* Treat identicals in starting tertiaries*/
|
||||
/* &0, <funny_tertiary_different_zero> = FunnyZero */
|
||||
|
||||
if(tok->strength == 2) {
|
||||
/* Treat identicals in starting tertiaries by NOT changing the tertiary value */
|
||||
if(tok != NULL && tok->strength == UCOL_TERTIARY) {
|
||||
CEparts[2] = ucol_getNextGenerated(&Gens[2]);
|
||||
ucol_doCE(CEparts, tok);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(tok != NULL && tok->strength == 1) { /* secondaries */
|
||||
if(tok != NULL && tok->strength == UCOL_SECONDARY) { /* secondaries */
|
||||
fStrength = tok->strength;
|
||||
if(lh->pos[1] == -1) {
|
||||
fStrength = 0;
|
||||
@ -563,7 +580,7 @@ U_CFUNC void ucol_initBuffers(UColTokListHeader *lh, bufs *b, UErrorCode *status
|
||||
|
||||
CEparts[0] = lh->gapsLo[fStrength*3];
|
||||
CEparts[1] = ucol_getCEGenerator(&Gens[1], lh->gapsLo[fStrength*3+1], lh->gapsHi[fStrength*3+1], tok->toInsert);
|
||||
if(tok->next->strength == 2) {
|
||||
if(tok->next->strength == UCOL_TERTIARY) {
|
||||
CEparts[2] = ucol_getCEGenerator(&Gens[2], 0x02000000, 0xFF000000, tok->next->toInsert);
|
||||
} else {
|
||||
CEparts[2] = 0x03000000;
|
||||
@ -573,30 +590,34 @@ U_CFUNC void ucol_initBuffers(UColTokListHeader *lh, bufs *b, UErrorCode *status
|
||||
tok = tok->next;
|
||||
|
||||
while(tok->next != NULL && tok->next->strength > 0) {
|
||||
if(tok->strength == 2) {
|
||||
if(tok->strength == UCOL_TERTIARY) {
|
||||
CEparts[2] = ucol_getNextGenerated(&Gens[2]);
|
||||
ucol_doCE(CEparts, tok);
|
||||
} else if(tok->strength == 1) {
|
||||
} else if(tok->strength == UCOL_SECONDARY) {
|
||||
CEparts[1] = ucol_getNextGenerated(&Gens[1]);
|
||||
if(tok->next->strength == 2) {
|
||||
CEparts[2] = ucol_getCEGenerator(&Gens[2], 0x02000000, 0xFF000000, tok->next->toInsert);
|
||||
} else {
|
||||
if(tok->next->strength == UCOL_SECONDARY) {
|
||||
CEparts[2] = 0x03000000;
|
||||
} else {
|
||||
CEparts[2] = ucol_getCEGenerator(&Gens[2], 0x02000000, 0xFF000000, tok->next->toInsert);
|
||||
}
|
||||
ucol_doCE(CEparts, tok);
|
||||
} else { /* Strength is identical */
|
||||
ucol_doCE(CEparts, tok);
|
||||
}
|
||||
tok = tok->next;
|
||||
}
|
||||
|
||||
if(tok->strength == 2) {
|
||||
/* This is the last token in rule */
|
||||
if(tok->strength == UCOL_TERTIARY) {
|
||||
CEparts[2] = ucol_getNextGenerated(&Gens[2]);
|
||||
} else if(tok->strength == 1) {
|
||||
} else if(tok->strength == UCOL_SECONDARY) {
|
||||
CEparts[1] = ucol_getNextGenerated(&Gens[1]);
|
||||
CEparts[2] = 0x03000000;
|
||||
}
|
||||
/* if the strength is identical, it will just repeat the last CE value */
|
||||
ucol_doCE(CEparts, tok);
|
||||
tok = tok->next;
|
||||
} else {
|
||||
} else { /* only one secondary at the end of the rule fragment */
|
||||
CEparts[0] = lh->gapsLo[fStrength*3];
|
||||
CEparts[1] = lh->gapsLo[fStrength*3+1];
|
||||
CEparts[2] = lh->gapsLo[fStrength*3+2];
|
||||
@ -612,18 +633,21 @@ U_CFUNC void ucol_initBuffers(UColTokListHeader *lh, bufs *b, UErrorCode *status
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* what if the next token is identical??? */
|
||||
/* How should the things be set up */
|
||||
|
||||
if(tok->next != NULL) {
|
||||
CEparts[0] = ucol_getCEGenerator(&Gens[0], lh->gapsLo[0], lh->gapsHi[0], tok->toInsert);
|
||||
if(tok->next->strength == 0) {
|
||||
if(tok->next->strength == UCOL_PRIMARY) {
|
||||
CEparts[1] = 0x03000000;
|
||||
CEparts[2] = 0x03000000;
|
||||
} else {
|
||||
if(tok->next->strength == 1) {
|
||||
} else { /* Secondaries will also be generated */
|
||||
CEparts[1] = ucol_getCEGenerator(&Gens[1], 0x02000000, 0xFF000000, tok->next->toInsert);
|
||||
if(tok->next->strength == UCOL_SECONDARY) {
|
||||
CEparts[2] = 0x03000000;
|
||||
} else {
|
||||
CEparts[2] = ucol_getCEGenerator(&Gens[2], 0x02000000, 0xFF000000, tok->next->toInsert);
|
||||
}
|
||||
CEparts[1] = ucol_getCEGenerator(&Gens[1], 0x02000000, 0xFF000000, tok->next->toInsert);
|
||||
}
|
||||
|
||||
ucol_doCE(CEparts, tok);
|
||||
@ -663,20 +687,21 @@ U_CFUNC void ucol_initBuffers(UColTokListHeader *lh, bufs *b, UErrorCode *status
|
||||
}
|
||||
tok = tok->next;
|
||||
}
|
||||
|
||||
if(tok->strength == 2) {
|
||||
|
||||
/* OK, there are no next tokens, we just have to wrap up with the last one */
|
||||
if(tok->strength == UCOL_TERTIARY) {
|
||||
CEparts[2] = ucol_getNextGenerated(&Gens[2]);
|
||||
} else if(tok->strength == 1) {
|
||||
} else if(tok->strength == UCOL_SECONDARY) {
|
||||
CEparts[1] = ucol_getNextGenerated(&Gens[1]);
|
||||
CEparts[2] = 0x03000000;
|
||||
} else {
|
||||
} else if(tok->strength == UCOL_PRIMARY) {
|
||||
CEparts[0] = ucol_getNextGenerated(&Gens[0]);
|
||||
CEparts[1] = 0x03000000;
|
||||
CEparts[2] = 0x03000000;
|
||||
}
|
||||
} /* else it is identical and do nothing */
|
||||
ucol_doCE(CEparts, tok);
|
||||
|
||||
} else {
|
||||
} else { /* there is only one primary in this sequence and it ends with it */
|
||||
CEparts[0] = lh->gapsLo[0];
|
||||
CEparts[1] = lh->gapsLo[1];
|
||||
CEparts[2] = lh->gapsLo[2];
|
||||
@ -736,9 +761,6 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, uint32_t *resL
|
||||
/* tertiary */
|
||||
/* We stuff the initial value in the buffers, and increase the appropriate buffer */
|
||||
/* According to strength */
|
||||
/* Inital value depends on both base and next CE. First we decide which one is */
|
||||
/* longer (in term of non zero bytes). If it's baseCE, we add 1 to it, if it's nextCE, */
|
||||
/* we subtract 1 from it */
|
||||
ucol_initBuffers(&src->lh[i], &b, status);
|
||||
|
||||
}
|
||||
|
@ -25,12 +25,12 @@ typedef struct {
|
||||
uint32_t previousCE;
|
||||
uint32_t previousContCE;
|
||||
/* uint32_t strongest[2];*/
|
||||
int32_t pos[3];
|
||||
uint32_t gapsLo[9];
|
||||
uint32_t gapsHi[9];
|
||||
uint32_t numStr[3];
|
||||
UColToken* fStrToken[3];
|
||||
UColToken* lStrToken[3];
|
||||
int32_t pos[UCOL_STRENGTH_LIMIT];
|
||||
uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT];
|
||||
uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT];
|
||||
uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];
|
||||
UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];
|
||||
UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];
|
||||
|
||||
/*
|
||||
UColAttributeValue strongestP;
|
||||
|
@ -153,10 +153,12 @@ typedef enum {
|
||||
UCOL_TERTIARY = 2,
|
||||
/** Default collation strength */
|
||||
UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
|
||||
UCOL_CE_STRENGTH_LIMIT,
|
||||
/** Quaternary collation strength */
|
||||
UCOL_QUATERNARY=3,
|
||||
/** Identical collation strength */
|
||||
UCOL_IDENTICAL=15,
|
||||
UCOL_STRENGTH_LIMIT,
|
||||
|
||||
/* for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL & UCOL_DECOMPOSITION_MODE*/
|
||||
UCOL_OFF = 16,
|
||||
|
Loading…
Reference in New Issue
Block a user