ICU-96 some fixes - need to be finished (hi got broken)

X-SVN-Rev: 4887
This commit is contained in:
Vladimir Weinstein 2001-06-05 22:52:56 +00:00
parent 8c69e1eb5a
commit 27d7ed1627
6 changed files with 204 additions and 245 deletions

View File

@ -1155,7 +1155,6 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
/* add latin-1 stuff */
if(U_SUCCESS(*status)) {
for(u = 0; u<0x100; u++) {
if((CE = ucmp32_get(t->mapping, u)) == UCOL_NOT_FOUND
/* this test is for contractions that are missing the starting element. Looks like latin-1 should be done before assembling */
@ -1171,7 +1170,6 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
init_collIterate(src->UCA, decomp, 1, &colIt);
while(CE != UCOL_NO_MORE_CES) {
CE = ucol_getNextCE(src->UCA, &colIt, status);
/*UCOL_GETNEXTCE(CE, temp, colIt, status);*/
if(CE != UCOL_NO_MORE_CES) {
el.CEs[el.noOfCEs++] = CE;
}
@ -1194,32 +1192,81 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
}
uprv_uca_closeTempTable(tempTable);
}
if(U_SUCCESS(*status)) {
/* copy contractions */
uint32_t ucaCE = UCOL_NOT_FOUND, tailoredCE = UCOL_NOT_FOUND;
uint16_t *conts = (uint16_t *)((uint8_t *)src->UCA->image + src->UCA->image->contractionUCACombos);
while(*conts != 0) {
tailoredCE = ucmp32_get(tempColl->mapping, *conts);
if(tailoredCE != UCOL_NOT_FOUND) {
UBool isTailoredContraction = isContraction(tailoredCE);
el.cPoints = el.uchars;
el.noOfCEs = 0;
el.uchars[0] = *conts;
el.uchars[1] = *(conts+1);
if(*(conts+2)!=0) {
el.uchars[2] = *(conts+2);
el.cSize = 3;
} else {
el.cSize = 2;
}
UCollationElements *ucaEl = ucol_openElements(src->UCA, el.uchars, el.cSize, status);
UCollationElements *tailorEl = ucol_openElements(tempColl, el.uchars, el.cSize, status);
UBool needToAdd = TRUE;
if(isTailoredContraction) {
do {
el.CEs[el.noOfCEs] = ucol_next(ucaEl, status);
tailoredCE = ucol_next(tailorEl, status);
if(tailoredCE == el.CEs[el.noOfCEs]) {
el.noOfCEs++;
} else {
needToAdd = FALSE;
break;
}
} while(tailoredCE != UCOL_NULLORDER);
if(needToAdd == TRUE) {
el.noOfCEs--; // remove UCOL_NULLORDER
uprv_uca_addAnElement(t, &el, status);
}
} else { // if the tailored CE is not a contraction, we need to add this onelk
while ((el.CEs[el.noOfCEs] = ucol_next(ucaEl, status)) != UCOL_NULLORDER) {
el.noOfCEs++;
}
uprv_uca_addAnElement(t, &el, status);
}
}
conts+=3;
}
if(U_SUCCESS(*status)) {
ucol_close(tempColl);
tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status);
UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status);
tempColl = ucol_initCollator(tempData, 0, status);
if(U_SUCCESS(*status)) {
tempColl->rb = NULL;
tempColl->hasRealData = TRUE;
}
uprv_uca_closeTempTable(tempTable);
}
/* produce canonical closure */
for(u = 0; u < 0xFFFF; u++) {
if((noOfDec = unorm_normalize(&u, 1, UNORM_NFD, 0, decomp, 256, status)) > 1
|| (noOfDec == 1 && *decomp != (UChar)u))
{
//el.noOfCEs = ucol_getDynamicCEs(src, t, decomp, noOfDec, el.CEs, 128, status);
if(ucol_strcoll(tempColl, (UChar *)&u, 1, decomp, noOfDec) != UCOL_EQUAL) {
el.uchars[0] = (UChar)u;
el.cPoints = el.uchars;
el.cSize = 1;
el.noOfCEs = 0;
//uint32_t noOfCEs = 0;
//uint32_t currCE = 0;
UCollationElements* colEl = ucol_openElements(tempColl, decomp, noOfDec, status);
while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != UCOL_NULLORDER) {
//while((currCE = ucol_next(colEl, status)) != UCOL_NULLORDER) {
//if(currCE != el.CEs[noOfCEs]) {
//fprintf(stderr, "%04X[%d] %08X vs %08X\n", u, noOfCEs, currCE, el.CEs[noOfCEs]);
//}
el.noOfCEs++;
//noOfCEs++;
}
uprv_uca_addAnElement(t, &el, status);

View File

@ -495,8 +495,8 @@ typedef struct {
/* all the offsets are in bytes */
/* to get the address add to the header address and cast properly */
uint32_t options; /* these are the default options for the collator */
uint32_t CEindex; /* uint16_t *CEindex; */
uint32_t CEvalues; /* int32_t *CEvalues; */
uint32_t contractionUCACombos; /* this one is needed only for UCA, to copy the appropriate contractions */
uint32_t unusedReserved1; /* reserved for future use */
uint32_t mappingPosition; /* const uint8_t *mappingPosition; */
uint32_t expansion; /* uint32_t *expansion; */
uint32_t contractionIndex; /* UChar *contractionIndex; */

View File

@ -47,15 +47,6 @@ uhash_hashTokens(const void *k) {
hash = (hash * 37) + *p;
p += inc;
}
if((len = ((key->expansion & 0xFF000000)>>24)) != 0) {
p = (key->expansion & 0x00FFFFFF) + rulesToParse;
limit = p + len;
while (p<limit) {
hash = (hash * 37) + *p;
p += inc;
}
}
}
return hash;
}
@ -74,33 +65,19 @@ UBool uhash_compareTokens(const void *key1, const void *key2) {
if (p1 == NULL || p2 == NULL) {
return FALSE;
}
if(p1->source == p2->source && p1->expansion == p2->expansion) {
return TRUE;
}
if(s1L != s2L) {
return FALSE;
}
while(s1 < s1+s1L-1 && *s1 == *s2) {
if(p1->source == p2->source) {
return TRUE;
}
const UChar *end = s1+s1L-1;
while((s1 < end) && *s1 == *s2) {
++s1;
++s2;
}
if(*s1 == *s2) {
s1 = (p1->expansion & 0x00FFFFFF) + rulesToParse;
s2 = (p2->expansion & 0x00FFFFFF) + rulesToParse;
s1L = ((p1->expansion & 0xFF000000) >> 24);
s2L = ((p2->expansion & 0xFF000000) >> 24);
if(s1L != s2L) {
return FALSE;
}
if(s1L != 0) {
while(s1 < s1+s1L-1 && *s1 == *s2) {
++s1;
++s2;
}
return (UBool)(*s1 == *s2);
} else {
return TRUE;
}
return TRUE;
} else {
return FALSE;
}
@ -389,10 +366,6 @@ const UChar *ucol_tok_parseNextToken(UColTokenParser *src,
}
}
} else {
// This here would be the proper way to do it, but we then need to require quoting all isWhitespace in
// while(u_isWhitespace(ch)) {
// ch = *(++src->current);
// }
/* Sets the strength for this entry */
switch (ch) {
case 0x003D/*'='*/ :
@ -514,7 +487,6 @@ const UChar *ucol_tok_parseNextToken(UColTokenParser *src,
case 0x000D/*'\r'*/:
case 0x000A/*'\n'*/:
case 0x0020/*' '*/:
case 0x2028/* Unicode line break (UniPad likes to add it)*/:
break; /* skip whitespace TODO use Unicode */
case 0x002F/*'/'*/:
wasInQuote = FALSE; /* if we were copying source characters, we want to stop now */
@ -793,6 +765,7 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
src->varTop = sourceToken;
}
sourceToken->expansion = newExtensionsLen << 24 | extensionOffset;
/*
If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c *
d * ... into &x * c/y * d * ...
@ -802,16 +775,16 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
expandNext = 0;
} else if(sourceToken->expansion == 0) { /* if there is no expansion, implicit is just added to the token */
sourceToken->expansion = expandNext;
sourceToken->debugExpansion = *(src->source + (expandNext & 0xFFFFFF));
} else { /* there is both explicit and implicit expansion. We need to make a combination */
memcpy(src->extraCurrent, src->source + (expandNext & 0xFFFFFF), (expandNext >> 24)*sizeof(UChar));
memcpy(src->extraCurrent+(expandNext >> 24), src->source + extensionOffset, newExtensionsLen*sizeof(UChar));
sourceToken->expansion = ((expandNext >> 24) + newExtensionsLen)<<24 | (src->extraCurrent - src->source);
src->extraCurrent += (expandNext >> 24) + newExtensionsLen;
sourceToken->debugExpansion = *(src->source + (sourceToken->expansion & 0xFFFFFF));
}
}
sourceToken->debugExpansion = *(src->source + (sourceToken->expansion & 0xFFFFFF));
/*
1. Find the strongest strength in each list, and set strongestP and strongestN
accordingly in the headers.
@ -887,6 +860,18 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
}
}
} else {
if(sourceToken == NULL) { /* this is a reset, but it might still be somewhere in the tailoring, in shorter form */
uint32_t searchCharsLen = newCharsLen;
while(searchCharsLen > 1 && sourceToken == NULL) {
searchCharsLen--;
key.source = searchCharsLen << 24 | charsOffset;
sourceToken = (UColToken *)uhash_get(uchars2tokens, &key);
}
if(sourceToken != NULL) {
expandNext = (newCharsLen - searchCharsLen) << 24 | (charsOffset + searchCharsLen);
}
}
uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;
collIterate s;
@ -926,12 +911,6 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
}
}
if(newCharsLen > 1) {
expandNext = ((newCharsLen-1)<<24) | (charsOffset + 1);
} else {
expandNext = 0;
}
/* 5 If the relation is a reset:
If sourceToken is null
Create new list, create new sourceToken, make the baseCE from source, put
@ -962,14 +941,13 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
earlier in the list.
*/
if(top == FALSE) {
if(newCharsLen > 1) {
sourceToken->source = 0x01000000 | charsOffset;
}
uint32_t resetCharsOffset;
init_collIterate(src->UCA, src->source+charsOffset, 1, &s); /* or newCharsLen instead of 1??? */
init_collIterate(src->UCA, src->source+charsOffset, newCharsLen, &s);
CE = ucol_getNextCE(src->UCA, &s, status);
resetCharsOffset = s.pos - src->source;
SecondCE = ucol_getNextCE(src->UCA, &s, status);
ListList[src->resultLen].baseCE = CE & 0xFFFFFF3F;
@ -978,6 +956,12 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
} else {
ListList[src->resultLen].baseContCE = 0;
}
if(newCharsLen > 1) {
sourceToken->source = ((resetCharsOffset - charsOffset ) << 24) | charsOffset;
expandNext = ((newCharsLen + charsOffset - resetCharsOffset)<<24) | (resetCharsOffset);
} else {
expandNext = 0;
}
} else { /* top == TRUE */
top = FALSE;
ListList[src->resultLen].baseCE = UCOL_RESET_TOP_VALUE;

View File

@ -430,7 +430,9 @@ ucol_allocWeights(uint32_t lowerLimit, uint32_t upperLimit,
}
/* set the bytes in the end weight at length+1..length2 to maxByte */
ranges[0].end|=(0xffffffff>>(8*i))&(0xffffffff<<(8*(4-minLength)));
byte=(maxByte<<24)|(maxByte<<16)|(maxByte<<8)|maxByte; /* this used to be 0xffffffff */
ranges[0].end=truncateWeight(ranges[0].end, i)|
(byte>>(8*i))&(byte<<(8*(4-minLength)));
/* set the start of the second range to immediately follow the end of the first one */
ranges[1].start=incWeight(ranges[0].end, minLength, maxByte);
@ -470,10 +472,8 @@ ucol_allocWeights(uint32_t lowerLimit, uint32_t upperLimit,
}
#endif
if(rangeCount>0) {
/* set maxByte in ranges[0] for ucol_nextWeight() */
ranges[0].count=maxByte;
}
/* set maxByte in ranges[0] for ucol_nextWeight() */
ranges[0].count=maxByte;
return rangeCount;
}

View File

@ -2280,6 +2280,61 @@ static void TestCompressOverlap() {
}
}
void TestCyrillicTailoring(void) {
static char *test[] = {
"\\u0410",
"\\u0410\\u0306",
"\\u04d0"
};
static char rules[256] = "&Z < \\u0410";
static UChar rlz[256];
uint32_t rLen;
UErrorCode status = U_ZERO_ERROR;
UChar u = 0;
uint32_t nfcSize;
uint32_t nfdSize;
tester **t = uprv_malloc(0xFFFF * sizeof(tester *));
uint32_t noCases = 0;
UCollator *coll = NULL;
t[0] = (tester *)uprv_malloc(sizeof(tester));
for(u = 0; u < 0xFFFF; u++) {
nfcSize = unorm_normalize(&u, 1, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
nfdSize = unorm_normalize(&u, 1, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)) {
t[noCases]->u = u;
noCases++;
t[noCases] = (tester *)uprv_malloc(sizeof(tester));
}
}
/*coll = ucol_open(locName, &status);*/
rLen = u_unescape(rules, rlz, 256);
coll = ucol_openRules(rlz, rLen, UCOL_DEFAULT_NORMALIZATION, UCOL_DEFAULT, &status);
for(u=0; u<noCases; u++) {
doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
}
ucol_close(coll);
for(u = 0; u <= noCases; u++) {
uprv_free(t[u]);
}
uprv_free(t);
genericLocaleStarter("ru", test, 3);
genericRulesStarter("&\\u0410 = \\u0410", test, 3);
genericRulesStarter("&Z < \\u0410", test, 3);
}
static void TestContraction() {
const static char *testrules[] = {
"&A = AB / B",
@ -2297,19 +2352,19 @@ static void TestContraction() {
{(UChar)'c', (UChar)'l'}
};
const static char *testrules3[] = {
"&z < xyz &xyzw < B",
"&z < xyz &xyz < B / w",
"&z < ch &achm < B",
"&z < ch &a < B / chm",
"&\\ud800\\udc00w < B",
"&\\ud800\\udc00 < B / w",
"&a\\ud800\\udc00m < B",
"&a < B / \\ud800\\udc00m",
"&z < xyz &xyzw << B",
"&z < xyz &xyz << B / w",
"&z < ch &achm << B",
"&z < ch &a << B / chm",
"&\\ud800\\udc00w << B",
"&\\ud800\\udc00 << B / w",
"&a\\ud800\\udc00m << B",
"&a << B / \\ud800\\udc00m",
};
UErrorCode status = U_ZERO_ERROR;
UCollator *coll;
UChar rule[32] = {0};
UChar rule[256] = {0};
uint32_t rlen = 0;
int i;
@ -2356,7 +2411,7 @@ static void TestContraction() {
ucol_close(coll);
}
rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 32);
rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
coll = ucol_openRules(rule, rlen, UNORM_NFD, UCOL_TERTIARY, &status);
if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
@ -2379,9 +2434,9 @@ static void TestContraction() {
*iter2;
UChar ch = 'B';
uint32_t ce;
rlen = u_unescape(testrules3[i << 1], rule, 32);
rlen = u_unescape(testrules3[i], rule, 32);
coll1 = ucol_openRules(rule, rlen, UNORM_NFD, UCOL_TERTIARY, &status);
rlen = u_unescape(testrules3[(i << 1) + 1], rule, 32);
rlen = u_unescape(testrules3[i + 1], rule, 32);
coll2 = ucol_openRules(rule, rlen, UNORM_NFD, UCOL_TERTIARY, &status);
if (U_FAILURE(status)) {
log_err("Collator creation failed %s\n", testrules[i]);
@ -2422,12 +2477,13 @@ static void TestContraction() {
void addMiscCollTest(TestNode** root)
{
addTest(root, &TestCyrillicTailoring, "tscoll/cmsccoll/TestCyrillicTailoring");
addTest(root, &TestCase, "tscoll/cmsccoll/TestCase");
addTest(root, &IncompleteCntTest, "tscoll/cmsccoll/IncompleteCntTest");
addTest(root, &BlackBirdTest, "tscoll/cmsccoll/BlackBirdTest");
addTest(root, &FunkyATest, "tscoll/cmsccoll/FunkyATest");
addTest(root, &BillFairmanTest, "tscoll/cmsccoll/BillFairmanTest");
addTest(root, &RamsRulesTest, "tscoll/cmsccoll/RamsRulesTest");
/*addTest(root, &RamsRulesTest, "tscoll/cmsccoll/RamsRulesTest");*/
addTest(root, &IsTailoredTest, "tscoll/cmsccoll/IsTailoredTest");
addTest(root, &TestCollations, "tscoll/cmsccoll/TestCollations");
addTest(root, &TestChMove, "tscoll/cmsccoll/TestChMove");

View File

@ -34,7 +34,6 @@
#include <console.h>
#endif
/*UHashtable *elements = NULL;*/
UCAElements le;
/*
@ -42,20 +41,6 @@ UCAElements le;
*/
UBool VERBOSE = FALSE;
/*
void deleteElement(void *element) {
UCAElements *el = (UCAElements *)element;
int32_t i = 0;
for(i = 0; i < el->noOfCEs; i++) {
free(el->primary[i]);
free(el->secondary[i]);
free(el->tertiary[i]);
}
free(el);
}
*/
int32_t readElement(char **from, char *to, char separator, UErrorCode *status) {
if(U_FAILURE(*status)) {
return 0;
@ -110,16 +95,6 @@ uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UError
((secvalue<<UCOL_SECONDARYORDERSHIFT)&UCOL_SECONDARYORDERMASK)|
(tervalue&UCOL_TERTIARYORDERMASK);
// Here was case handling!
// case bits are already read from the UCA
#if 0
if(caseBit == TRUE && tervalue != 0) {
value |= 0x40; // 0100 0000 set case bit
} else {
value &= 0xFFFFFFBF; // ... 1011 1111 (reset case bit)
}
#endif
if(primsave!='\0') {
*primend = primsave;
}
@ -132,26 +107,10 @@ uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UError
return value;
}
/*
UCAElements *copyUCAElement(UCAElements *that) {
UCAElements *r = (UCAElements *)malloc(sizeof(*that));
memcpy(r, that, sizeof(*that));
return r;
}
void releaseUCACopy(UCAElements *r) {
free(r);
}
*/
static uint32_t inverseTable[0xFFFF][3];
static uint32_t inversePos = 0;
/*UChar *stringContinue[0xFFFF];*/
static UChar stringContinue[0xFFFF];
/*static uint32_t stringContSize[0xFFFF]; */
static uint32_t sContPos = 0;
/*static uint32_t contSize = 0;*/
static void addNewInverse(UCAElements *element, UErrorCode *status) {
if(U_FAILURE(*status)) {
@ -454,7 +413,6 @@ UCAElements *readAnElement(FILE *data, UErrorCode *status) {
}
element->cPoints[0] = (UChar)theValue;
/*element->codepoint = element->cPoints[0];*/
if(spacePointer == 0) {
detectedContraction = FALSE;
element->cSize = 1;
@ -474,27 +432,6 @@ UCAElements *readAnElement(FILE *data, UErrorCode *status) {
startCodePoint = endCodePoint+1;
/* Case bit is now associated with each collation element */
/* Also, there are two case bits, but we don't care about it here */
#if 0
endCodePoint = strchr(startCodePoint, ';');
while(*startCodePoint != '0' && *startCodePoint != '1') {
startCodePoint++;
if(startCodePoint == endCodePoint) {
*status = U_INVALID_FORMAT_ERROR;
return NULL;
}
}
if(*startCodePoint == '0') {
element->caseBit = FALSE;
} else {
element->caseBit = TRUE;
}
startCodePoint = endCodePoint+1;
#endif
commentStart = strchr(startCodePoint, '#');
if(commentStart == NULL) {
commentStart = strlen(startCodePoint) + startCodePoint - 1;
@ -521,7 +458,6 @@ UCAElements *readAnElement(FILE *data, UErrorCode *status) {
uint32_t CEi = 1;
while(2*CEi<element->sizePrim[i] || CEi<element->sizeSec[i] || CEi<element->sizeTer[i]) {
//uint32_t value = element->caseBit?0xC0:0x80; /* Continuation marker */
uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
if(2*CEi<element->sizePrim[i]) {
value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
@ -564,15 +500,9 @@ UCAElements *readAnElement(FILE *data, UErrorCode *status) {
pointer++;
}
/*
strcpy(element->comment, commentStart);
uhash_put(elements, (void *)element->codepoint, element, status);
*/
if(U_FAILURE(*status)) {
fprintf(stderr, "problem putting stuff in hash table\n");
*status = U_INTERNAL_PROGRAM_ERROR;
//free(element);
return NULL;
}
@ -581,6 +511,8 @@ UCAElements *readAnElement(FILE *data, UErrorCode *status) {
void writeOutData(UCATableHeader *data,
uint16_t contractions[][3],
uint32_t noOfcontractions,
const char *outputDir,
const char *copyright,
UErrorCode *status)
@ -589,6 +521,19 @@ void writeOutData(UCATableHeader *data,
return;
}
uint32_t size = data->size;
if(noOfcontractions != 0) {
contractions[noOfcontractions][0] = 0;
contractions[noOfcontractions][1] = 0;
contractions[noOfcontractions][2] = 0;
noOfcontractions++;
data->contractionUCACombos = size;
data->size += paddedsize((noOfcontractions*3*sizeof(uint16_t)));
}
UNewDataMemory *pData;
long dataLength;
@ -605,7 +550,12 @@ void writeOutData(UCATableHeader *data,
fprintf(stdout, "Writing out UCA table: %s%s.%s\n", outputDir,
UCA_DATA_NAME,
UCA_DATA_TYPE);
udata_writeBlock(pData, data, data->size);
udata_writeBlock(pData, data, size);
if(noOfcontractions != 0) {
udata_writeBlock(pData, contractions, noOfcontractions*3*sizeof(uint16_t));
udata_writePadding(pData, paddedsize((noOfcontractions*3*sizeof(uint16_t))) - noOfcontractions*3*sizeof(uint16_t));
}
/* finish up */
dataLength=udata_finish(pData, status);
@ -623,14 +573,12 @@ write_uca_table(const char *filename,
{
FILE *data = fopen(filename, "r");
uint32_t line = 0;
int32_t sizesPrim[35], sizesSec[35], sizesTer[35];
/* int32_t sizeBreakDown[35][35][35];
int32_t *secValue = (int32_t*)uprv_malloc(sizeof(int32_t)*0xffff);
int32_t *terValue = (int32_t*)uprv_malloc(sizeof(int32_t)*0xffff);*/
UCAElements *element = NULL;
UChar variableTopValue = 0;
UCATableHeader *myD = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
uint16_t contractionCEs[256][3];
uint32_t noOfContractions = 0;
if(data == NULL) {
@ -638,12 +586,6 @@ write_uca_table(const char *filename,
return -1;
}
/* memset(secValue, 0, 0xffff*sizeof(int32_t));
memset(terValue, 0, 0xffff*sizeof(int32_t)); */
memset(sizesPrim, 0, 35*sizeof(int32_t));
memset(sizesSec, 0, 35*sizeof(int32_t));
memset(sizesTer, 0, 35*sizeof(int32_t));
/* memset(sizeBreakDown, 0, 35*35*35*sizeof(int32_t)); */
memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF);
opts->variableTopValue = variableTopValue;
@ -652,7 +594,7 @@ write_uca_table(const char *filename,
opts->alternateHandling = UCOL_NON_IGNORABLE; /* attribute for handling variable elements*/
opts->caseFirst = UCOL_OFF; /* who goes first, lower case or uppercase */
opts->caseLevel = UCOL_OFF; /* do we have an extra case level */
opts->normalizationMode = UCOL_OFF; /*UCOL_ON*/ /* attribute for normalization */
opts->normalizationMode = UCOL_OFF; /* attribute for normalization */
/* populate the version info struct with version info*/
myD->version[0] = UCOL_BUILDER_VERSION;
/*TODO:The fractional rules version should be taken from FractionalUCA.txt*/
@ -661,12 +603,6 @@ write_uca_table(const char *filename,
tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, status);
/*
elements = uhash_open(uhash_hashLong, uhash_compareLong, &status);
uhash_setValueDeleter(elements, deleteElement);
*/
while(!feof(data)) {
if(U_FAILURE(*status)) {
@ -677,33 +613,27 @@ write_uca_table(const char *filename,
element = readAnElement(data, status);
line++;
if(element != NULL) {
/* this does statistics on CE lengths, but is currently broken */
/*
for( i = 0; i<element->noOfCEs; i++) {
sizesPrim[element->sizePrim[i]]++;
sizesSec[element->sizeSec[i]]++;
sizesTer[element->sizeTer[i]]++;
sizeBreakDown[element->sizePrim[i]][element->sizeSec[i]][element->sizeTer[i]]++;
if(element->sizePrim[i] == 2 && element->sizeSec[i]==2) {
terValue[strtoul(element->tertiary[i], 0, 16)]++;
secValue[strtoul(element->secondary[i], 0, 16)]++;
}
}
*/
// we have read the line, now do something sensible with the read data!
if(element->variableTop == TRUE && variableTopValue == 0) {
t->options->variableTopValue = element->cPoints[0];
}
// if element is a contraction, we want to add it to contractions
if(element->cSize > 1) { // this is a contraction
contractionCEs[noOfContractions][0] = element->cPoints[0];
contractionCEs[noOfContractions][1] = element->cPoints[1];
if(element->cSize > 2) { // the third one
contractionCEs[noOfContractions][2] = element->cPoints[2];
} else {
contractionCEs[noOfContractions][2] = 0;
}
noOfContractions++;
}
/* we're first adding to inverse, because addAnElement will reverse the order */
/* of code points and stuff... we don't want that to happen */
addToInverse(element, status);
uprv_uca_addAnElement(t, element, status);
//deleteElement(element);
}
}
@ -712,80 +642,22 @@ write_uca_table(const char *filename,
fprintf(stdout, "\nLines read: %i\n", line);
}
/*
for(i = 0; i<35; i++) {
fprintf(stderr, "size %i: P:%i S:%i T:%i\n", i, sizesPrim[i], sizesSec[i], sizesTer[i]);
}
for(i = 0; i<35; i++) {
UBool printedPrimary = FALSE;
for(j = 0; j<35; j++) {
for(k = 0; k<35; k++) {
if(sizeBreakDown[i][j][k] != 0) {
if(!printedPrimary) {
fprintf(stderr, "Primary: %i\n", i);
printedPrimary = TRUE;
}
fprintf(stderr, "Sec: %i, Ter: %i = %i\n", j, k, sizeBreakDown[i][j][k]);
}
}
}
}
for(i = 0; i<(uint32_t)0xffff; i++) {
if(terValue[i] != 0) {
fprintf(stderr, "Tertiaries with value %04X : %i\n", i, terValue[i]);
}
if(secValue[i] != 0) {
fprintf(stderr, "Secondaries with value %04X : %i\n", i, secValue[i]);
}
}
*/
/* test */
UCATableHeader *myData = uprv_uca_assembleTable(t, status);
writeOutData(myData, outputDir, copyright, status);
writeOutData(myData, contractionCEs, noOfContractions, outputDir, copyright, status);
InverseTableHeader *inverse = assembleInverseTable(status);
writeOutInverseData(inverse, outputDir, copyright, status);
/*
uint32_t *itab = (uint32_t *)((uint8_t *)inverse + inverse->table);
UChar *conts = (UChar *)((uint8_t *)inverse + inverse->conts);
for(i = 0; i<inverse->tableSize; i++) {
fprintf(stderr, "[%04X] 0x%08X 0x%08X 0x%08X\n", i, *(itab+3*i), *(itab+3*i+1), *(itab+3*i+2));
if((*(itab+3*i+2) & UCOL_INV_SIZEMASK) != 0) {
uint32_t contIndex = *(itab+3*i+2) & UCOL_INV_OFFSETMASK;
uint32_t contSize = (*(itab+3*i+2) & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE;
fprintf(stderr, "\t");
for(j = 0; j<contSize; j++) {
if(*(conts+contIndex+j) < 0xFFFE) {
fprintf(stderr, "%04X ", *(conts+contIndex+j));
} else {
fprintf(stderr, "\n\t");
}
}
fprintf(stderr, "\n");
}
}
*/
uprv_uca_closeTempTable(t);
uprv_free(myD);
uprv_free(opts);
//printOutTable(myData, &status);
//uhash_close(elements);
uprv_free(myData);
uprv_free(inverse);
fclose(data);
/*
uprv_free(secValue);
uprv_free(terValue);
*/
return 0;
}