ICU-96 [before [1|2|3]] option implemented

X-SVN-Rev: 4581
This commit is contained in:
Vladimir Weinstein 2001-05-02 05:05:06 +00:00
parent c434168ade
commit 947b617e64
5 changed files with 147 additions and 54 deletions

View File

@ -542,6 +542,10 @@ typedef struct {
uint8_t reserved[64]; /* for future use */
} UCATableHeader;
#define UCOL_INV_SIZEMASK 0xFFF00000
#define UCOL_INV_OFFSETMASK 0x000FFFFF
#define UCOL_INV_SHIFTVALUE 20
typedef struct {
uint32_t byteSize;
uint32_t tableSize;

View File

@ -166,7 +166,7 @@ void ucol_uprv_tok_setOptionInImage(UColOptionSet *opts, UColAttribute attrib, U
}
}
#define UTOK_OPTION_COUNT 12
#define UTOK_OPTION_COUNT 13
static UBool didInit = FALSE;
/* we can be strict, or we can be lenient */
@ -179,7 +179,9 @@ U_STRING_DECL(suboption_02, "lower", 5);
U_STRING_DECL(suboption_03, "upper", 5);
U_STRING_DECL(suboption_04, "off", 3);
U_STRING_DECL(suboption_05, "on", 2);
U_STRING_DECL(suboption_06, "2", 1);
U_STRING_DECL(suboption_06, "1", 1);
U_STRING_DECL(suboption_07, "2", 1);
U_STRING_DECL(suboption_08, "3", 1);
@ -195,6 +197,8 @@ U_STRING_DECL(option_08, "caseFirst", 9);
U_STRING_DECL(option_09, "scriptOrder", 11);
U_STRING_DECL(option_10, "charsetname", 11);
U_STRING_DECL(option_11, "charset", 7);
U_STRING_DECL(option_12, "before", 6);
ucolTokSuboption alternateSub[2] = {
{suboption_00, 13, UCOL_NON_IGNORABLE},
@ -213,9 +217,14 @@ ucolTokSuboption onOffSub[2] = {
};
ucolTokSuboption frenchSub[1] = {
{suboption_06, 1, UCOL_ON}
{suboption_07, 1, UCOL_ON}
};
/* Arguments accepted by the "before" option: the strings "1", "2" and "3" */
/* map to UCOL_PRIMARY, UCOL_SECONDARY and UCOL_TERTIARY respectively.     */
ucolTokSuboption beforeSub[3] = {
{suboption_06, 1, UCOL_PRIMARY},
{suboption_07, 1, UCOL_SECONDARY},
{suboption_08, 1, UCOL_TERTIARY}
};
ucolTokOption rulesOptions[UTOK_OPTION_COUNT] = {
@ -227,6 +236,7 @@ ucolTokOption rulesOptions[UTOK_OPTION_COUNT] = {
{option_04, 12, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"variable top" */
{option_01, 9, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"rearrange" */
{option_05, 3, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"top" */
{option_12, 6, beforeSub, 3, UCOL_ATTRIBUTE_COUNT}, /*"before" */
{option_00, 9, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"undefined" */
{option_09, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"scriptOrder" */
{option_10, 11, NULL, 0, UCOL_ATTRIBUTE_COUNT}, /*"charsetname" */
@ -253,7 +263,7 @@ u_strncmpNoCase(const UChar *s1,
}
}
UBool ucol_uprv_tok_readAndSetOption(UColOptionSet *opts, const UChar* start, const UChar *end, UBool *variableTop, UBool *top, UErrorCode *status) {
uint8_t ucol_uprv_tok_readAndSetOption(UColOptionSet *opts, const UChar* start, const UChar *end, UErrorCode *status) {
uint32_t i = 0;
int32_t j=0;
UBool foundOption = FALSE;
@ -267,7 +277,9 @@ UBool ucol_uprv_tok_readAndSetOption(UColOptionSet *opts, const UChar* start, co
U_STRING_INIT(suboption_04, "off", 3);
U_STRING_INIT(suboption_05, "on", 2);
U_STRING_INIT(suboption_06, "2", 1);
U_STRING_INIT(suboption_06, "1", 1);
U_STRING_INIT(suboption_07, "2", 1);
U_STRING_INIT(suboption_08, "3", 1);
U_STRING_INIT(option_00, "undefined", 9);
@ -282,6 +294,7 @@ UBool ucol_uprv_tok_readAndSetOption(UColOptionSet *opts, const UChar* start, co
U_STRING_INIT(option_09, "scriptOrder", 11);
U_STRING_INIT(option_10, "charsetname", 11);
U_STRING_INIT(option_11, "charset", 7);
U_STRING_INIT(option_12, "before", 6);
}
start++; /*skip opening '['*/
while(i < UTOK_OPTION_COUNT) {
@ -308,23 +321,31 @@ UBool ucol_uprv_tok_readAndSetOption(UColOptionSet *opts, const UChar* start, co
for(j = 0; j<rulesOptions[i].subSize; j++) {
if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
ucol_uprv_tok_setOptionInImage(opts, rulesOptions[i].attr, rulesOptions[i].subopts[j].attrVal);
return TRUE;
return UCOL_TOK_SUCCESS;
}
}
}
*status = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
} else if(i == 5) { /* variable top */
*variableTop = TRUE;
return TRUE;
return UCOL_TOK_SUCCESS | UCOL_TOK_VARIABLE_TOP;
} else if(i == 6) { /*rearange */
return TRUE;
return UCOL_TOK_SUCCESS;
} else if(i == 7) { /*top */
*top = TRUE;
return TRUE;
return UCOL_TOK_SUCCESS | UCOL_TOK_TOP;
} else if(i == 8) { /*before*/
if(optionArg) {
for(j = 0; j<rulesOptions[i].subSize; j++) {
if(u_strncmpNoCase(optionArg, rulesOptions[i].subopts[j].subName, rulesOptions[i].subopts[j].subLen) == 0) {
return UCOL_TOK_SUCCESS | rulesOptions[i].subopts[j].attrVal + 1;
}
}
}
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
} else {
*status = U_UNSUPPORTED_ERROR;
return FALSE;
return 0;
}
}
@ -332,7 +353,7 @@ const UChar *ucol_tok_parseNextToken(UColTokenParser *src,
uint32_t *strength,
uint32_t *chOffset, uint32_t *chLen,
uint32_t *exOffset, uint32_t *exLen,
UBool *varT, UBool *top_,
uint8_t *specs,
UBool startOfRules,
UErrorCode *status) {
/* parsing part */
@ -343,6 +364,7 @@ const UChar *ucol_tok_parseNextToken(UColTokenParser *src,
UBool inQuote = FALSE;
UBool wasInQuote = FALSE;
UChar *optionEnd = NULL;
uint8_t before = 0;
uint32_t newCharsLen = 0, newExtensionLen = 0;
uint32_t charsOffset = 0, extensionOffset = 0;
@ -449,28 +471,36 @@ const UChar *ucol_tok_parseNextToken(UColTokenParser *src,
case 0x005b/*'['*/:
/* options - read an option, analyze it */
if((optionEnd = u_strchr(src->current, 0x005d /*']'*/)) != NULL) {
ucol_uprv_tok_readAndSetOption(src->opts, src->current, optionEnd, &variableTop, &top, status);
uint8_t result = ucol_uprv_tok_readAndSetOption(src->opts, src->current, optionEnd, status);
src->current = optionEnd;
if(top == TRUE) {
if(newStrength == UCOL_TOK_RESET) {
src->current++;
goto EndOfLoop;
} else {
*status = U_INVALID_FORMAT_ERROR;
}
} else if(variableTop == TRUE) {
if(newStrength != UCOL_TOK_RESET && newStrength != UCOL_TOK_UNSET) {
charsOffset = src->extraCurrent - src->source;
newCharsLen = 1;
*src->extraCurrent++ = 0xFFFF;
src->current++;
goto EndOfLoop;
} else {
*status = U_INVALID_FORMAT_ERROR;
}
}
if(U_FAILURE(*status)) {
if(U_SUCCESS(*status)) {
if(result & UCOL_TOK_TOP) {
if(newStrength == UCOL_TOK_RESET) {
top = TRUE;
src->current++;
goto EndOfLoop;
} else {
*status = U_INVALID_FORMAT_ERROR;
}
} else if(result & UCOL_TOK_VARIABLE_TOP) {
if(newStrength != UCOL_TOK_RESET && newStrength != UCOL_TOK_UNSET) {
variableTop = TRUE;
charsOffset = src->extraCurrent - src->source;
newCharsLen = 1;
*src->extraCurrent++ = 0xFFFF;
src->current++;
goto EndOfLoop;
} else {
*status = U_INVALID_FORMAT_ERROR;
}
} else if (result & UCOL_TOK_BEFORE){
if(newStrength == UCOL_TOK_RESET) {
before = result & UCOL_TOK_BEFORE;
} else {
*status = U_INVALID_FORMAT_ERROR;
}
}
} else {
return NULL;
}
}
@ -585,8 +615,7 @@ const UChar *ucol_tok_parseNextToken(UColTokenParser *src,
*chLen = newCharsLen;
*exOffset = extensionOffset;
*exLen = newExtensionLen;
*varT = variableTop;
*top_ = top;
*specs = (UCOL_TOK_VARIABLE_TOP * (variableTop?1:0)) | (UCOL_TOK_TOP * (top?1:0)) | before;
return src->current;
}
@ -606,6 +635,7 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
uint32_t expandNext = 0;
UBool variableTop = FALSE;
UBool top = FALSE;
uint8_t specs = 0;
UColTokListHeader *ListList = NULL;
@ -623,10 +653,13 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
&newStrength,
&charsOffset, &newCharsLen,
&extensionOffset, &newExtensionsLen,
&variableTop, &top,
&specs,
(UBool)(lastToken == NULL),
status);
variableTop = ((specs & UCOL_TOK_VARIABLE_TOP) != 0);
top = ((specs & UCOL_TOK_TOP) != 0);
if(U_SUCCESS(*status) && parseEnd != NULL) {
UColToken *sourceToken = NULL;
UColToken key;
@ -755,6 +788,35 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;
collIterate s;
if((specs & UCOL_TOK_BEFORE) != 0) {
uint32_t baseCE = UCOL_NOT_FOUND, baseContCE = UCOL_NOT_FOUND;
uint32_t invPos;
uint8_t strength = (specs & UCOL_TOK_BEFORE) - 1;
init_collIterate(src->UCA, src->source+charsOffset, 1, &s);
baseCE = ucol_getNextCE(src->UCA, &s, status);
baseContCE = ucol_getNextCE(src->UCA, &s, status);
if(baseContCE == UCOL_NO_MORE_CES) {
baseContCE = 0;
}
invPos = ucol_inv_getPrevCE(baseCE, baseContCE, &CE, &SecondCE, strength);
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
uint32_t ch = CETable[3*invPos+2];
if((ch & UCOL_INV_SIZEMASK) != 0) {
uint32_t *conts = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->conts);
uint32_t offset = (ch & UCOL_INV_OFFSETMASK);
ch = conts[offset];
}
*src->extraCurrent++ = (UChar)ch;
charsOffset = src->extraCurrent - src->source - 1;
newCharsLen = 1;
}
if(lastToken != NULL && lastToken->strength == UCOL_TOK_RESET) {
/* if the previous token was also a reset, */
/*this means that we have two consecutive resets */
@ -824,10 +886,7 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
init_collIterate(src->UCA, src->source+charsOffset, 1, &s); /* or newCharsLen instead of 1??? */
CE = ucol_getNextCE(src->UCA, &s, status);
/*UCOL_GETNEXTCE(CE, src->UCA, s, &status);*/
SecondCE = ucol_getNextCE(src->UCA, &s, status);
/*UCOL_GETNEXTCE(SecondCE, src->UCA, s, &status);*/
ListList[src->resultLen].baseCE = CE;
if(isContinuation(SecondCE)) {

View File

@ -29,6 +29,11 @@
#define UCOL_TOK_POLARITY_NEGATIVE 0
#define UCOL_TOK_POLARITY_POSITIVE 1
#define UCOL_TOK_TOP 0x04
#define UCOL_TOK_VARIABLE_TOP 0x08
#define UCOL_TOK_BEFORE 0x03
#define UCOL_TOK_SUCCESS 0x10
/* this is space for the extra strings that need to be unquoted */
/* during the parsing of the rules */
#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 1024
@ -127,7 +132,7 @@ U_CAPI const UChar U_EXPORT2 *ucol_tok_parseNextToken(UColTokenParser *src,
uint32_t *strength,
uint32_t *chOffset, uint32_t *chLen,
uint32_t *exOffset, uint32_t *exLen,
UBool *varT, UBool *top_,
uint8_t *specs,
UBool startOfRules,
UErrorCode *status);

View File

@ -708,6 +708,7 @@ static void testCollator(UCollator *coll, UErrorCode *status) {
/* uint32_t rExpsLen = 0; */
uint32_t firstLen = 0;
UBool varT = FALSE; UBool top_ = TRUE;
uint8_t specs = 0;
UBool startOfRules = TRUE;
UBool lastReset = FALSE;
UColTokenParser src;
@ -731,9 +732,10 @@ static void testCollator(UCollator *coll, UErrorCode *status) {
while ((current = ucol_tok_parseNextToken(&src, &strength,
&chOffset, &chLen, &exOffset, &exLen,
&varT, &top_, startOfRules, status)) != NULL) {
&specs, startOfRules, status)) != NULL) {
startOfRules = FALSE;
varT = ((specs & UCOL_TOK_VARIABLE_TOP) != 0);
top_ = ((specs & UCOL_TOK_TOP) != 0);
u_strncpy(second,rulesCopy+chOffset, chLen);
second[chLen] = 0;
@ -1013,6 +1015,7 @@ static void testAgainstUCA(UCollator *coll, UCollator *UCA, LCID lcid, UErrorCod
/* uint32_t rExpsLen = 0; */
uint32_t firstLen = 0, secondLen = 0;
UBool varT = FALSE; UBool top_ = TRUE;
uint8_t specs = 0;
UBool startOfRules = TRUE;
UColTokenParser src;
UColOptionSet opts;
@ -1041,8 +1044,10 @@ static void testAgainstUCA(UCollator *coll, UCollator *UCA, LCID lcid, UErrorCod
while ((current = ucol_tok_parseNextToken(&src, &strength,
&chOffset, &chLen, &exOffset, &exLen,
&varT, &top_, startOfRules, status)) != NULL) {
&specs, startOfRules, status)) != NULL) {
startOfRules = FALSE;
varT = ((specs & UCOL_TOK_VARIABLE_TOP) != 0);
top_ = ((specs & UCOL_TOK_TOP) != 0);
u_strncpy(second,rulesCopy+chOffset, chLen);
second[chLen] = 0;
@ -1097,6 +1102,7 @@ static void testCEs(UCollator *coll, UErrorCode *status) {
/* uint32_t rExpsLen = 0; */
/* uint32_t firstLen = 0; */
uint8_t specs = 0;
UBool varT = FALSE; UBool top_ = TRUE;
UBool startOfRules = TRUE;
UColTokenParser src;
@ -1123,8 +1129,10 @@ static void testCEs(UCollator *coll, UErrorCode *status) {
while ((current = ucol_tok_parseNextToken(&src, &strength,
&chOffset, &chLen, &exOffset, &exLen,
&varT, &top_, startOfRules, status)) != NULL) {
&specs, startOfRules, status)) != NULL) {
startOfRules = FALSE;
varT = ((specs & UCOL_TOK_VARIABLE_TOP) != 0);
top_ = ((specs & UCOL_TOK_TOP) != 0);
init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
@ -1448,10 +1456,9 @@ static void genericLocaleStarter(const char *locale, const char *s[], uint32_t s
static void genericRulesStarter(const char *rules, const char *s[], uint32_t size) {
UErrorCode status = U_ZERO_ERROR;
UChar rlz[2048] = { 0 };
UCollator *coll = NULL;
uint32_t rlen = u_unescape(rules, rlz, 2048);
ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, &status);
UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, &status);
if(U_SUCCESS(status)) {
genericOrderingTest(coll, s, size);
@ -1461,7 +1468,7 @@ static void genericRulesStarter(const char *rules, const char *s[], uint32_t siz
}
const static char chTest[][20] = {
/*"c",
"c",
"C",
"ca", "cb", "cx", "cy", "CZ",
"c\\u030C", "C\\u030C",
@ -1469,7 +1476,7 @@ const static char chTest[][20] = {
"H",
"ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
"ch", "cH", "Ch", "CH",
"cha", "charly", "che", */"chh", "chch"/*, "chr",
"cha", "charly", "che", "chh", "chch", "chr",
"i", "I", "iarly",
"r", "R",
"r\\u030C", "R\\u030C",
@ -1477,7 +1484,7 @@ const static char chTest[][20] = {
"S",
"s\\u030C", "S\\u030C",
"z", "Z",
"z\\u030C", "Z\\u030C"*/
"z\\u030C", "Z\\u030C"
};
static void TestChMove(void) {
@ -1679,6 +1686,26 @@ However, in testing we got the following order:
.. (\u01d8)
< .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
*/
/*
 * Checks the "[before 1]" reset option.
 * The rules reset with [before 1] on each of a, e, i, o and u and chain four
 * accented forms after every reset; a final plain reset on u chains the
 * u-with-diaeresis variants. The strings in expectedOrder are handed to
 * genericRulesStarter together with the rules.
 */
static void TestBefore() {
    static const char beforeRules[] =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    /* one row per rule line above */
    static const char *expectedOrder[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };

    genericRulesStarter(beforeRules, expectedOrder,
                        sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
static void TestJ784() {
const static char *data[] = {
"A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
@ -1721,6 +1748,7 @@ static void TestJ815() {
"B"
};
genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
}
@ -1741,6 +1769,7 @@ void addMiscCollTest(TestNode** root)
addTest(root, &TestJ784, "tscoll/cmsccoll/TestJ784");
addTest(root, &TestJ815, "tscoll/cmsccoll/TestJ815");
addTest(root, &TestJ831, "tscoll/cmsccoll/TestJ831");
addTest(root, &TestBefore, "tscoll/cmsccoll/TestBefore");
/*addTest(root, &TestUCAZero, "tscoll/cmsccoll/TestUCAZero");*/
/*addTest(root, &TestUnmappedSpaces, "tscoll/cmsccoll/TestUnmappedSpaces");*/
/*addTest(root, &PrintMarkDavis, "tscoll/cmsccoll/PrintMarkDavis");*/

View File

@ -149,10 +149,6 @@ uint32_t stringContSize[0xFFFF];
uint32_t sContPos = 0;
uint32_t contSize = 0;
#define UCOL_INV_SIZEMASK 0xFFF00000
#define UCOL_INV_OFFSETMASK 0x000FFFFF
#define UCOL_INV_SHIFTVALUE 20
void addNewInverse(UCAElements *element, UErrorCode *status) {
if(U_FAILURE(*status)) {
return;