ICU-7015 Implementing compact collation tailoring syntax by introducing a star-notation.
X-SVN-Rev: 27521
This commit is contained in:
parent
6d69ce9111
commit
b11c49bcd5
@ -760,6 +760,7 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
uint32_t extensionOffset = 0;
|
||||
uint32_t newStrength = UCOL_TOK_UNSET;
|
||||
UChar buff[10];
|
||||
UChar32 codepoint;
|
||||
|
||||
src->parsedToken.charsOffset = 0; src->parsedToken.charsLen = 0;
|
||||
src->parsedToken.prefixOffset = 0; src->parsedToken.prefixLen = 0;
|
||||
@ -823,6 +824,12 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_IDENTICAL;
|
||||
if(*(src->current+1) == 0x002A) {/*'*'*/
|
||||
src->current++;
|
||||
src->prevStrength = newStrength;
|
||||
}else{
|
||||
src->prevStrength = UCOL_TOK_UNSET;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x002C/*','*/:
|
||||
@ -838,6 +845,7 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_TERTIARY;
|
||||
src->prevStrength = UCOL_TOK_UNSET;
|
||||
break;
|
||||
|
||||
case 0x003B/*';'*/:
|
||||
@ -853,6 +861,7 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_SECONDARY;
|
||||
src->prevStrength = UCOL_TOK_UNSET;
|
||||
break;
|
||||
|
||||
case 0x003C/*'<'*/:
|
||||
@ -880,6 +889,12 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
} else { /* just one */
|
||||
newStrength = UCOL_PRIMARY;
|
||||
}
|
||||
if(*(src->current+1) == 0x002A) {/*'*'*/
|
||||
src->current++;
|
||||
src->prevStrength = newStrength;
|
||||
}else{
|
||||
src->prevStrength = UCOL_TOK_UNSET;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x0026/*'&'*/:
|
||||
@ -889,6 +904,7 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
}
|
||||
|
||||
newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */
|
||||
src->prevStrength = UCOL_TOK_UNSET;
|
||||
break;
|
||||
|
||||
case 0x005b/*'['*/:
|
||||
@ -953,11 +969,15 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
/* found a quote, we're gonna start copying */
|
||||
case 0x0027/*'\''*/:
|
||||
if (newStrength == UCOL_TOK_UNSET) { /* quote is illegal until we have a strength */
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
|
||||
return NULL;
|
||||
// enabling rules to start with a non-token character a < b
|
||||
// newStrength = UCOL_TOK_RESET;
|
||||
if(src->prevStrength == UCOL_TOK_UNSET){
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
|
||||
return NULL;
|
||||
// enabling rules to start with a non-token character a < b
|
||||
// newStrength = UCOL_TOK_RESET;
|
||||
}else{
|
||||
newStrength = src->prevStrength;
|
||||
}
|
||||
}
|
||||
|
||||
inQuote = TRUE;
|
||||
@ -1036,9 +1056,13 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
break;
|
||||
default:
|
||||
if (newStrength == UCOL_TOK_UNSET) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
|
||||
return NULL;
|
||||
if(src->prevStrength == UCOL_TOK_UNSET){
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
syntaxError(src->source,(int32_t)(src->current-src->source),(int32_t)(src->end-src->source),parseError);
|
||||
return NULL;
|
||||
}else{
|
||||
newStrength = src->prevStrength;
|
||||
}
|
||||
}
|
||||
|
||||
if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) {
|
||||
@ -1056,6 +1080,11 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
src->parsedToken.charsOffset = (uint32_t)(src->current - src->source);
|
||||
}
|
||||
src->parsedToken.charsLen++;
|
||||
if(src->prevStrength != UCOL_TOK_UNSET){
|
||||
U16_NEXT(0, src->current, src->end, codepoint);
|
||||
src->parsedToken.charsLen+= U16_LENGTH(codepoint) - 1;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
} else {
|
||||
if(newExtensionLen == 0) {
|
||||
extensionOffset = (uint32_t)(src->current - src->source);
|
||||
@ -1069,6 +1098,10 @@ ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
}
|
||||
|
||||
if(wasInQuote) {
|
||||
if(src->prevStrength != UCOL_TOK_UNSET && !inQuote){
|
||||
src->current++;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
if(ch != 0x27) {
|
||||
if(inQuote || !uprv_isRuleWhiteSpace(ch)) {
|
||||
ucol_tok_addToExtraCurrent(src, &ch, 1, status);
|
||||
@ -1826,6 +1859,7 @@ void ucol_tok_initTokenList(UColTokenParser *src, const UChar *rules, const uint
|
||||
src->parsedToken.flags = 0;
|
||||
src->parsedToken.strength = UCOL_TOK_UNSET;
|
||||
src->buildCCTabFlag = FALSE;
|
||||
src->prevStrength = UCOL_TOK_UNSET;
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
return;
|
||||
@ -1915,4 +1949,3 @@ void ucol_tok_closeTokenList(UColTokenParser *src) {
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2008, International Business Machines
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -123,6 +123,7 @@ typedef struct {
|
||||
USet *copySet;
|
||||
USet *removeSet;
|
||||
UBool buildCCTabFlag; /* Tailoring rule requirs building combining class table. */
|
||||
uint32_t prevStrength;
|
||||
} UColTokenParser;
|
||||
|
||||
typedef struct {
|
||||
|
@ -5433,6 +5433,57 @@ static void TestHiragana(void) {
|
||||
ucol_close(ucol);
|
||||
}
|
||||
|
||||
/*
 * Source strings for TestSameStrengthList. Row i is compared against
 * testSameStrengthTargetCases[i]; the expected outcome is
 * sameStrengthResults[i]. Unlisted trailing elements of each row are
 * zero-initialized, which NUL-terminates the string.
 */
static const UChar testSameStrengthSourceCases[][MAX_TOKEN_LEN] = {
    {0x0061},                   /* "a"   */
    {0x0061},                   /* "a"   */
    {0x006c, 0x0061},           /* "la"  */
    {0x0061, 0x0061, 0x0061},   /* "aaa" */
    {0x0062}                    /* "b"   */
};
|
||||
|
||||
/*
 * Target strings for TestSameStrengthList. Row i is compared against
 * testSameStrengthSourceCases[i]; the expected outcome is
 * sameStrengthResults[i]. Unlisted trailing elements of each row are
 * zero-initialized, which NUL-terminates the string.
 */
static const UChar testSameStrengthTargetCases[][MAX_TOKEN_LEN] = {
    {0x0031},                   /* "1"   */
    {0x006d},                   /* "m"   */
    {0x006b, 0x0062},           /* "kb"  */
    {0x0031, 0x0032, 0x0033},   /* "123" */
    {0x007a}                    /* "z"   */
};
|
||||
|
||||
const static UCollationResult sameStrengthResults[] = {
|
||||
UCOL_EQUAL,
|
||||
UCOL_LESS,
|
||||
UCOL_LESS,
|
||||
UCOL_EQUAL,
|
||||
UCOL_LESS
|
||||
};
|
||||
|
||||
static void TestSameStrengthList(void)
|
||||
{
|
||||
|
||||
int32_t i;
|
||||
UParseError error;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator *myCollation;
|
||||
char srules[500] = "&a<*bcd &b<<*klm &k<<<*xyz &a=*123";
|
||||
UChar rules[500];
|
||||
uint32_t length = 0;
|
||||
|
||||
u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status);
|
||||
myCollation = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
|
||||
if(U_FAILURE(status)){
|
||||
log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
log_verbose("Testing the <<* syntax\n");
|
||||
/*ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
||||
ucol_setStrength(myCollation, UCOL_TERTIARY);*/
|
||||
for (i = 0; i < 5 ; i++)
|
||||
{
|
||||
doTest(myCollation, testSameStrengthSourceCases[i], testSameStrengthTargetCases[i], sameStrengthResults[i]);
|
||||
}
|
||||
ucol_close(myCollation);
|
||||
}
|
||||
|
||||
/*
 * Calls addTest() with `root`, the address of x, and a path string formed by
 * appending the stringized name of x to "tscoll/cmsccoll/". Expects a
 * variable named `root` to be in scope where the macro is expanded.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
|
||||
|
||||
void addMiscCollTest(TestNode** root)
|
||||
@ -5508,6 +5559,7 @@ void addMiscCollTest(TestNode** root)
|
||||
TEST(TestTailor6179);
|
||||
TEST(TestUCAPrecontext);
|
||||
TEST(TestOutOfBuffer5468);
|
||||
TEST(TestSameStrengthList);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
Loading…
Reference in New Issue
Block a user