ICU-2190 RBBI rule builder: fixed incorrect handling of some \escaped chars in rules,
and fixed the failure to allow white space before a * or ?. X-SVN-Rev: 9824
This commit is contained in:
parent
3144b2665e
commit
423eb52850
@ -75,89 +75,90 @@ static const struct RBBIRuleTableEl gRuleParseStateTable[] = {
|
||||
{doNOP, 0, 0, 0, TRUE}
|
||||
, {doExprStart, 254, 12, 8, FALSE} // 1 start
|
||||
, {doNOP, 130, 1,0, TRUE} // 2
|
||||
, {doExprStart, 36 /* $ */, 70, 80, FALSE} // 3
|
||||
, {doExprStart, 36 /* $ */, 71, 81, FALSE} // 3
|
||||
, {doReverseDir, 33 /* ! */, 11,0, TRUE} // 4
|
||||
, {doNOP, 59 /* ; */, 1,0, TRUE} // 5
|
||||
, {doNOP, 252, 0,0, FALSE} // 6
|
||||
, {doExprStart, 255, 12, 8, FALSE} // 7
|
||||
, {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 8 break-rule-end
|
||||
, {doNOP, 130, 8,0, TRUE} // 9
|
||||
, {doRuleError, 255, 85,0, FALSE} // 10
|
||||
, {doRuleError, 255, 86,0, FALSE} // 10
|
||||
, {doExprStart, 255, 12, 8, FALSE} // 11 reverse-rule
|
||||
, {doRuleChar, 254, 21,0, TRUE} // 12 term
|
||||
, {doNOP, 130, 12,0, TRUE} // 13
|
||||
, {doRuleChar, 129, 21,0, TRUE} // 14
|
||||
, {doNOP, 91 /* [ */, 76, 21, FALSE} // 15
|
||||
, {doNOP, 91 /* [ */, 77, 21, FALSE} // 15
|
||||
, {doLParen, 40 /* ( */, 12, 21, TRUE} // 16
|
||||
, {doNOP, 36 /* $ */, 70, 20, FALSE} // 17
|
||||
, {doNOP, 36 /* $ */, 71, 20, FALSE} // 17
|
||||
, {doDotAny, 46 /* . */, 21,0, TRUE} // 18
|
||||
, {doRuleError, 255, 85,0, FALSE} // 19
|
||||
, {doRuleError, 255, 86,0, FALSE} // 19
|
||||
, {doCheckVarDef, 255, 21,0, FALSE} // 20 term-var-ref
|
||||
, {doUnaryOpStar, 42 /* * */, 25,0, TRUE} // 21 expr-mod
|
||||
, {doUnaryOpPlus, 43 /* + */, 25,0, TRUE} // 22
|
||||
, {doUnaryOpQuestion, 63 /* ? */, 25,0, TRUE} // 23
|
||||
, {doNOP, 255, 25,0, FALSE} // 24
|
||||
, {doExprCatOperator, 254, 12,0, FALSE} // 25 expr-cont
|
||||
, {doNOP, 130, 25,0, TRUE} // 26
|
||||
, {doExprCatOperator, 129, 12,0, FALSE} // 27
|
||||
, {doExprCatOperator, 91 /* [ */, 12,0, FALSE} // 28
|
||||
, {doExprCatOperator, 40 /* ( */, 12,0, FALSE} // 29
|
||||
, {doExprCatOperator, 36 /* $ */, 12,0, FALSE} // 30
|
||||
, {doExprCatOperator, 46 /* . */, 12,0, FALSE} // 31
|
||||
, {doExprCatOperator, 47 /* / */, 37,0, FALSE} // 32
|
||||
, {doExprCatOperator, 123 /* { */, 49,0, TRUE} // 33
|
||||
, {doExprOrOperator, 124 /* | */, 12,0, TRUE} // 34
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 35
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 36
|
||||
, {doSlash, 47 /* / */, 39,0, TRUE} // 37 look-ahead
|
||||
, {doNOP, 255, 85,0, FALSE} // 38
|
||||
, {doExprCatOperator, 254, 12,0, FALSE} // 39 expr-cont-no-slash
|
||||
, {doNOP, 130, 25,0, TRUE} // 40
|
||||
, {doExprCatOperator, 129, 12,0, FALSE} // 41
|
||||
, {doExprCatOperator, 91 /* [ */, 12,0, FALSE} // 42
|
||||
, {doExprCatOperator, 40 /* ( */, 12,0, FALSE} // 43
|
||||
, {doExprCatOperator, 36 /* $ */, 12,0, FALSE} // 44
|
||||
, {doExprCatOperator, 46 /* . */, 12,0, FALSE} // 45
|
||||
, {doExprOrOperator, 124 /* | */, 12,0, TRUE} // 46
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 47
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 48
|
||||
, {doNOP, 130, 49,0, TRUE} // 49 tag-open
|
||||
, {doStartTagValue, 128, 52,0, FALSE} // 50
|
||||
, {doTagExpectedError, 255, 85,0, FALSE} // 51
|
||||
, {doNOP, 130, 56,0, TRUE} // 52 tag-value
|
||||
, {doNOP, 125 /* } */, 56,0, FALSE} // 53
|
||||
, {doTagDigit, 128, 52,0, TRUE} // 54
|
||||
, {doTagExpectedError, 255, 85,0, FALSE} // 55
|
||||
, {doNOP, 130, 56,0, TRUE} // 56 tag-close
|
||||
, {doTagValue, 125 /* } */, 59,0, TRUE} // 57
|
||||
, {doTagExpectedError, 255, 85,0, FALSE} // 58
|
||||
, {doExprCatOperator, 254, 12,0, FALSE} // 59 expr-cont-no-tag
|
||||
, {doNOP, 130, 59,0, TRUE} // 60
|
||||
, {doExprCatOperator, 129, 12,0, FALSE} // 61
|
||||
, {doExprCatOperator, 91 /* [ */, 12,0, FALSE} // 62
|
||||
, {doExprCatOperator, 40 /* ( */, 12,0, FALSE} // 63
|
||||
, {doExprCatOperator, 36 /* $ */, 12,0, FALSE} // 64
|
||||
, {doExprCatOperator, 46 /* . */, 12,0, FALSE} // 65
|
||||
, {doExprCatOperator, 47 /* / */, 37,0, FALSE} // 66
|
||||
, {doExprOrOperator, 124 /* | */, 12,0, TRUE} // 67
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 68
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 69
|
||||
, {doStartVariableName, 36 /* $ */, 72,0, TRUE} // 70 scan-var-name
|
||||
, {doNOP, 255, 85,0, FALSE} // 71
|
||||
, {doNOP, 132, 74,0, TRUE} // 72 scan-var-start
|
||||
, {doVariableNameExpectedErr, 255, 85,0, FALSE} // 73
|
||||
, {doNOP, 131, 74,0, TRUE} // 74 scan-var-body
|
||||
, {doEndVariableName, 255, 255,0, FALSE} // 75
|
||||
, {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 76 scan-unicode-set
|
||||
, {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 77
|
||||
, {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 78
|
||||
, {doNOP, 255, 85,0, FALSE} // 79
|
||||
, {doNOP, 130, 80,0, TRUE} // 80 assign-or-rule
|
||||
, {doStartAssign, 61 /* = */, 12, 83, TRUE} // 81
|
||||
, {doNOP, 255, 20, 8, FALSE} // 82
|
||||
, {doEndAssign, 59 /* ; */, 1,0, TRUE} // 83 assign-end
|
||||
, {doRuleErrorAssignExpr, 255, 85,0, FALSE} // 84
|
||||
, {doExit, 255, 85,0, TRUE} // 85 errorDeath
|
||||
, {doNOP, 130, 21,0, TRUE} // 21 expr-mod
|
||||
, {doUnaryOpStar, 42 /* * */, 26,0, TRUE} // 22
|
||||
, {doUnaryOpPlus, 43 /* + */, 26,0, TRUE} // 23
|
||||
, {doUnaryOpQuestion, 63 /* ? */, 26,0, TRUE} // 24
|
||||
, {doNOP, 255, 26,0, FALSE} // 25
|
||||
, {doExprCatOperator, 254, 12,0, FALSE} // 26 expr-cont
|
||||
, {doNOP, 130, 26,0, TRUE} // 27
|
||||
, {doExprCatOperator, 129, 12,0, FALSE} // 28
|
||||
, {doExprCatOperator, 91 /* [ */, 12,0, FALSE} // 29
|
||||
, {doExprCatOperator, 40 /* ( */, 12,0, FALSE} // 30
|
||||
, {doExprCatOperator, 36 /* $ */, 12,0, FALSE} // 31
|
||||
, {doExprCatOperator, 46 /* . */, 12,0, FALSE} // 32
|
||||
, {doExprCatOperator, 47 /* / */, 38,0, FALSE} // 33
|
||||
, {doExprCatOperator, 123 /* { */, 50,0, TRUE} // 34
|
||||
, {doExprOrOperator, 124 /* | */, 12,0, TRUE} // 35
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 36
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 37
|
||||
, {doSlash, 47 /* / */, 40,0, TRUE} // 38 look-ahead
|
||||
, {doNOP, 255, 86,0, FALSE} // 39
|
||||
, {doExprCatOperator, 254, 12,0, FALSE} // 40 expr-cont-no-slash
|
||||
, {doNOP, 130, 26,0, TRUE} // 41
|
||||
, {doExprCatOperator, 129, 12,0, FALSE} // 42
|
||||
, {doExprCatOperator, 91 /* [ */, 12,0, FALSE} // 43
|
||||
, {doExprCatOperator, 40 /* ( */, 12,0, FALSE} // 44
|
||||
, {doExprCatOperator, 36 /* $ */, 12,0, FALSE} // 45
|
||||
, {doExprCatOperator, 46 /* . */, 12,0, FALSE} // 46
|
||||
, {doExprOrOperator, 124 /* | */, 12,0, TRUE} // 47
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 48
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 49
|
||||
, {doNOP, 130, 50,0, TRUE} // 50 tag-open
|
||||
, {doStartTagValue, 128, 53,0, FALSE} // 51
|
||||
, {doTagExpectedError, 255, 86,0, FALSE} // 52
|
||||
, {doNOP, 130, 57,0, TRUE} // 53 tag-value
|
||||
, {doNOP, 125 /* } */, 57,0, FALSE} // 54
|
||||
, {doTagDigit, 128, 53,0, TRUE} // 55
|
||||
, {doTagExpectedError, 255, 86,0, FALSE} // 56
|
||||
, {doNOP, 130, 57,0, TRUE} // 57 tag-close
|
||||
, {doTagValue, 125 /* } */, 60,0, TRUE} // 58
|
||||
, {doTagExpectedError, 255, 86,0, FALSE} // 59
|
||||
, {doExprCatOperator, 254, 12,0, FALSE} // 60 expr-cont-no-tag
|
||||
, {doNOP, 130, 60,0, TRUE} // 61
|
||||
, {doExprCatOperator, 129, 12,0, FALSE} // 62
|
||||
, {doExprCatOperator, 91 /* [ */, 12,0, FALSE} // 63
|
||||
, {doExprCatOperator, 40 /* ( */, 12,0, FALSE} // 64
|
||||
, {doExprCatOperator, 36 /* $ */, 12,0, FALSE} // 65
|
||||
, {doExprCatOperator, 46 /* . */, 12,0, FALSE} // 66
|
||||
, {doExprCatOperator, 47 /* / */, 38,0, FALSE} // 67
|
||||
, {doExprOrOperator, 124 /* | */, 12,0, TRUE} // 68
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 69
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 70
|
||||
, {doStartVariableName, 36 /* $ */, 73,0, TRUE} // 71 scan-var-name
|
||||
, {doNOP, 255, 86,0, FALSE} // 72
|
||||
, {doNOP, 132, 75,0, TRUE} // 73 scan-var-start
|
||||
, {doVariableNameExpectedErr, 255, 86,0, FALSE} // 74
|
||||
, {doNOP, 131, 75,0, TRUE} // 75 scan-var-body
|
||||
, {doEndVariableName, 255, 255,0, FALSE} // 76
|
||||
, {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 77 scan-unicode-set
|
||||
, {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 78
|
||||
, {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 79
|
||||
, {doNOP, 255, 86,0, FALSE} // 80
|
||||
, {doNOP, 130, 81,0, TRUE} // 81 assign-or-rule
|
||||
, {doStartAssign, 61 /* = */, 12, 84, TRUE} // 82
|
||||
, {doNOP, 255, 20, 8, FALSE} // 83
|
||||
, {doEndAssign, 59 /* ; */, 1,0, TRUE} // 84 assign-end
|
||||
, {doRuleErrorAssignExpr, 255, 86,0, FALSE} // 85
|
||||
, {doExit, 255, 86,0, TRUE} // 86 errorDeath
|
||||
};
|
||||
static const char *RBBIRuleStateNames[] = { 0,
|
||||
"start",
|
||||
@ -183,6 +184,7 @@ static const char *RBBIRuleStateNames[] = { 0,
|
||||
"expr-mod",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"expr-cont",
|
||||
0,
|
||||
|
@ -110,6 +110,7 @@ term-var-ref:
|
||||
# trailing '*', '?', '+'
|
||||
#
|
||||
expr-mod:
|
||||
white_space n expr-mod
|
||||
'*' n expr-cont doUnaryOpStar
|
||||
'+' n expr-cont doUnaryOpPlus
|
||||
'?' n expr-cont doUnaryOpQuestion
|
||||
|
@ -868,8 +868,9 @@ void RBBIRuleScanner::parse() {
|
||||
|
||||
for (;;) {
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf(".");}
|
||||
if (tableEl->fCharClass < 127 && tableEl->fCharClass == fC.fChar) {
|
||||
if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE && tableEl->fCharClass == fC.fChar) {
|
||||
// Table row specified an individual character, not a set, and
|
||||
// the input character is not escaped, and
|
||||
// the input character matched it.
|
||||
break;
|
||||
}
|
||||
@ -891,7 +892,9 @@ void RBBIRuleScanner::parse() {
|
||||
break;
|
||||
}
|
||||
|
||||
if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && fC.fChar != (UChar32)-1) {
|
||||
if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && // Table specs a char class &&
|
||||
fC.fEscaped == FALSE && // char is not escaped &&
|
||||
fC.fChar != (UChar32)-1) { // char is not EOF
|
||||
UnicodeSet *uniset = fRuleSets[tableEl->fCharClass-128];
|
||||
if (uniset->contains(fC.fChar)) {
|
||||
// Table row specified a character class, or set of characters,
|
||||
|
Loading…
Reference in New Issue
Block a user