67e296e813
X-SVN-Rev: 23063
505 lines
16 KiB
C
505 lines
16 KiB
C
//---------------------------------------------------------------------------------
//
// Generated Header File. Do not edit by hand.
// This file contains the state table for the ICU Regular Expression Pattern Parser.
// It is generated by the Perl script "regexcst.pl" from
// the rule parser state definitions file "regexcst.txt".
//
// Copyright (C) 2002-2007 International Business Machines Corporation
// and others. All rights reserved.
//
//---------------------------------------------------------------------------------
#ifndef RBBIRPT_H
#define RBBIRPT_H

U_NAMESPACE_BEGIN
//
// Character classes for regex pattern scanning.
//
// These values appear in the fCharClass column of the parser state table.
// Per the RegexTableEl field comments, values of 128 and above denote a
// character class index rather than an individual ASCII character.
//
static const uint8_t kRuleSet_digit_char = 128;
static const uint8_t kRuleSet_rule_char = 129;


//
// Actions that a row of the pattern parser state table can name in its
// fAction field. The enumerators take the default values 0, 1, 2, ... in the
// order listed; the order comes from the generator and must not be changed
// by hand.
//
// Note: "doIntevalLowerDigit" is spelled this way in the generated source and
// is referenced under that spelling by the state table, so it is kept as is.
//
enum Regex_PatternParseAction {
    doLiteralChar,
    doSetEnd,
    doBackslashA,
    doSetBeginUnion,
    doNOP,
    doSetBackslash_w,
    doSetRange,
    doBackslashG,
    doPerlInline,
    doSetAddDash,
    doIntevalLowerDigit,
    doProperty,
    doBackslashX,
    doOpenAtomicParen,
    doSetLiteralEscaped,
    doPatFinish,
    doSetBackslash_D,
    doSetDifference2,
    doNamedChar,
    doNGPlus,
    doOpenLookBehindNeg,
    doIntervalError,
    doIntervalSame,
    doBackRef,
    doPlus,
    doOpenCaptureParen,
    doMismatchedParenErr,
    doBeginMatchMode,
    doEscapeError,
    doOpenNonCaptureParen,
    doDollar,
    doSetProp,
    doIntervalUpperDigit,
    doSetBegin,
    doBackslashs,
    doOpenLookBehind,
    doSetMatchMode,
    doOrOperator,
    doCaret,
    doMatchModeParen,
    doStar,
    doOpt,
    doMatchMode,
    doSuppressComments,
    doPossessiveInterval,
    doOpenLookAheadNeg,
    doBackslashW,
    doCloseParen,
    doSetOpError,
    doIntervalInit,
    doSetFinish,
    doSetIntersection2,
    doNGStar,
    doEnterQuoteMode,
    doSetAddAmp,
    doBackslashB,
    doBackslashw,
    doPossessiveOpt,
    doSetNegate,
    doRuleError,
    doBackslashb,
    doConditionalExpr,
    doPossessivePlus,
    doBadOpenParenType,
    doNGInterval,
    doSetLiteral,
    doSetNamedChar,
    doBackslashd,
    doSetBeginDifference1,
    doBackslashD,
    doExit,
    doSetBackslash_S,
    doInterval,
    doSetNoCloseError,
    doNGOpt,
    doSetPosixProp,
    doBackslashS,
    doBackslashZ,
    doSetBeginIntersection1,
    doSetBackslash_W,
    doSetBackslash_d,
    doOpenLookAhead,
    doBadModeFlag,
    doPatStart,
    doSetNamedRange,
    doPossessiveStar,
    doEscapedLiteralChar,
    doSetBackslash_s,
    doBackslashz,
    doDotAny,
    rbbiLastAction      // Last enumerator; not used by any row of the state table.
};

//-------------------------------------------------------------------------------
|
|
//
|
|
// RegexTableEl represents the structure of a row in the transition table
|
|
// for the pattern parser state machine.
|
|
//-------------------------------------------------------------------------------
|
|
struct RegexTableEl {
|
|
Regex_PatternParseAction fAction;
|
|
uint8_t fCharClass; // 0-127: an individual ASCII character
|
|
// 128-255: character class index
|
|
uint8_t fNextState; // 0-250: normal next-state numbers
|
|
// 255: pop next-state from stack.
|
|
uint8_t fPushState;
|
|
UBool fNextChar;
|
|
};
|
|
|
|
static const struct RegexTableEl gRuleParseStateTable[] = {
|
|
{doNOP, 0, 0, 0, TRUE}
|
|
, {doPatStart, 255, 2,0, FALSE} // 1 start
|
|
, {doLiteralChar, 254, 14,0, TRUE} // 2 term
|
|
, {doLiteralChar, 129, 14,0, TRUE} // 3
|
|
, {doSetBegin, 91 /* [ */, 104, 182, TRUE} // 4
|
|
, {doNOP, 40 /* ( */, 27,0, TRUE} // 5
|
|
, {doDotAny, 46 /* . */, 14,0, TRUE} // 6
|
|
, {doCaret, 94 /* ^ */, 14,0, TRUE} // 7
|
|
, {doDollar, 36 /* $ */, 14,0, TRUE} // 8
|
|
, {doNOP, 92 /* \ */, 84,0, TRUE} // 9
|
|
, {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
|
|
, {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
|
|
, {doPatFinish, 253, 2,0, FALSE} // 12
|
|
, {doRuleError, 255, 183,0, FALSE} // 13
|
|
, {doNOP, 42 /* * */, 63,0, TRUE} // 14 expr-quant
|
|
, {doNOP, 43 /* + */, 66,0, TRUE} // 15
|
|
, {doNOP, 63 /* ? */, 69,0, TRUE} // 16
|
|
, {doIntervalInit, 123 /* { */, 72,0, TRUE} // 17
|
|
, {doNOP, 40 /* ( */, 23,0, TRUE} // 18
|
|
, {doNOP, 255, 20,0, FALSE} // 19
|
|
, {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont
|
|
, {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21
|
|
, {doNOP, 255, 2,0, FALSE} // 22
|
|
, {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant
|
|
, {doNOP, 255, 27,0, FALSE} // 24
|
|
, {doNOP, 35 /* # */, 49, 14, TRUE} // 25 open-paren-quant2
|
|
, {doNOP, 255, 29,0, FALSE} // 26
|
|
, {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren
|
|
, {doOpenCaptureParen, 255, 2, 14, FALSE} // 28
|
|
, {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended
|
|
, {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30
|
|
, {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31
|
|
, {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
|
|
, {doNOP, 60 /* < */, 46,0, TRUE} // 33
|
|
, {doNOP, 35 /* # */, 49, 2, TRUE} // 34
|
|
, {doBeginMatchMode, 105 /* i */, 52,0, FALSE} // 35
|
|
, {doBeginMatchMode, 100 /* d */, 52,0, FALSE} // 36
|
|
, {doBeginMatchMode, 109 /* m */, 52,0, FALSE} // 37
|
|
, {doBeginMatchMode, 115 /* s */, 52,0, FALSE} // 38
|
|
, {doBeginMatchMode, 117 /* u */, 52,0, FALSE} // 39
|
|
, {doBeginMatchMode, 119 /* w */, 52,0, FALSE} // 40
|
|
, {doBeginMatchMode, 120 /* x */, 52,0, FALSE} // 41
|
|
, {doBeginMatchMode, 45 /* - */, 52,0, FALSE} // 42
|
|
, {doConditionalExpr, 40 /* ( */, 183,0, TRUE} // 43
|
|
, {doPerlInline, 123 /* { */, 183,0, TRUE} // 44
|
|
, {doBadOpenParenType, 255, 183,0, FALSE} // 45
|
|
, {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind
|
|
, {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47
|
|
, {doBadOpenParenType, 255, 183,0, FALSE} // 48
|
|
, {doNOP, 41 /* ) */, 255,0, TRUE} // 49 paren-comment
|
|
, {doMismatchedParenErr, 253, 183,0, FALSE} // 50
|
|
, {doNOP, 255, 49,0, TRUE} // 51
|
|
, {doMatchMode, 105 /* i */, 52,0, TRUE} // 52 paren-flag
|
|
, {doMatchMode, 100 /* d */, 52,0, TRUE} // 53
|
|
, {doMatchMode, 109 /* m */, 52,0, TRUE} // 54
|
|
, {doMatchMode, 115 /* s */, 52,0, TRUE} // 55
|
|
, {doMatchMode, 117 /* u */, 52,0, TRUE} // 56
|
|
, {doMatchMode, 119 /* w */, 52,0, TRUE} // 57
|
|
, {doMatchMode, 120 /* x */, 52,0, TRUE} // 58
|
|
, {doMatchMode, 45 /* - */, 52,0, TRUE} // 59
|
|
, {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 60
|
|
, {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 61
|
|
, {doBadModeFlag, 255, 183,0, FALSE} // 62
|
|
, {doNGStar, 63 /* ? */, 20,0, TRUE} // 63 quant-star
|
|
, {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 64
|
|
, {doStar, 255, 20,0, FALSE} // 65
|
|
, {doNGPlus, 63 /* ? */, 20,0, TRUE} // 66 quant-plus
|
|
, {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 67
|
|
, {doPlus, 255, 20,0, FALSE} // 68
|
|
, {doNGOpt, 63 /* ? */, 20,0, TRUE} // 69 quant-opt
|
|
, {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 70
|
|
, {doOpt, 255, 20,0, FALSE} // 71
|
|
, {doNOP, 128, 74,0, FALSE} // 72 interval-open
|
|
, {doIntervalError, 255, 183,0, FALSE} // 73
|
|
, {doIntevalLowerDigit, 128, 74,0, TRUE} // 74 interval-lower
|
|
, {doNOP, 44 /* , */, 78,0, TRUE} // 75
|
|
, {doIntervalSame, 125 /* } */, 81,0, TRUE} // 76
|
|
, {doIntervalError, 255, 183,0, FALSE} // 77
|
|
, {doIntervalUpperDigit, 128, 78,0, TRUE} // 78 interval-upper
|
|
, {doNOP, 125 /* } */, 81,0, TRUE} // 79
|
|
, {doIntervalError, 255, 183,0, FALSE} // 80
|
|
, {doNGInterval, 63 /* ? */, 20,0, TRUE} // 81 interval-type
|
|
, {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 82
|
|
, {doInterval, 255, 20,0, FALSE} // 83
|
|
, {doBackslashA, 65 /* A */, 2,0, TRUE} // 84 backslash
|
|
, {doBackslashB, 66 /* B */, 2,0, TRUE} // 85
|
|
, {doBackslashb, 98 /* b */, 2,0, TRUE} // 86
|
|
, {doBackslashd, 100 /* d */, 14,0, TRUE} // 87
|
|
, {doBackslashD, 68 /* D */, 14,0, TRUE} // 88
|
|
, {doBackslashG, 71 /* G */, 2,0, TRUE} // 89
|
|
, {doNamedChar, 78 /* N */, 14,0, FALSE} // 90
|
|
, {doProperty, 112 /* p */, 14,0, FALSE} // 91
|
|
, {doProperty, 80 /* P */, 14,0, FALSE} // 92
|
|
, {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 93
|
|
, {doBackslashS, 83 /* S */, 14,0, TRUE} // 94
|
|
, {doBackslashs, 115 /* s */, 14,0, TRUE} // 95
|
|
, {doBackslashW, 87 /* W */, 14,0, TRUE} // 96
|
|
, {doBackslashw, 119 /* w */, 14,0, TRUE} // 97
|
|
, {doBackslashX, 88 /* X */, 14,0, TRUE} // 98
|
|
, {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 99
|
|
, {doBackslashz, 122 /* z */, 2,0, TRUE} // 100
|
|
, {doBackRef, 128, 14,0, TRUE} // 101
|
|
, {doEscapeError, 253, 183,0, FALSE} // 102
|
|
, {doEscapedLiteralChar, 255, 14,0, TRUE} // 103
|
|
, {doSetNegate, 94 /* ^ */, 107,0, TRUE} // 104 set-open
|
|
, {doSetPosixProp, 58 /* : */, 109,0, FALSE} // 105
|
|
, {doNOP, 255, 107,0, FALSE} // 106
|
|
, {doSetLiteral, 93 /* ] */, 122,0, TRUE} // 107 set-open2
|
|
, {doNOP, 255, 112,0, FALSE} // 108
|
|
, {doSetEnd, 93 /* ] */, 255,0, TRUE} // 109 set-posix
|
|
, {doNOP, 58 /* : */, 112,0, FALSE} // 110
|
|
, {doRuleError, 255, 183,0, FALSE} // 111
|
|
, {doSetEnd, 93 /* ] */, 255,0, TRUE} // 112 set-start
|
|
, {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 113
|
|
, {doNOP, 92 /* \ */, 172,0, TRUE} // 114
|
|
, {doNOP, 45 /* - */, 118,0, TRUE} // 115
|
|
, {doNOP, 38 /* & */, 120,0, TRUE} // 116
|
|
, {doSetLiteral, 255, 122,0, TRUE} // 117
|
|
, {doRuleError, 45 /* - */, 183,0, FALSE} // 118 set-start-dash
|
|
, {doSetAddDash, 255, 122,0, FALSE} // 119
|
|
, {doRuleError, 38 /* & */, 183,0, FALSE} // 120 set-start-amp
|
|
, {doSetAddAmp, 255, 122,0, FALSE} // 121
|
|
, {doSetEnd, 93 /* ] */, 255,0, TRUE} // 122 set-after-lit
|
|
, {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 123
|
|
, {doNOP, 45 /* - */, 159,0, TRUE} // 124
|
|
, {doNOP, 38 /* & */, 150,0, TRUE} // 125
|
|
, {doNOP, 92 /* \ */, 172,0, TRUE} // 126
|
|
, {doSetNoCloseError, 253, 183,0, FALSE} // 127
|
|
, {doSetLiteral, 255, 122,0, TRUE} // 128
|
|
, {doSetEnd, 93 /* ] */, 255,0, TRUE} // 129 set-after-set
|
|
, {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 130
|
|
, {doNOP, 45 /* - */, 152,0, TRUE} // 131
|
|
, {doNOP, 38 /* & */, 147,0, TRUE} // 132
|
|
, {doNOP, 92 /* \ */, 172,0, TRUE} // 133
|
|
, {doSetNoCloseError, 253, 183,0, FALSE} // 134
|
|
, {doSetLiteral, 255, 122,0, TRUE} // 135
|
|
, {doSetEnd, 93 /* ] */, 255,0, TRUE} // 136 set-after-range
|
|
, {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 137
|
|
, {doNOP, 45 /* - */, 155,0, TRUE} // 138
|
|
, {doNOP, 38 /* & */, 157,0, TRUE} // 139
|
|
, {doNOP, 92 /* \ */, 172,0, TRUE} // 140
|
|
, {doSetNoCloseError, 253, 183,0, FALSE} // 141
|
|
, {doSetLiteral, 255, 122,0, TRUE} // 142
|
|
, {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 143 set-after-op
|
|
, {doSetOpError, 93 /* ] */, 183,0, FALSE} // 144
|
|
, {doNOP, 92 /* \ */, 172,0, TRUE} // 145
|
|
, {doSetLiteral, 255, 122,0, TRUE} // 146
|
|
, {doSetBeginIntersection1, 91 /* [ */, 104, 129, TRUE} // 147 set-set-amp
|
|
, {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 148
|
|
, {doSetAddAmp, 255, 122,0, FALSE} // 149
|
|
, {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 150 set-lit-amp
|
|
, {doSetAddAmp, 255, 122,0, FALSE} // 151
|
|
, {doSetBeginDifference1, 91 /* [ */, 104, 129, TRUE} // 152 set-set-dash
|
|
, {doSetDifference2, 45 /* - */, 143,0, TRUE} // 153
|
|
, {doSetAddDash, 255, 122,0, FALSE} // 154
|
|
, {doSetDifference2, 45 /* - */, 143,0, TRUE} // 155 set-range-dash
|
|
, {doSetAddDash, 255, 122,0, FALSE} // 156
|
|
, {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 157 set-range-amp
|
|
, {doSetAddAmp, 255, 122,0, FALSE} // 158
|
|
, {doSetDifference2, 45 /* - */, 143,0, TRUE} // 159 set-lit-dash
|
|
, {doSetAddDash, 91 /* [ */, 122,0, FALSE} // 160
|
|
, {doSetAddDash, 93 /* ] */, 122,0, FALSE} // 161
|
|
, {doNOP, 92 /* \ */, 164,0, TRUE} // 162
|
|
, {doSetRange, 255, 136,0, TRUE} // 163
|
|
, {doSetOpError, 115 /* s */, 183,0, FALSE} // 164 set-lit-dash-escape
|
|
, {doSetOpError, 83 /* S */, 183,0, FALSE} // 165
|
|
, {doSetOpError, 119 /* w */, 183,0, FALSE} // 166
|
|
, {doSetOpError, 87 /* W */, 183,0, FALSE} // 167
|
|
, {doSetOpError, 100 /* d */, 183,0, FALSE} // 168
|
|
, {doSetOpError, 68 /* D */, 183,0, FALSE} // 169
|
|
, {doSetNamedRange, 78 /* N */, 136,0, FALSE} // 170
|
|
, {doSetRange, 255, 136,0, TRUE} // 171
|
|
, {doSetProp, 112 /* p */, 129,0, FALSE} // 172 set-escape
|
|
, {doSetProp, 80 /* P */, 129,0, FALSE} // 173
|
|
, {doSetNamedChar, 78 /* N */, 122,0, FALSE} // 174
|
|
, {doSetBackslash_s, 115 /* s */, 136,0, TRUE} // 175
|
|
, {doSetBackslash_S, 83 /* S */, 136,0, TRUE} // 176
|
|
, {doSetBackslash_w, 119 /* w */, 136,0, TRUE} // 177
|
|
, {doSetBackslash_W, 87 /* W */, 136,0, TRUE} // 178
|
|
, {doSetBackslash_d, 100 /* d */, 136,0, TRUE} // 179
|
|
, {doSetBackslash_D, 68 /* D */, 136,0, TRUE} // 180
|
|
, {doSetLiteralEscaped, 255, 122,0, TRUE} // 181
|
|
, {doSetFinish, 255, 14,0, FALSE} // 182 set-finish
|
|
, {doExit, 255, 183,0, TRUE} // 183 errorDeath
|
|
};
|
|
//
// Names of the parser states, indexed by row number of gRuleParseStateTable.
// Only the first row of each state has a name; every other row, including
// row 0, holds a null pointer. One extra null entry follows the last row.
// Each line below starts a state; the trailing comment gives the row indices
// that the line covers.
//
static const char * const RegexStateNames[] = {
    0,                                                      //   0
    "start",                                                //   1
    "term", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                //   2 -  13
    "expr-quant", 0, 0, 0, 0, 0,                            //  14 -  19
    "expr-cont", 0, 0,                                      //  20 -  22
    "open-paren-quant", 0,                                  //  23 -  24
    "open-paren-quant2", 0,                                 //  25 -  26
    "open-paren", 0,                                        //  27 -  28
    "open-paren-extended", 0, 0, 0, 0, 0, 0, 0, 0,          //  29 -  37
        0, 0, 0, 0, 0, 0, 0, 0,                             //  38 -  45
    "open-paren-lookbehind", 0, 0,                          //  46 -  48
    "paren-comment", 0, 0,                                  //  49 -  51
    "paren-flag", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //  52 -  62
    "quant-star", 0, 0,                                     //  63 -  65
    "quant-plus", 0, 0,                                     //  66 -  68
    "quant-opt", 0, 0,                                      //  69 -  71
    "interval-open", 0,                                     //  72 -  73
    "interval-lower", 0, 0, 0,                              //  74 -  77
    "interval-upper", 0, 0,                                 //  78 -  80
    "interval-type", 0, 0,                                  //  81 -  83
    "backslash", 0, 0, 0, 0, 0, 0, 0, 0, 0,                 //  84 -  93
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                       //  94 - 103
    "set-open", 0, 0,                                       // 104 - 106
    "set-open2", 0,                                         // 107 - 108
    "set-posix", 0, 0,                                      // 109 - 111
    "set-start", 0, 0, 0, 0, 0,                             // 112 - 117
    "set-start-dash", 0,                                    // 118 - 119
    "set-start-amp", 0,                                     // 120 - 121
    "set-after-lit", 0, 0, 0, 0, 0, 0,                      // 122 - 128
    "set-after-set", 0, 0, 0, 0, 0, 0,                      // 129 - 135
    "set-after-range", 0, 0, 0, 0, 0, 0,                    // 136 - 142
    "set-after-op", 0, 0, 0,                                // 143 - 146
    "set-set-amp", 0, 0,                                    // 147 - 149
    "set-lit-amp", 0,                                       // 150 - 151
    "set-set-dash", 0, 0,                                   // 152 - 154
    "set-range-dash", 0,                                    // 155 - 156
    "set-range-amp", 0,                                     // 157 - 158
    "set-lit-dash", 0, 0, 0, 0,                             // 159 - 163
    "set-lit-dash-escape", 0, 0, 0, 0, 0, 0, 0,             // 164 - 171
    "set-escape", 0, 0, 0, 0, 0, 0, 0, 0, 0,                // 172 - 181
    "set-finish",                                           // 182
    "errorDeath",                                           // 183
    0                                                       // 184
};

U_NAMESPACE_END

#endif