ICU-2422 regexp, match flag setting options added.

X-SVN-Rev: 11032
This commit is contained in:
Andy Heninger 2003-02-12 01:28:01 +00:00
parent 5e8f53a387
commit 2397658197
10 changed files with 218 additions and 71 deletions

View File

@ -185,7 +185,7 @@ RegexCompile::RegexCompile(RegexPattern *rxp, UErrorCode &status) : fParenStack(
fCharNum = 0;
fQuoteMode = FALSE;
fFreeForm = FALSE;
fCaseI = (fRXPat->fFlags & UREGEX_CASE_INSENSITIVE) != 0;
fModeFlags = fRXPat->fFlags;
fMatchOpenParen = -1;
fMatchCloseParen = -1;
@ -579,9 +579,10 @@ UBool RegexCompile::doParseActions(EParseAction action)
// of the two NOPs. Depending on what follows in the pattern, the
// NOPs may be changed to SAVE_STATE or JMP ops, with a target
// address of the end of the parenthesized group.
fParenStack.push(-2, *fStatus); // Begin a new frame.
fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus); // The first NOP
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
fParenStack.push(fModeFlags, *fStatus); // Match mode state
fParenStack.push(capturing, *fStatus); // Frame type.
fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus); // The first NOP location
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP loc
// Save the mapping from group number to stack frame variable position.
fRXPat->fGroupMap->addElement(varsLoc, *fStatus);
@ -601,9 +602,10 @@ UBool RegexCompile::doParseActions(EParseAction action)
// On the Parentheses stack, start a new frame and add the positions
// of the two NOPs.
fParenStack.push(-1, *fStatus); // Begin a new frame.
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
fParenStack.push(fModeFlags, *fStatus); // Match mode state
fParenStack.push(plain, *fStatus); // Begin a new frame.
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP loc
}
break;
@ -628,7 +630,8 @@ UBool RegexCompile::doParseActions(EParseAction action)
// of the two NOPs. Depending on what follows in the pattern, the
// NOPs may be changed to SAVE_STATE or JMP ops, with a target
// address of the end of the parenthesized group.
fParenStack.push(-3, *fStatus); // Begin a new frame.
fParenStack.push(fModeFlags, *fStatus); // Match mode state
fParenStack.push(atomic, *fStatus); // Frame type.
fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus); // The first NOP
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
}
@ -659,9 +662,10 @@ UBool RegexCompile::doParseActions(EParseAction action)
// On the Parentheses stack, start a new frame and add the positions
// of the NOPs.
fParenStack.push(lookAhead, *fStatus); // Begin a new frame.
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
fParenStack.push(fModeFlags, *fStatus); // Match mode state
fParenStack.push(lookAhead, *fStatus); // Frame type.
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location
}
break;
@ -690,9 +694,10 @@ UBool RegexCompile::doParseActions(EParseAction action)
// On the Parentheses stack, start a new frame and add the positions
// of the StateSave and NOP.
fParenStack.push( negLookAhead, *fStatus); // Begin a new frame.
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The STATE_SAVE
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
fParenStack.push(fModeFlags, *fStatus); // Match mode state
fParenStack.push( negLookAhead, *fStatus); // Frame type
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The STATE_SAVE location
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location
// Instructions #5 and #6 will be added when the ')' is encountered.
}
@ -957,16 +962,30 @@ UBool RegexCompile::doParseActions(EParseAction action)
case doDotAny:
// scanned a ".", match any single character.
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOTANY, 0), *fStatus);
{
int32_t op;
if (fModeFlags & UREGEX_DOTALL) {
op = URX_BUILD(URX_DOTANY_ALL, 0);
} else {
op = URX_BUILD(URX_DOTANY, 0);
}
fRXPat->fCompiledPat->addElement(op, *fStatus);
}
break;
case doCaret: // TODO: multi-line mode flag.
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_CARET, 0), *fStatus);
case doCaret:
{
int32_t op = (fModeFlags & UREGEX_MULTILINE)? URX_CARET_M : URX_CARET;
fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
}
break;
case doDollar: // TODO: multi-line mode flag.
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOLLAR, 0), *fStatus);
case doDollar:
{
int32_t op = (fModeFlags & UREGEX_MULTILINE)? URX_DOLLAR_M : URX_DOLLAR;
fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
}
break;
case doBackslashA:
@ -1051,8 +1070,9 @@ UBool RegexCompile::doParseActions(EParseAction action)
case doScanUnicodeSet:
{
UnicodeSet *theSet = scanSet();
if (fCaseI && theSet != NULL) {
if ((fModeFlags & UREGEX_CASE_INSENSITIVE) && theSet != NULL) {
caseClose(theSet); // TODO: replace with the real function.
// theSet->closeOver(USET_CASE);
}
compileSet(theSet);
}
@ -1094,7 +1114,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
// of compilation, it will be changed to the variables location.
U_ASSERT(groupNum > 0);
int32_t op;
if (fCaseI) {
if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
op = URX_BUILD(URX_BACKREF_I, groupNum);
} else {
op = URX_BUILD(URX_BACKREF, groupNum);
@ -1217,11 +1237,70 @@ UBool RegexCompile::doParseActions(EParseAction action)
break;
case doMatchMode: // (?i) and similar
// TODO: implement
error(U_REGEX_UNIMPLEMENTED);
case doBeginMatchMode:
fNewModeFlags = fModeFlags;
fSetModeFlag = TRUE;
break;
case doMatchMode: // (?i) and similar
{
int32_t bit = 0;
switch (fC.fChar) {
case 0x69: /* 'i' */ bit = UREGEX_CASE_INSENSITIVE; break;
case 0x6d: /* 'm' */ bit = UREGEX_MULTILINE; break;
case 0x73: /* 's' */ bit = UREGEX_DOTALL; break;
case 0x78: /* 'x' */ bit = UREGEX_COMMENTS; break;
case 0x2d: /* '-' */ fSetModeFlag = FALSE; break;
default:
U_ASSERT(FALSE); // Should never happen. Other chars are filtered out
// by the scanner.
}
if (fSetModeFlag) {
fNewModeFlags |= bit;
} else {
fNewModeFlags &= ~bit;
}
}
break;
case doSetMatchMode:
// We've got a (?i) or similar. The match mode is being changed, but
// the change is not scoped to a parenthesized block.
fModeFlags = fNewModeFlags;
// Prevent any string from spanning across the change of match mode.
// Otherwise the pattern "abc(?i)def" would make a single string of "abcdef"
fixLiterals();
break;
case doMatchModeParen:
// We've got a (?i: or similar. Begin a parenthesized block, save old
// mode flags so they can be restored at the close of the block.
//
// Compile to a
// - NOP, which later may be replaced by a save-state if the
// parenthesized group gets a * quantifier, followed by
// - NOP, which may later be replaced by a save-state if there
// is an '|' alternation within the parens.
{
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
// On the Parentheses stack, start a new frame and add the positions
// of the two NOPs (a normal non-capturing () frame, except for the
// saving of the original mode flags.)
fParenStack.push(fModeFlags, *fStatus);
fParenStack.push(flags, *fStatus); // Frame Marker
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
// Set the current mode flags to the new values.
fModeFlags = fNewModeFlags;
}
break;
default:
error(U_REGEX_INTERNAL_ERROR);
@ -1278,7 +1357,7 @@ void RegexCompile::literalChar() {
opType = URX_TYPE(op);
U_ASSERT(opType == URX_ONECHAR || opType == URX_ONECHAR_I || opType == URX_STRING_LEN);
if (opType == URX_ONECHAR || opType == URX_ONECHAR_I) {
if (fCaseI) {
if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
op = URX_BUILD(URX_STRING_I, fStringOpStart);
} else {
op = URX_BUILD(URX_STRING, fStringOpStart);
@ -1308,7 +1387,7 @@ void RegexCompile::literalChar() {
//------------------------------------------------------------------------------
void RegexCompile::emitONE_CHAR(UChar32 c) {
int32_t op;
if (fCaseI && (u_tolower(c) != u_toupper(c))) {
if ((fModeFlags & UREGEX_CASE_INSENSITIVE) && (u_tolower(c) != u_toupper(c))) {
// We have a cased character, and are in case insensitive matching mode.
// TODO: replace with a better test. See Alan L.'s mail of 2/6
c = u_foldCase(c, U_FOLD_CASE_DEFAULT);
@ -1540,11 +1619,17 @@ void RegexCompile::handleCloseParen() {
fMatchOpenParen = patIdx;
}
// At the close of any parenthesized block, restore the match mode flags to
// the value they had at the open paren. Saved value is
// at the top of the paren stack.
fModeFlags = fParenStack.popi();
// Do any additional fixups, depending on the specific kind of
// parenthesized grouping this is.
switch (patIdx) {
case plain:
case flags:
// No additional fixups required.
// (Grouping-only parentheses)
break;

View File

@ -74,7 +74,8 @@ public:
capturing = -2,
atomic = -3,
lookAhead = -4,
negLookAhead = -5
negLookAhead = -5,
flags = -6
};
private:
@ -142,7 +143,11 @@ private:
//
// Data associated with the generation of the pcode for the match engine
//
UBool fCaseI; // Case Insensitive Match Mode is on.
int32_t fModeFlags; // Match Flags. (Case Insensitive, etc.)
int32_t fNewModeFlags; // New flags, while compiling (?i, holds state
// until last flag is scanned.
UBool fSetModeFlag; // true for (?ismx, false for (?-ismx
int32_t fStringOpStart; // While a literal string is being scanned
// holds the start index within RegexPattern.

View File

@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN
enum Regex_PatternParseAction {
doCloseParen,
doProperty,
doBeginMatchMode,
doOrOperator,
doOpenCaptureParen,
doBadOpenParenType,
@ -53,11 +54,13 @@ enum Regex_PatternParseAction {
doBackslashA,
doBackslashB,
doNGPlus,
doSetMatchMode,
doPatFinish,
doBackslashD,
doPossesiveOpt,
doEscapeError,
doBackslashG,
doMatchModeParen,
doOpt,
doInterval,
doLiteralChar,
@ -136,11 +139,11 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
, {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
, {doNOP, 60 /* < */, 43,0, TRUE} // 33
, {doNOP, 35 /* # */, 46, 2, TRUE} // 34
, {doMatchMode, 105 /* i */, 49,0, TRUE} // 35
, {doMatchMode, 120 /* x */, 49,0, TRUE} // 36
, {doMatchMode, 115 /* s */, 49,0, TRUE} // 37
, {doMatchMode, 109 /* m */, 49,0, TRUE} // 38
, {doMatchMode, 45 /* - */, 49,0, TRUE} // 39
, {doBeginMatchMode, 105 /* i */, 49,0, FALSE} // 35
, {doBeginMatchMode, 109 /* m */, 49,0, FALSE} // 36
, {doBeginMatchMode, 115 /* s */, 49,0, FALSE} // 37
, {doBeginMatchMode, 120 /* x */, 49,0, FALSE} // 38
, {doBeginMatchMode, 45 /* - */, 49,0, FALSE} // 39
, {doConditionalExpr, 40 /* ( */, 101,0, TRUE} // 40
, {doPerlInline, 123 /* { */, 101,0, TRUE} // 41
, {doBadOpenParenType, 255, 101,0, FALSE} // 42
@ -151,12 +154,12 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
, {doMismatchedParenErr, 253, 101,0, FALSE} // 47
, {doNOP, 255, 46,0, TRUE} // 48
, {doMatchMode, 105 /* i */, 49,0, TRUE} // 49 paren-flag
, {doMatchMode, 115 /* s */, 49,0, TRUE} // 50
, {doMatchMode, 109 /* m */, 49,0, TRUE} // 51
, {doMatchMode, 109 /* m */, 49,0, TRUE} // 50
, {doMatchMode, 115 /* s */, 49,0, TRUE} // 51
, {doMatchMode, 120 /* x */, 49,0, TRUE} // 52
, {doMatchMode, 45 /* - */, 49,0, TRUE} // 53
, {doNOP, 41 /* ) */, 2,0, TRUE} // 54
, {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 55
, {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 54
, {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 55
, {doNOP, 255, 101,0, FALSE} // 56
, {doNGStar, 63 /* ? */, 20,0, TRUE} // 57 quant-star
, {doPossesiveStar, 43 /* + */, 20,0, TRUE} // 58

View File

@ -132,11 +132,11 @@ open-paren-extended:
'!' n term ^expr-cont doOpenLookAheadNeg # (?!
'<' n open-paren-lookbehind
'#' n paren-comment ^term
'i' n paren-flag doMatchMode
'x' n paren-flag doMatchMode
's' n paren-flag doMatchMode
'm' n paren-flag doMatchMode
'-' n paren-flag doMatchMode
'i' paren-flag doBeginMatchMode
'm' paren-flag doBeginMatchMode
's' paren-flag doBeginMatchMode
'x' paren-flag doBeginMatchMode
'-' paren-flag doBeginMatchMode
'(' n errorDeath doConditionalExpr
'{' n errorDeath doPerlInline
default errorDeath doBadOpenParenType
@ -157,16 +157,16 @@ paren-comment:
default n paren-comment
#
# paren-flag Scanned a (?ismx-ismx flag setting thing
# TODO: this is not fully implemented yet.
# paren-flag Scanned a (?ismx-ismx flag setting
#
paren-flag:
'i' n paren-flag doMatchMode
's' n paren-flag doMatchMode
'm' n paren-flag doMatchMode
's' n paren-flag doMatchMode
'x' n paren-flag doMatchMode
'-' n paren-flag doMatchMode
')' n term
':' n term ^expr-quant doOpenNonCaptureParen
')' n term doSetMatchMode
':' n term ^expr-quant doMatchModeParen
default errorDeath

View File

@ -117,9 +117,11 @@ enum {
// First Operand: Index of start of string in string literals
// Second Operand (next word in compiled code):
// the length of the string.
URX_BACKREF_I = 41 // Case insensitive back reference.
URX_BACKREF_I = 41, // Case insensitive back reference.
// Parameter is the index of the
// capture group variables in the state stack frame.
URX_DOLLAR_M = 42, // $ in multi-line mode.
URX_CARET_M = 43 // ^ in multi-line mode.
};
// Keep this list of opcode names in sync with the above enum
@ -166,7 +168,9 @@ enum {
"LA_END", \
"ONECHAR_I", \
"STRING_I", \
"BACKREF_I"
"BACKREF_I", \
"DOLLAR_M", \
"CARET_M"
//
// Convenience macros for assembling and disassembling a compiled operation.

View File

@ -810,17 +810,52 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
fp = (REStackFrame *)fStack->popFrame(frameSize);
// TODO: support for multi-line mode.
break;
case URX_CARET: // ^, test for start of line
case URX_DOLLAR_M: // $, test for End of line in multi-line mode
{
if (fp->fInputIdx >= inputLen) {
// We really are at the end of input. Success.
break;
}
// If we are positioned just before a new-line , succeed.
// It makes no difference where the new-line is within the input.
UChar32 c = inputBuf[fp->fInputIdx];
if (c == 0x0a || c==0x0d || c==0x0c || c==0x85 ||c==0x2028 || c==0x2029) {
break; // At new-line at end of input. Success
}
// not at a new line. Fail.
fp = (REStackFrame *)fStack->popFrame(frameSize);
}
break;
case URX_CARET: // ^, test for start of line
if (fp->fInputIdx != 0) {
fp = (REStackFrame *)fStack->popFrame(frameSize);
} // TODO: support for multi-line mode.
}
break;
case URX_CARET_M: // ^, test for start of line in mulit-line mode
{
if (fp->fInputIdx == 0) {
// We are at the start of input. Success.
break;
}
// Check the character just before the current pos.
UChar c = inputBuf[fp->fInputIdx - 1];
if (c == 0x0a || c==0x0d || c==0x0c || c==0x85 ||c==0x2028 || c==0x2029) {
// It's a new-line. ^ is true. Success.
break;
}
// Not at the start of a line. Fail.
fp = (REStackFrame *)fStack->popFrame(frameSize);
}
break;
case URX_BACKSLASH_A: // Test for start of input
if (fp->fInputIdx != 0) {
fp = (REStackFrame *)fStack->popFrame(frameSize);
@ -966,10 +1001,10 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
case URX_DOTANY:
{
// . matches anything
// . matches anything, but stops at end-of-line.
if (fp->fInputIdx >= inputLen) {
// At end of input. Match failed. Backtrack out.
fp = (REStackFrame *)fStack->popFrame(frameSize);
fp = (REStackFrame *)fStack->popFrame(frameSize);
break;
}
// There is input left. Advance over one char, unless we've hit end-of-line
@ -988,20 +1023,20 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
case URX_DOTANY_ALL:
{
// ., in dot-matches-all (including new lines) mode
// . matches anything
if (fp->fInputIdx >= inputLen) {
// At end of input. Match failed. Backtrack out.
fp = (REStackFrame *)fStack->popFrame(frameSize);
break;
}
// There is input left. Advance over one char, unless we've hit end-of-line
UChar32 c = fInput->char32At(fp->fInputIdx);
fp->fInputIdx = fInput->moveIndex32(fp->fInputIdx, 1);
if (c == 0x0a || c==0x0d || c==0x0c || c==0x85 ||c==0x2028 || c==0x2029) {
// There is input left. Advance over one char, except if we are
// at a cr/lf, advance over both of them.
UChar32 c;
U16_NEXT(inputBuf, fp->fInputIdx, inputLen, c);
if (c==0x0d) {
// In the case of a CR/LF, we need to advance over both.
UChar32 nextc = fInput->char32At(fp->fInputIdx);
if (c == 0x0d && nextc == 0x0a) {
fp->fInputIdx = fInput->moveIndex32(fp->fInputIdx, 1);
UChar nextc = inputBuf[fp->fInputIdx];
if (nextc == 0x0a) {
fp->fInputIdx++;
}
}
}

View File

@ -452,10 +452,14 @@ void RegexPattern::dumpOp(int32_t index) const {
case URX_NOP:
case URX_DOTANY:
case URX_FAIL:
case URX_CARET:
case URX_DOLLAR:
case URX_BACKSLASH_A:
case URX_BACKSLASH_G:
case URX_BACKSLASH_X:
case URX_END:
case URX_DOLLAR_M:
case URX_CARET_M:
// Types with no operand field of interest.
break;
@ -468,8 +472,6 @@ void RegexPattern::dumpOp(int32_t index) const {
case URX_BACKSLASH_D:
case URX_BACKSLASH_W:
case URX_BACKSLASH_Z:
case URX_CARET:
case URX_DOLLAR:
case URX_STRING_LEN:
case URX_CTR_INIT:
case URX_CTR_INIT_NG:
@ -485,6 +487,7 @@ void RegexPattern::dumpOp(int32_t index) const {
case URX_JMPX:
case URX_LA_START:
case URX_LA_END:
case URX_BACKREF_I:
// types with an integer operand field.
REGEX_DUMP_DEBUG_PRINTF("%d", val);
break;

View File

@ -64,13 +64,17 @@ struct REStackFrame;
enum {
/** Forces normalization of pattern and strings. @draft ICU 2.4 */
UREGEX_CANON_EQ = 128,
/** Enable case insensitive matching. @draft ICU 2.4 */
UREGEX_CASE_INSENSITIVE = 2,
/** Allow white space and comments within patterns @draft ICU 2.4 */
UREGEX_COMMENTS = 4,
/** If set, '.' matches line terminators, otherwise '.' matching stops at line end.
* @draft ICU 2.4 */
UREGEX_DOTALL = 32,
/** Control behavior of "$" and "^"
* If set, recognize line terminators within string,
* otherwise, match only at start and end of input string.

View File

@ -1228,10 +1228,6 @@ void RegexTest::Errors() {
REGEX_ERR(")))))))", 1, 1, U_REGEX_MISMATCHED_PAREN);
REGEX_ERR("(((((((", 1, 7, U_REGEX_MISMATCHED_PAREN);
// Flag settings not yet implemented
REGEX_ERR("(?i:stuff*)", 1, 3, U_REGEX_UNIMPLEMENTED);
REGEX_ERR("(?-si) stuff", 1, 3, U_REGEX_UNIMPLEMENTED);
// Look-ahead, Look-behind
REGEX_ERR("abc(?<=xyz).*", 1, 7, U_REGEX_UNIMPLEMENTED); // look-behind
REGEX_ERR("abc(?<!xyz).*", 1, 7, U_REGEX_UNIMPLEMENTED); // negated look-behind
@ -1666,10 +1662,16 @@ void RegexTest::PerlTests() {
}
else if (perlExpr.startsWith("\\")) { // \Escape. Take following char as a literal.
// or as an escaped sequence (e.g. \n)
if (perlExpr.length() > 1) {
perlExpr.remove(0, 1); // Remove the '\', but only if not last char.
}
resultString.append(perlExpr.charAt(0));
UChar c = perlExpr.charAt(0);
switch (c) {
case 'n': c = '\n'; break;
// add any other escape sequences that show up in the test expected results.
}
resultString.append(c);
perlExpr.remove(0, 1);
}
@ -1693,6 +1695,8 @@ void RegexTest::PerlTests() {
UnicodeString expectedS(fields[4]);
expectedS.findAndReplace(nulnulSrc, nulnul);
expectedS.findAndReplace(ffffSrc, ffff);
expectedS.findAndReplace("\\n", "\n");
if (expectedS.compare(resultString) != 0) {
errln("Line %d: Incorrect perl expression results. Expected \"%s\"; got \"%s\"",

View File

@ -210,5 +210,9 @@
# Case Insensitive
"aBc" i "<0>ABC</0>"
#"a[^bc]d" i "ABD" # TODO: case closure bug
'((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8></7></6></5></4></3></2></1>A</0>"
#"a[^bc]d" i "ABD" # TODO: case closure bug
'((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8></7></6></5></4></3></2></1>A</0>"
"(?:(?i)a)b" "<0>Ab</0>"
"ab(?i)cd" "<0>abCd</0>"
"ab$cd" "abcd"