ICU-20618 Regex nested lookaround expressions, clean up active match region handling.
This commit is contained in:
parent
5f837abd40
commit
327087150f
@ -561,7 +561,7 @@ UBool RegexCompile::doParseActions(int32_t action)
|
||||
// sequence; don't change without making updates there too.
|
||||
//
|
||||
// Compiles to
|
||||
// 1 START_LA dataLoc Saves SP, Input Pos
|
||||
// 1 LA_START dataLoc Saves SP, Input Pos, Active input region.
|
||||
// 2. STATE_SAVE 4 on failure of lookahead, goto 4
|
||||
// 3 JMP 6 continue ...
|
||||
//
|
||||
@ -575,10 +575,14 @@ UBool RegexCompile::doParseActions(int32_t action)
|
||||
// 8. code for parenthesized stuff.
|
||||
// 9. LA_END
|
||||
//
|
||||
// Two data slots are reserved, for saving the stack ptr and the input position.
|
||||
// Four data slots are reserved, for saving state on entry to the look-around
|
||||
// 0: stack pointer on entry.
|
||||
// 1: input position on entry.
|
||||
// 2: fActiveStart, the active bounds start on entry.
|
||||
// 3: fActiveLimit, the active bounds limit on entry.
|
||||
{
|
||||
fixLiterals();
|
||||
int32_t dataLoc = allocateData(2);
|
||||
int32_t dataLoc = allocateData(4);
|
||||
appendOp(URX_LA_START, dataLoc);
|
||||
appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2);
|
||||
appendOp(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
|
||||
@ -599,18 +603,23 @@ UBool RegexCompile::doParseActions(int32_t action)
|
||||
case doOpenLookAheadNeg:
|
||||
// Negated Lookahead. (?! stuff )
|
||||
// Compiles to
|
||||
// 1. START_LA dataloc
|
||||
// 1. LA_START dataloc
|
||||
// 2. STATE_SAVE 7 // Fail within look-ahead block restores to this state,
|
||||
// // which continues with the match.
|
||||
// 3. NOP // Std. Open Paren sequence, for possible '|'
|
||||
// 4. code for parenthesized stuff.
|
||||
// 5. END_LA // Cut back stack, remove saved state from step 2.
|
||||
// 5. LA_END // Cut back stack, remove saved state from step 2.
|
||||
// 6. BACKTRACK // code in block succeeded, so neg. lookahead fails.
|
||||
// 7. LA_END // Restore match region, in case look-ahead was using
|
||||
// an alternate (transparent) region.
|
||||
// Four data slots are reserved, for saving state on entry to the look-around
|
||||
// 0: stack pointer on entry.
|
||||
// 1: input position on entry.
|
||||
// 2: fActiveStart, the active bounds start on entry.
|
||||
// 3: fActiveLimit, the active bounds limit on entry.
|
||||
{
|
||||
fixLiterals();
|
||||
int32_t dataLoc = allocateData(2);
|
||||
int32_t dataLoc = allocateData(4);
|
||||
appendOp(URX_LA_START, dataLoc);
|
||||
appendOp(URX_STATE_SAVE, 0); // dest address will be patched later.
|
||||
appendOp(URX_NOP, 0);
|
||||
@ -644,14 +653,16 @@ UBool RegexCompile::doParseActions(int32_t action)
|
||||
// Allocate a block of matcher data, to contain (when running a match)
|
||||
// 0: Stack ptr on entry
|
||||
// 1: Input Index on entry
|
||||
// 2: Start index of match current match attempt.
|
||||
// 3: Original Input String len.
|
||||
// 2: fActiveStart, the active bounds start on entry.
|
||||
// 3: fActiveLimit, the active bounds limit on entry.
|
||||
// 4: Start index of the current match attempt.
|
||||
// The first four items must match the layout of data for LA_START / LA_END
|
||||
|
||||
// Generate match code for any pending literals.
|
||||
fixLiterals();
|
||||
|
||||
// Allocate data space
|
||||
int32_t dataLoc = allocateData(4);
|
||||
int32_t dataLoc = allocateData(5);
|
||||
|
||||
// Emit URX_LB_START
|
||||
appendOp(URX_LB_START, dataLoc);
|
||||
@ -696,14 +707,16 @@ UBool RegexCompile::doParseActions(int32_t action)
|
||||
// Allocate a block of matcher data, to contain (when running a match)
|
||||
// 0: Stack ptr on entry
|
||||
// 1: Input Index on entry
|
||||
// 2: Start index of match current match attempt.
|
||||
// 3: Original Input String len.
|
||||
// 2: fActiveStart, the active bounds start on entry.
|
||||
// 3: fActiveLimit, the active bounds limit on entry.
|
||||
// 4: Start index of the current match attempt.
|
||||
// The first four items must match the layout of data for LA_START / LA_END
|
||||
|
||||
// Generate match code for any pending literals.
|
||||
fixLiterals();
|
||||
|
||||
// Allocate data space
|
||||
int32_t dataLoc = allocateData(4);
|
||||
int32_t dataLoc = allocateData(5);
|
||||
|
||||
// Emit URX_LB_START
|
||||
appendOp(URX_LB_START, dataLoc);
|
||||
|
@ -123,7 +123,7 @@ enum {
|
||||
// saved input position, FAIL rather than taking
|
||||
// the JMP
|
||||
URX_LA_START = 37, // Starting a LookAround expression.
|
||||
// Save InputPos and SP in static data.
|
||||
// Save InputPos, SP and active region in static data.
|
||||
// Operand: Static data offset for the save
|
||||
URX_LA_END = 38, // Ending a Lookaround expression.
|
||||
// Restore InputPos, Stack and active region to saved values.
|
||||
|
@ -3805,11 +3805,13 @@ GC_Done:
|
||||
|
||||
case URX_LA_START:
|
||||
{
|
||||
// Entering a lookahead block.
|
||||
// Entering a look around block.
|
||||
// Save Stack Ptr, Input Pos and active input region.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize);
|
||||
fData[opValue] = fStack->size();
|
||||
fData[opValue+1] = fp->fInputIdx;
|
||||
fData[opValue+2] = fActiveStart;
|
||||
fData[opValue+3] = fActiveLimit;
|
||||
fActiveStart = fLookStart; // Set the match region change for
|
||||
fActiveLimit = fLookLimit; // transparent bounds.
|
||||
}
|
||||
@ -3819,7 +3821,7 @@ GC_Done:
|
||||
{
|
||||
// Leaving a look around block.
|
||||
// restore Stack Ptr, Input Pos to positions they had on entry to block.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize);
|
||||
int32_t stackSize = fStack->size();
|
||||
int32_t newStackSize =(int32_t)fData[opValue];
|
||||
U_ASSERT(stackSize >= newStackSize);
|
||||
@ -3839,8 +3841,10 @@ GC_Done:
|
||||
|
||||
// Restore the active region bounds in the input string; they may have
|
||||
// been changed because of transparent bounds on a Region.
|
||||
fActiveStart = fRegionStart;
|
||||
fActiveLimit = fRegionLimit;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -3916,17 +3920,19 @@ GC_Done:
|
||||
case URX_LB_START:
|
||||
{
|
||||
// Entering a look-behind block.
|
||||
// Save Stack Ptr, Input Pos.
|
||||
// Save Stack Ptr, Input Pos and active input region.
|
||||
// TODO: implement transparent bounds. Ticket #6067
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
fData[opValue] = fStack->size();
|
||||
fData[opValue+1] = fp->fInputIdx;
|
||||
// Init the variable containing the start index for attempted matches.
|
||||
fData[opValue+2] = -1;
|
||||
// Save input string length, then reset to pin any matches to end at
|
||||
// the current position.
|
||||
fData[opValue+2] = fActiveStart;
|
||||
fData[opValue+3] = fActiveLimit;
|
||||
fActiveStart = fRegionStart;
|
||||
fActiveLimit = fp->fInputIdx;
|
||||
// Init the variable containing the start index for attempted matches.
|
||||
fData[opValue+4] = -1;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -3949,8 +3955,8 @@ GC_Done:
|
||||
U_ASSERT(minML >= 0);
|
||||
|
||||
// Fetch (from data) the last input index where a match was attempted.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
int64_t &lbStartIdx = fData[opValue+2];
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
int64_t &lbStartIdx = fData[opValue+4];
|
||||
if (lbStartIdx < 0) {
|
||||
// First time through loop.
|
||||
lbStartIdx = fp->fInputIdx - minML;
|
||||
@ -3976,10 +3982,10 @@ GC_Done:
|
||||
// getting a match. Backtrack out, and out of the
|
||||
// Look Behind altogether.
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
int64_t restoreInputLen = fData[opValue+3];
|
||||
U_ASSERT(restoreInputLen >= fActiveLimit);
|
||||
U_ASSERT(restoreInputLen <= fInputLength);
|
||||
fActiveLimit = restoreInputLen;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -3993,7 +3999,7 @@ GC_Done:
|
||||
case URX_LB_END:
|
||||
// End of a look-behind block, after a successful match.
|
||||
{
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
if (fp->fInputIdx != fActiveLimit) {
|
||||
// The look-behind expression matched, but the match did not
|
||||
// extend all the way to the point that we are looking behind from.
|
||||
@ -4004,13 +4010,13 @@ GC_Done:
|
||||
break;
|
||||
}
|
||||
|
||||
// Look-behind match is good. Restore the orignal input string length,
|
||||
// Look-behind match is good. Restore the original input string region,
|
||||
// which had been truncated to pin the end of the lookbehind match to the
|
||||
// position being looked-behind.
|
||||
int64_t originalInputLen = fData[opValue+3];
|
||||
U_ASSERT(originalInputLen >= fActiveLimit);
|
||||
U_ASSERT(originalInputLen <= fInputLength);
|
||||
fActiveLimit = originalInputLen;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -4035,8 +4041,8 @@ GC_Done:
|
||||
U_ASSERT(continueLoc > fp->fPatIdx);
|
||||
|
||||
// Fetch (from data) the last input index where a match was attempted.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
int64_t &lbStartIdx = fData[opValue+2];
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
int64_t &lbStartIdx = fData[opValue+4];
|
||||
if (lbStartIdx < 0) {
|
||||
// First time through loop.
|
||||
lbStartIdx = fp->fInputIdx - minML;
|
||||
@ -4061,10 +4067,10 @@ GC_Done:
|
||||
// We have tried all potential match starting points without
|
||||
// getting a match, which means that the negative lookbehind as
|
||||
// a whole has succeeded. Jump forward to the continue location
|
||||
int64_t restoreInputLen = fData[opValue+3];
|
||||
U_ASSERT(restoreInputLen >= fActiveLimit);
|
||||
U_ASSERT(restoreInputLen <= fInputLength);
|
||||
fActiveLimit = restoreInputLen;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
fp->fPatIdx = continueLoc;
|
||||
break;
|
||||
}
|
||||
@ -4079,7 +4085,7 @@ GC_Done:
|
||||
case URX_LBN_END:
|
||||
// End of a negative look-behind block, after a successful match.
|
||||
{
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
if (fp->fInputIdx != fActiveLimit) {
|
||||
// The look-behind expression matched, but the match did not
|
||||
// extend all the way to the point that we are looking behind from.
|
||||
@ -4096,10 +4102,10 @@ GC_Done:
|
||||
// Restore the original active input region, which had been truncated
|
||||
// in order to pin the end of the lookbehind match
|
||||
// to the position being looked-behind.
|
||||
int64_t originalInputLen = fData[opValue+3];
|
||||
U_ASSERT(originalInputLen >= fActiveLimit);
|
||||
U_ASSERT(originalInputLen <= fInputLength);
|
||||
fActiveLimit = originalInputLen;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
|
||||
// Restore original stack position, discarding any state saved
|
||||
// by the successful pattern match.
|
||||
@ -5336,11 +5342,13 @@ GC_Done:
|
||||
|
||||
case URX_LA_START:
|
||||
{
|
||||
// Entering a lookahead block.
|
||||
// Entering a look around block.
|
||||
// Save Stack Ptr, Input Pos and active input region.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize);
|
||||
fData[opValue] = fStack->size();
|
||||
fData[opValue+1] = fp->fInputIdx;
|
||||
fData[opValue+2] = fActiveStart;
|
||||
fData[opValue+3] = fActiveLimit;
|
||||
fActiveStart = fLookStart; // Set the match region change for
|
||||
fActiveLimit = fLookLimit; // transparent bounds.
|
||||
}
|
||||
@ -5348,9 +5356,9 @@ GC_Done:
|
||||
|
||||
case URX_LA_END:
|
||||
{
|
||||
// Leaving a look-ahead block.
|
||||
// Leaving a look around block.
|
||||
// restore Stack Ptr, Input Pos to positions they had on entry to block.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize);
|
||||
int32_t stackSize = fStack->size();
|
||||
int32_t newStackSize = (int32_t)fData[opValue];
|
||||
U_ASSERT(stackSize >= newStackSize);
|
||||
@ -5370,8 +5378,10 @@ GC_Done:
|
||||
|
||||
// Restore the active region bounds in the input string; they may have
|
||||
// been changed because of transparent bounds on a Region.
|
||||
fActiveStart = fRegionStart;
|
||||
fActiveLimit = fRegionLimit;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5434,17 +5444,19 @@ GC_Done:
|
||||
case URX_LB_START:
|
||||
{
|
||||
// Entering a look-behind block.
|
||||
// Save Stack Ptr, Input Pos.
|
||||
// Save Stack Ptr, Input Pos and active input region.
|
||||
// TODO: implement transparent bounds. Ticket #6067
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
fData[opValue] = fStack->size();
|
||||
fData[opValue+1] = fp->fInputIdx;
|
||||
// Init the variable containing the start index for attempted matches.
|
||||
fData[opValue+2] = -1;
|
||||
// Save input string length, then reset to pin any matches to end at
|
||||
// the current position.
|
||||
fData[opValue+2] = fActiveStart;
|
||||
fData[opValue+3] = fActiveLimit;
|
||||
fActiveStart = fRegionStart;
|
||||
fActiveLimit = fp->fInputIdx;
|
||||
// Init the variable containing the start index for attempted matches.
|
||||
fData[opValue+4] = -1;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5462,8 +5474,8 @@ GC_Done:
|
||||
U_ASSERT(minML >= 0);
|
||||
|
||||
// Fetch (from data) the last input index where a match was attempted.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
int64_t &lbStartIdx = fData[opValue+2];
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
int64_t &lbStartIdx = fData[opValue+4];
|
||||
if (lbStartIdx < 0) {
|
||||
// First time through loop.
|
||||
lbStartIdx = fp->fInputIdx - minML;
|
||||
@ -5485,10 +5497,10 @@ GC_Done:
|
||||
// getting a match. Backtrack out, and out of the
|
||||
// Look Behind altogether.
|
||||
fp = (REStackFrame *)fStack->popFrame(fFrameSize);
|
||||
int64_t restoreInputLen = fData[opValue+3];
|
||||
U_ASSERT(restoreInputLen >= fActiveLimit);
|
||||
U_ASSERT(restoreInputLen <= fInputLength);
|
||||
fActiveLimit = restoreInputLen;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -5502,7 +5514,7 @@ GC_Done:
|
||||
case URX_LB_END:
|
||||
// End of a look-behind block, after a successful match.
|
||||
{
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
if (fp->fInputIdx != fActiveLimit) {
|
||||
// The look-behind expression matched, but the match did not
|
||||
// extend all the way to the point that we are looking behind from.
|
||||
@ -5513,13 +5525,13 @@ GC_Done:
|
||||
break;
|
||||
}
|
||||
|
||||
// Look-behind match is good. Restore the orignal input string length,
|
||||
// Look-behind match is good. Restore the original input string region,
|
||||
// which had been truncated to pin the end of the lookbehind match to the
|
||||
// position being looked-behind.
|
||||
int64_t originalInputLen = fData[opValue+3];
|
||||
U_ASSERT(originalInputLen >= fActiveLimit);
|
||||
U_ASSERT(originalInputLen <= fInputLength);
|
||||
fActiveLimit = originalInputLen;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5539,8 +5551,8 @@ GC_Done:
|
||||
U_ASSERT(continueLoc > fp->fPatIdx);
|
||||
|
||||
// Fetch (from data) the last input index where a match was attempted.
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
int64_t &lbStartIdx = fData[opValue+2];
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
int64_t &lbStartIdx = fData[opValue+4];
|
||||
if (lbStartIdx < 0) {
|
||||
// First time through loop.
|
||||
lbStartIdx = fp->fInputIdx - minML;
|
||||
@ -5561,10 +5573,10 @@ GC_Done:
|
||||
// We have tried all potential match starting points without
|
||||
// getting a match, which means that the negative lookbehind as
|
||||
// a whole has succeeded. Jump forward to the continue location
|
||||
int64_t restoreInputLen = fData[opValue+3];
|
||||
U_ASSERT(restoreInputLen >= fActiveLimit);
|
||||
U_ASSERT(restoreInputLen <= fInputLength);
|
||||
fActiveLimit = restoreInputLen;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
fp->fPatIdx = continueLoc;
|
||||
break;
|
||||
}
|
||||
@ -5579,7 +5591,7 @@ GC_Done:
|
||||
case URX_LBN_END:
|
||||
// End of a negative look-behind block, after a successful match.
|
||||
{
|
||||
U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
|
||||
U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize);
|
||||
if (fp->fInputIdx != fActiveLimit) {
|
||||
// The look-behind expression matched, but the match did not
|
||||
// extend all the way to the point that we are looking behind from.
|
||||
@ -5596,10 +5608,10 @@ GC_Done:
|
||||
// Restore the original active input region, which had been truncated
|
||||
// in order to pin the end of the lookbehind match
|
||||
// to the position being looked-behind.
|
||||
int64_t originalInputLen = fData[opValue+3];
|
||||
U_ASSERT(originalInputLen >= fActiveLimit);
|
||||
U_ASSERT(originalInputLen <= fInputLength);
|
||||
fActiveLimit = originalInputLen;
|
||||
fActiveStart = fData[opValue+2];
|
||||
fActiveLimit = fData[opValue+3];
|
||||
U_ASSERT(fActiveStart >= 0);
|
||||
U_ASSERT(fActiveLimit <= fInputLength);
|
||||
|
||||
// Restore original stack position, discarding any state saved
|
||||
// by the successful pattern match.
|
||||
|
@ -3525,11 +3525,16 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
||||
}
|
||||
}
|
||||
parseMatcher->appendTail(deTaggedInput);
|
||||
REGEX_ASSERT_L(groupStarts.size() == groupEnds.size(), line);
|
||||
|
||||
if (groupStarts.size() != groupEnds.size()) {
|
||||
errln("Error at line %d: mismatched <n> group tags in expected results.", line);
|
||||
failed = true;
|
||||
goto cleanupAndReturn;
|
||||
}
|
||||
if ((regionStart>=0 || regionEnd>=0) && (regionStart<0 || regionStart>regionEnd)) {
|
||||
errln("mismatched <r> tags");
|
||||
failed = TRUE;
|
||||
goto cleanupAndReturn;
|
||||
errln("mismatched <r> tags");
|
||||
failed = TRUE;
|
||||
goto cleanupAndReturn;
|
||||
}
|
||||
|
||||
//
|
||||
|
27
icu4c/source/test/testdata/regextst.txt
vendored
27
icu4c/source/test/testdata/regextst.txt
vendored
@ -88,6 +88,28 @@
|
||||
"abc(?!def)" b "<r>abc</r>def"
|
||||
"abc(?!def)" b "<r><0>abc</0></r>xyz"
|
||||
|
||||
#
|
||||
# Nested Lookahead / Behind
|
||||
#
|
||||
"one(?=(?:(?!<out>).)*</out>)" "<out><0>one</0> stuff</out>"
|
||||
"one(?=(?:(?!<out>).)*</out>)" "<out>one <out></out>"
|
||||
|
||||
# More nesting lookaround: pattern matches "qq" when not preceded by 'a' and followed by 'z'
|
||||
"(?<!a(?!...z))qq" "<0>qq</0>c"
|
||||
"(?<!a(?!...z))qq" "f<0>qq</0>c"
|
||||
"(?<!a(?!...z))qq" "aqqz"
|
||||
|
||||
# More nested lookaround: match any two chars preceded and followed by an upper case letter.
|
||||
# With gratuitous nesting of look-arounds and capture from the look-arounds.
|
||||
|
||||
"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).." "<1>A</1><0>jk</0><2>B</2>"
|
||||
"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).." "ajkB"
|
||||
"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).." "Ajkb"
|
||||
|
||||
# Nested lookaround cases from bug ICU-20564
|
||||
"(?<=(?<=((?=)){0}+))" "<0></0>abc"
|
||||
"(?<=c(?<=c((?=c)){1}+))" "c<0><1></1></0>cc"
|
||||
|
||||
#
|
||||
# Anchoring Bounds
|
||||
#
|
||||
@ -1456,11 +1478,14 @@
|
||||
"abc(?=de(?=f))...g" "<0>abcdefg</0>"
|
||||
"abc(?=de(?=f))...g" "abcdxfg"
|
||||
|
||||
# Bug ICU-20618 Assertion failure with nested look-around expressions.
|
||||
#
|
||||
"(?<=(?<=b?(?=a)))" "hello, world."
|
||||
|
||||
|
||||
# Random debugging, Temporary
|
||||
#
|
||||
|
||||
|
||||
#
|
||||
# Regexps from http://www.regexlib.com
|
||||
#
|
||||
|
Loading…
Reference in New Issue
Block a user