ICU-2422 Regexp, optimizing find() operations

X-SVN-Rev: 11354
This commit is contained in:
Andy Heninger 2003-03-19 01:57:23 +00:00
parent fae219cec9
commit f0e3f3d714
2 changed files with 36 additions and 32 deletions

View File

@ -932,28 +932,13 @@ UBool RegexCompile::doParseActions(EParseAction action)
break; break;
case doPlus: case doPlus:
// Normal '+' compiles to
// 1. stuff to be repeated (already built)
// 2. state-save 4
// 3. jmp 1
// 4. ...
// Normal '+' compiles to // Normal '+' compiles to
// 1. stuff to be repeated (already built) // 1. stuff to be repeated (already built)
// 2. jmp-sav 1 // 2. jmp-sav 1
// 3. ... // 3. ...
{ {
int32_t topLoc = blockTopLoc(FALSE); // location of item #1 int32_t topLoc = blockTopLoc(FALSE); // location of item #1
int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc);
// Locate the position in the compiled pattern where the match will continue
// after completing the + (4 in the comment above)
//int32_t continueLoc = fRXPat->fCompiledPat->size()+2;
// Emit the STATE_SAVE
//int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
//fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus);
// Emit the JMP
int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc);
fRXPat->fCompiledPat->addElement(jmpOp, *fStatus); fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
} }
break; break;
@ -1016,12 +1001,12 @@ UBool RegexCompile::doParseActions(EParseAction action)
// Compiles to // Compiles to
// 1. STATE_SAVE 4 // 1. STATE_SAVE 4
// 2. body of stuff being iterated over // 2. body of stuff being iterated over
// 3. JMP 1 // 3. JMP_SAV 2
// 4. ... // 4. ...
// //
// Or, if the body can match a zero-length string, to inhibit infinite loops, // Or, if the body can match a zero-length string, to inhibit infinite loops,
// 1. STATE_SAVE 6 // 1. STATE_SAVE 6
// 2. POS_SAVE data-loc // 2. STO_INP_LOC data-loc
// 3. body of stuff // 3. body of stuff
// 4. JMPX 1 // 4. JMPX 1
// 5 data-loc (extra operand of JMPX) // 5 data-loc (extra operand of JMPX)
@ -1041,20 +1026,19 @@ UBool RegexCompile::doParseActions(EParseAction action)
} }
// Locate the position in the compiled pattern where the match will continue // Locate the position in the compiled pattern where the match will continue
// after completing the *. (4 in the comment above) // after completing the *. (4 or 6 in the comment above)
int32_t continueLoc = fRXPat->fCompiledPat->size()+1; int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
if (dataLoc != -1) { if (dataLoc != -1) {
continueLoc++; continueLoc++; // second code sequence.
} }
// Put together the save state op and store it into the compiled code. // Put together the save state op and store it into the compiled code.
int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc); int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc); fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
// Append the URX_JMP or URX_JMPX operation to the compiled pattern. Its target // Append the URX_JMP_SAV or URX_JMPX operation to the compiled pattern.
// is the location of the state-save, above.
if (dataLoc == -1) { if (dataLoc == -1) {
int32_t jmpOp = URX_BUILD(URX_JMP, saveStateLoc); int32_t jmpOp = URX_BUILD(URX_JMP_SAV, saveStateLoc+1);
fRXPat->fCompiledPat->addElement(jmpOp, *fStatus); fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
} else { } else {
int32_t op = URX_BUILD(URX_JMPX, saveStateLoc); int32_t op = URX_BUILD(URX_JMPX, saveStateLoc);
@ -1062,7 +1046,6 @@ UBool RegexCompile::doParseActions(EParseAction action)
op = URX_BUILD(URX_RESERVED_OP, dataLoc); op = URX_BUILD(URX_RESERVED_OP, dataLoc);
fRXPat->fCompiledPat->addElement(op, *fStatus); fRXPat->fCompiledPat->addElement(op, *fStatus);
} }
} }
break; break;

View File

@ -850,17 +850,37 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
// Test input against a literal string. // Test input against a literal string.
// Strings require two slots in the compiled pattern, one for the // Strings require two slots in the compiled pattern, one for the
// offset to the string text, and one for the length. // offset to the string text, and one for the length.
int32_t stringStartIdx, stringLen; int32_t stringStartIdx = opValue;
stringStartIdx = opValue; int32_t stringLen;
op = pat[fp->fPatIdx]; op = pat[fp->fPatIdx]; // Fetch the second operand
fp->fPatIdx++; fp->fPatIdx++;
opType = URX_TYPE(op); opType = URX_TYPE(op);
opValue = URX_VAL(op); stringLen = URX_VAL(op);
U_ASSERT(opType == URX_STRING_LEN); U_ASSERT(opType == URX_STRING_LEN);
stringLen = opValue; U_ASSERT(stringLen >= 2);
int32_t stringEndIndex = fp->fInputIdx + stringLen; const UChar * pInp = inputBuf + fp->fInputIdx;
const UChar * pPat = litText+stringStartIdx;
const UChar * pEnd = pInp + stringLen;
for(;;) {
if (*pInp == *pPat) {
pInp++;
pPat++;
if (pInp == pEnd) {
// Successful Match.
fp->fInputIdx += stringLen;
break;
}
} else {
// Match failed.
fp = (REStackFrame *)fStack->popFrame(frameSize);
break;
}
}
break;
#if 0
if (stringEndIndex <= inputLen && if (stringEndIndex <= inputLen &&
u_strncmp(inputBuf+fp->fInputIdx, litText+stringStartIdx, stringLen) == 0) { u_strncmp(inputBuf+fp->fInputIdx, litText+stringStartIdx, stringLen) == 0) {
// Success. Advance the current input position. // Success. Advance the current input position.
@ -869,6 +889,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
// No match. Back up matching to a saved state // No match. Back up matching to a saved state
fp = (REStackFrame *)fStack->popFrame(frameSize); fp = (REStackFrame *)fStack->popFrame(frameSize);
} }
#endif
} }
break; break;