ICU-2422 Regexp, optimizing find() operations
X-SVN-Rev: 11354
This commit is contained in:
parent
fae219cec9
commit
f0e3f3d714
@ -932,28 +932,13 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
||||
break;
|
||||
|
||||
case doPlus:
|
||||
// Normal '+' compiles to
|
||||
// 1. stuff to be repeated (already built)
|
||||
// 2. state-save 4
|
||||
// 3. jmp 1
|
||||
// 4. ...
|
||||
// Normal '+' compiles to
|
||||
// 1. stuff to be repeated (already built)
|
||||
// 2. jmp-sav 1
|
||||
// 3. ...
|
||||
{
|
||||
int32_t topLoc = blockTopLoc(FALSE); // location of item #1
|
||||
|
||||
// Locate the position in the compiled pattern where the match will continue
|
||||
// after completing the + (4 in the comment above)
|
||||
//int32_t continueLoc = fRXPat->fCompiledPat->size()+2;
|
||||
|
||||
// Emit the STATE_SAVE
|
||||
//int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
|
||||
//fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus);
|
||||
|
||||
// Emit the JMP
|
||||
int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc);
|
||||
int32_t topLoc = blockTopLoc(FALSE); // location of item #1
|
||||
int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc);
|
||||
fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
|
||||
}
|
||||
break;
|
||||
@ -1016,12 +1001,12 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
||||
// Compiles to
|
||||
// 1. STATE_SAVE 4
|
||||
// 2. body of stuff being iterated over
|
||||
// 3. JMP 1
|
||||
// 3. JMP_SAV 2
|
||||
// 4. ...
|
||||
//
|
||||
// Or, if the body can match a zero-length string, to inhibit infinite loops,
|
||||
// 1. STATE_SAVE 6
|
||||
// 2. POS_SAVE data-loc
|
||||
// 2. STO_INP_LOC data-loc
|
||||
// 3. body of stuff
|
||||
// 4. JMPX 1
|
||||
// 5 data-loc (extra operand of JMPX)
|
||||
@ -1041,20 +1026,19 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
||||
}
|
||||
|
||||
// Locate the position in the compiled pattern where the match will continue
|
||||
// after completing the *. (4 in the comment above)
|
||||
// after completing the *. (4 or 6 in the comment above)
|
||||
int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
|
||||
if (dataLoc != -1) {
|
||||
continueLoc++;
|
||||
continueLoc++; // second code sequence.
|
||||
}
|
||||
|
||||
// Put together the save state op and store it into the compiled code.
|
||||
int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
|
||||
fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
|
||||
|
||||
// Append the URX_JMP or URX_JMPX operation to the compiled pattern. Its target
|
||||
// is the location of the state-save, above.
|
||||
// Append the URX_JMP_SAV or URX_JMPX operation to the compiled pattern.
|
||||
if (dataLoc == -1) {
|
||||
int32_t jmpOp = URX_BUILD(URX_JMP, saveStateLoc);
|
||||
int32_t jmpOp = URX_BUILD(URX_JMP_SAV, saveStateLoc+1);
|
||||
fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
|
||||
} else {
|
||||
int32_t op = URX_BUILD(URX_JMPX, saveStateLoc);
|
||||
@ -1062,7 +1046,6 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
||||
op = URX_BUILD(URX_RESERVED_OP, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -850,17 +850,37 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
||||
// Test input against a literal string.
|
||||
// Strings require two slots in the compiled pattern, one for the
|
||||
// offset to the string text, and one for the length.
|
||||
int32_t stringStartIdx, stringLen;
|
||||
stringStartIdx = opValue;
|
||||
int32_t stringStartIdx = opValue;
|
||||
int32_t stringLen;
|
||||
|
||||
op = pat[fp->fPatIdx];
|
||||
op = pat[fp->fPatIdx]; // Fetch the second operand
|
||||
fp->fPatIdx++;
|
||||
opType = URX_TYPE(op);
|
||||
opValue = URX_VAL(op);
|
||||
opType = URX_TYPE(op);
|
||||
stringLen = URX_VAL(op);
|
||||
U_ASSERT(opType == URX_STRING_LEN);
|
||||
stringLen = opValue;
|
||||
U_ASSERT(stringLen >= 2);
|
||||
|
||||
int32_t stringEndIndex = fp->fInputIdx + stringLen;
|
||||
const UChar * pInp = inputBuf + fp->fInputIdx;
|
||||
const UChar * pPat = litText+stringStartIdx;
|
||||
const UChar * pEnd = pInp + stringLen;
|
||||
for(;;) {
|
||||
if (*pInp == *pPat) {
|
||||
pInp++;
|
||||
pPat++;
|
||||
if (pInp == pEnd) {
|
||||
// Successful Match.
|
||||
fp->fInputIdx += stringLen;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Match failed.
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
#if 0
|
||||
if (stringEndIndex <= inputLen &&
|
||||
u_strncmp(inputBuf+fp->fInputIdx, litText+stringStartIdx, stringLen) == 0) {
|
||||
// Success. Advance the current input position.
|
||||
@ -869,6 +889,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
||||
// No match. Back up matching to a saved state
|
||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user