ICU-2422 Regexp, optimizing find() operations
X-SVN-Rev: 11354
This commit is contained in:
parent
fae219cec9
commit
f0e3f3d714
@ -932,27 +932,12 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case doPlus:
|
case doPlus:
|
||||||
// Normal '+' compiles to
|
|
||||||
// 1. stuff to be repeated (already built)
|
|
||||||
// 2. state-save 4
|
|
||||||
// 3. jmp 1
|
|
||||||
// 4. ...
|
|
||||||
// Normal '+' compiles to
|
// Normal '+' compiles to
|
||||||
// 1. stuff to be repeated (already built)
|
// 1. stuff to be repeated (already built)
|
||||||
// 2. jmp-sav 1
|
// 2. jmp-sav 1
|
||||||
// 3. ...
|
// 3. ...
|
||||||
{
|
{
|
||||||
int32_t topLoc = blockTopLoc(FALSE); // location of item #1
|
int32_t topLoc = blockTopLoc(FALSE); // location of item #1
|
||||||
|
|
||||||
// Locate the position in the compiled pattern where the match will continue
|
|
||||||
// after completing the + (4 in the comment above)
|
|
||||||
//int32_t continueLoc = fRXPat->fCompiledPat->size()+2;
|
|
||||||
|
|
||||||
// Emit the STATE_SAVE
|
|
||||||
//int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
|
|
||||||
//fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus);
|
|
||||||
|
|
||||||
// Emit the JMP
|
|
||||||
int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc);
|
int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc);
|
||||||
fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
|
fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
|
||||||
}
|
}
|
||||||
@ -1016,12 +1001,12 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
|||||||
// Compiles to
|
// Compiles to
|
||||||
// 1. STATE_SAVE 4
|
// 1. STATE_SAVE 4
|
||||||
// 2. body of stuff being iterated over
|
// 2. body of stuff being iterated over
|
||||||
// 3. JMP 1
|
// 3. JMP_SAV 2
|
||||||
// 4. ...
|
// 4. ...
|
||||||
//
|
//
|
||||||
// Or, if the body can match a zero-length string, to inhibit infinite loops,
|
// Or, if the body can match a zero-length string, to inhibit infinite loops,
|
||||||
// 1. STATE_SAVE 6
|
// 1. STATE_SAVE 6
|
||||||
// 2. POS_SAVE data-loc
|
// 2. STO_INP_LOC data-loc
|
||||||
// 3. body of stuff
|
// 3. body of stuff
|
||||||
// 4. JMPX 1
|
// 4. JMPX 1
|
||||||
// 5 data-loc (extra operand of JMPX)
|
// 5 data-loc (extra operand of JMPX)
|
||||||
@ -1041,20 +1026,19 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Locate the position in the compiled pattern where the match will continue
|
// Locate the position in the compiled pattern where the match will continue
|
||||||
// after completing the *. (4 in the comment above)
|
// after completing the *. (4 or 6 in the comment above)
|
||||||
int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
|
int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
|
||||||
if (dataLoc != -1) {
|
if (dataLoc != -1) {
|
||||||
continueLoc++;
|
continueLoc++; // second code sequence.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Put together the save state op store it into the compiled code.
|
// Put together the save state op store it into the compiled code.
|
||||||
int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
|
int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
|
||||||
fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
|
fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
|
||||||
|
|
||||||
// Append the URX_JMP or URX_JMPX operation to the compiled pattern. Its target
|
// Append the URX_JMP_SAV or URX_JMPX operation to the compiled pattern.
|
||||||
// is the locaton of the state-save, above.
|
|
||||||
if (dataLoc == -1) {
|
if (dataLoc == -1) {
|
||||||
int32_t jmpOp = URX_BUILD(URX_JMP, saveStateLoc);
|
int32_t jmpOp = URX_BUILD(URX_JMP_SAV, saveStateLoc+1);
|
||||||
fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
|
fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
|
||||||
} else {
|
} else {
|
||||||
int32_t op = URX_BUILD(URX_JMPX, saveStateLoc);
|
int32_t op = URX_BUILD(URX_JMPX, saveStateLoc);
|
||||||
@ -1062,7 +1046,6 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
|||||||
op = URX_BUILD(URX_RESERVED_OP, dataLoc);
|
op = URX_BUILD(URX_RESERVED_OP, dataLoc);
|
||||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -850,17 +850,37 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||||||
// Test input against a literal string.
|
// Test input against a literal string.
|
||||||
// Strings require two slots in the compiled pattern, one for the
|
// Strings require two slots in the compiled pattern, one for the
|
||||||
// offset to the string text, and one for the length.
|
// offset to the string text, and one for the length.
|
||||||
int32_t stringStartIdx, stringLen;
|
int32_t stringStartIdx = opValue;
|
||||||
stringStartIdx = opValue;
|
int32_t stringLen;
|
||||||
|
|
||||||
op = pat[fp->fPatIdx];
|
op = pat[fp->fPatIdx]; // Fetch the second operand
|
||||||
fp->fPatIdx++;
|
fp->fPatIdx++;
|
||||||
opType = URX_TYPE(op);
|
opType = URX_TYPE(op);
|
||||||
opValue = URX_VAL(op);
|
stringLen = URX_VAL(op);
|
||||||
U_ASSERT(opType == URX_STRING_LEN);
|
U_ASSERT(opType == URX_STRING_LEN);
|
||||||
stringLen = opValue;
|
U_ASSERT(stringLen >= 2);
|
||||||
|
|
||||||
int32_t stringEndIndex = fp->fInputIdx + stringLen;
|
const UChar * pInp = inputBuf + fp->fInputIdx;
|
||||||
|
const UChar * pPat = litText+stringStartIdx;
|
||||||
|
const UChar * pEnd = pInp + stringLen;
|
||||||
|
for(;;) {
|
||||||
|
if (*pInp == *pPat) {
|
||||||
|
pInp++;
|
||||||
|
pPat++;
|
||||||
|
if (pInp == pEnd) {
|
||||||
|
// Successful Match.
|
||||||
|
fp->fInputIdx += stringLen;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Match failed.
|
||||||
|
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
#if 0
|
||||||
if (stringEndIndex <= inputLen &&
|
if (stringEndIndex <= inputLen &&
|
||||||
u_strncmp(inputBuf+fp->fInputIdx, litText+stringStartIdx, stringLen) == 0) {
|
u_strncmp(inputBuf+fp->fInputIdx, litText+stringStartIdx, stringLen) == 0) {
|
||||||
// Success. Advance the current input position.
|
// Success. Advance the current input position.
|
||||||
@ -869,6 +889,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
|
|||||||
// No match. Back up matching to a saved state
|
// No match. Back up matching to a saved state
|
||||||
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
fp = (REStackFrame *)fStack->popFrame(frameSize);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user