ICU-2422 Regexp, general cleanup
X-SVN-Rev: 11375
This commit is contained in:
parent
32e659eb56
commit
c612056f6a
@ -40,6 +40,10 @@ RegexMatcher::RegexMatcher(const RegexPattern *pat) {
|
||||
fDeferredStatus = U_ZERO_ERROR;
|
||||
fStack = new UVector32(fDeferredStatus);
|
||||
fData = fSmallData;
|
||||
if (pat==NULL) {
|
||||
fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
if (pat->fDataSize > sizeof(fSmallData)/sizeof(int32_t)) {
|
||||
fData = (int32_t *)uprv_malloc(pat->fDataSize * sizeof(int32_t));
|
||||
}
|
||||
@ -61,11 +65,14 @@ RegexMatcher::RegexMatcher(const UnicodeString ®exp, const UnicodeString &inp
|
||||
fDeferredStatus = U_ZERO_ERROR;
|
||||
fStack = new UVector32(status);
|
||||
fData = fSmallData;
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (fPattern->fDataSize > sizeof(fSmallData)/sizeof(int32_t)) {
|
||||
fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t));
|
||||
}
|
||||
if (fStack == NULL || fData == NULL) {
|
||||
fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
reset(input);
|
||||
}
|
||||
@ -74,17 +81,21 @@ RegexMatcher::RegexMatcher(const UnicodeString ®exp, const UnicodeString &inp
|
||||
RegexMatcher::RegexMatcher(const UnicodeString ®exp,
|
||||
uint32_t flags, UErrorCode &status) {
|
||||
UParseError pe;
|
||||
fPattern = RegexPattern::compile(regexp, flags, pe, status);
|
||||
fPatternOwned = TRUE;
|
||||
fTraceDebug = FALSE;
|
||||
fDeferredStatus = U_ZERO_ERROR;
|
||||
fStack = new UVector32(status);
|
||||
fData = fSmallData;
|
||||
fPattern = RegexPattern::compile(regexp, flags, pe, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (fPattern->fDataSize > sizeof(fSmallData)/sizeof(int32_t)) {
|
||||
fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t));
|
||||
}
|
||||
if (fStack == NULL || fData == NULL) {
|
||||
fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
reset();
|
||||
}
|
||||
@ -287,9 +298,7 @@ UBool RegexMatcher::find() {
|
||||
// Start at the position of the last match end. (Will be zero if the
|
||||
// matcher has been reset.
|
||||
//
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
if (fPattern->fBadState) {
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
@ -309,8 +318,8 @@ UBool RegexMatcher::find() {
|
||||
// No optimization was found.
|
||||
// Try a match at each input position.
|
||||
for (;;) {
|
||||
MatchAt(startPos, status);
|
||||
if (U_FAILURE(status)) {
|
||||
MatchAt(startPos, fDeferredStatus);
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return FALSE;
|
||||
}
|
||||
if (fMatch) {
|
||||
@ -332,8 +341,8 @@ UBool RegexMatcher::find() {
|
||||
if (startPos > 0) {
|
||||
return FALSE;
|
||||
}
|
||||
MatchAt(startPos, status);
|
||||
if (U_FAILURE(status)) {
|
||||
MatchAt(startPos, fDeferredStatus);
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return FALSE;
|
||||
}
|
||||
return fMatch;
|
||||
@ -347,8 +356,8 @@ UBool RegexMatcher::find() {
|
||||
int32_t pos = startPos;
|
||||
U16_NEXT(inputBuf, startPos, inputLen, c); // like c = inputBuf[startPos++];
|
||||
if (fPattern->fInitialChars->contains(c)) {
|
||||
MatchAt(pos, status);
|
||||
if (U_FAILURE(status)) {
|
||||
MatchAt(pos, fDeferredStatus);
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return FALSE;
|
||||
}
|
||||
if (fMatch) {
|
||||
@ -372,8 +381,8 @@ UBool RegexMatcher::find() {
|
||||
int32_t pos = startPos;
|
||||
U16_NEXT(inputBuf, startPos, inputLen, c); // like c = inputBuf[startPos++];
|
||||
if (c == theChar) {
|
||||
MatchAt(pos, status);
|
||||
if (U_FAILURE(status)) {
|
||||
MatchAt(pos, fDeferredStatus);
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return FALSE;
|
||||
}
|
||||
if (fMatch) {
|
||||
@ -391,8 +400,8 @@ UBool RegexMatcher::find() {
|
||||
{
|
||||
UChar32 c;
|
||||
if (startPos == 0) {
|
||||
MatchAt(startPos, status);
|
||||
if (U_FAILURE(status)) {
|
||||
MatchAt(startPos, fDeferredStatus);
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return FALSE;
|
||||
}
|
||||
if (fMatch) {
|
||||
@ -406,8 +415,8 @@ UBool RegexMatcher::find() {
|
||||
if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible
|
||||
(c == 0x0a || c==0x0c || c==0x85 ||c==0x2028 || c==0x2029 ||
|
||||
c == 0x0d && startPos+1 < inputLen && inputBuf[startPos+1] != 0x0a)) {
|
||||
MatchAt(startPos, status);
|
||||
if (U_FAILURE(status)) {
|
||||
MatchAt(startPos, fDeferredStatus);
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return FALSE;
|
||||
}
|
||||
if (fMatch) {
|
||||
|
@ -2,10 +2,10 @@
|
||||
// file: repattrn.cpp
|
||||
//
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
**********************************************************************
|
||||
***************************************************************************
|
||||
* Copyright (C) 2002-2003 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
@ -65,7 +65,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
|
||||
fPattern = other.fPattern;
|
||||
fFlags = other.fFlags;
|
||||
fLiteralText = other.fLiteralText;
|
||||
fBadState = other.fBadState;
|
||||
fDeferredStatus = other.fDeferredStatus;
|
||||
fMinMatchLen = other.fMinMatchLen;
|
||||
fMaxMatchLen = other.fMaxMatchLen;
|
||||
fMaxCaptureDigits = other.fMaxCaptureDigits;
|
||||
@ -76,18 +76,14 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
|
||||
fInitialStringLen = other.fInitialStringLen;
|
||||
fInitialChars = new UnicodeSet(*other.fInitialChars);
|
||||
fInitialChar = other.fInitialChar;
|
||||
if (fBadState) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Copy the pattern. It's just values, nothing deep to copy.
|
||||
// TODO: something with status
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fCompiledPat->assign(*other.fCompiledPat, status);
|
||||
fGroupMap->assign(*other.fGroupMap, status);
|
||||
fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
|
||||
fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
|
||||
|
||||
// Note: do not copy fMatcher. It'll be created on first use if the
|
||||
// destination needs one.
|
||||
// TODO: thread safety
|
||||
|
||||
// Copy the Unicode Sets.
|
||||
// Could be made more efficient if the sets were reference counted and shared,
|
||||
@ -95,16 +91,16 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
|
||||
// Note: init() already added an empty element zero to fSets
|
||||
int32_t i;
|
||||
for (i=1; i<other.fSets->size(); i++) {
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return *this;
|
||||
}
|
||||
UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
|
||||
UnicodeSet *newSet = new UnicodeSet(*sourceSet);
|
||||
if (newSet == NULL) {
|
||||
fBadState = TRUE;
|
||||
fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
fSets->addElement(newSet, status);
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
fBadState = TRUE;
|
||||
fSets->addElement(newSet, fDeferredStatus);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
@ -118,7 +114,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
|
||||
//--------------------------------------------------------------------------
|
||||
void RegexPattern::init() {
|
||||
fFlags = 0;
|
||||
fBadState = FALSE;
|
||||
fDeferredStatus = U_ZERO_ERROR;
|
||||
fMinMatchLen = 0;
|
||||
fMaxMatchLen = -1;
|
||||
fMaxCaptureDigits = 1;
|
||||
@ -132,19 +128,20 @@ void RegexPattern::init() {
|
||||
fInitialChars = NULL;
|
||||
fInitialChar = 0;
|
||||
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
// Init of a completely new RegexPattern.
|
||||
fCompiledPat = new UVector32(status);
|
||||
fGroupMap = new UVector32(status);
|
||||
fSets = new UVector(status);
|
||||
fInitialChars = new UnicodeSet;
|
||||
if (U_FAILURE(status) || fCompiledPat == NULL || fSets == NULL || fInitialChars == NULL) {
|
||||
fBadState = TRUE;
|
||||
fCompiledPat = new UVector32(fDeferredStatus);
|
||||
fGroupMap = new UVector32(fDeferredStatus);
|
||||
fSets = new UVector(fDeferredStatus);
|
||||
fInitialChars = new UnicodeSet;
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
return;
|
||||
}
|
||||
if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || fInitialChars == NULL) {
|
||||
fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// Slot zero of the vector of sets is reserved. Fill it here.
|
||||
fSets->addElement((int32_t)0, status);
|
||||
fSets->addElement((int32_t)0, fDeferredStatus);
|
||||
}
|
||||
|
||||
|
||||
@ -205,8 +202,7 @@ RegexPattern *RegexPattern::clone() const {
|
||||
UBool RegexPattern::operator ==(const RegexPattern &other) const {
|
||||
UBool r = this->fFlags == other.fFlags &&
|
||||
this->fPattern == other.fPattern &&
|
||||
this->fBadState == FALSE &&
|
||||
other.fBadState == FALSE;
|
||||
this->fDeferredStatus == other.fDeferredStatus;
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -243,8 +239,8 @@ RegexPattern *RegexPattern::compile(
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
if (This->fBadState) {
|
||||
status = U_REGEX_INVALID_STATE;
|
||||
if (U_FAILURE(This->fDeferredStatus)) {
|
||||
status = This->fDeferredStatus;
|
||||
return NULL;
|
||||
}
|
||||
This->fFlags = flags;
|
||||
@ -283,20 +279,20 @@ uint32_t RegexPattern::flags() const {
|
||||
//
|
||||
//---------------------------------------------------------------------
|
||||
RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
|
||||
UErrorCode &err) const {
|
||||
UErrorCode &status) const {
|
||||
RegexMatcher *retMatcher = NULL;
|
||||
|
||||
if (U_FAILURE(err)) {
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
if (fBadState) {
|
||||
U_FAILURE(U_REGEX_INVALID_STATE);
|
||||
if (U_FAILURE(fDeferredStatus)) {
|
||||
status = fDeferredStatus;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
retMatcher = new RegexMatcher(this);
|
||||
if (retMatcher == NULL) {
|
||||
err = U_MEMORY_ALLOCATION_ERROR;
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
retMatcher->reset(input);
|
||||
@ -571,7 +567,7 @@ void RegexPattern::dumpOp(int32_t index) const {
|
||||
|
||||
|
||||
|
||||
|
||||
// TODO: get rid of max match length
|
||||
|
||||
|
||||
void RegexPattern::dump() const {
|
||||
@ -583,7 +579,6 @@ void RegexPattern::dump() const {
|
||||
REGEX_DUMP_DEBUG_PRINTF("%c", fPattern.charAt(i));
|
||||
}
|
||||
REGEX_DUMP_DEBUG_PRINTF("\n");
|
||||
REGEX_DUMP_DEBUG_PRINTF("Pattern Valid?: %s\n" , fBadState? "no" : "yes");
|
||||
REGEX_DUMP_DEBUG_PRINTF(" Min Match Length: %d\n", fMinMatchLen);
|
||||
REGEX_DUMP_DEBUG_PRINTF(" Max Match Length: %d\n", fMaxMatchLen);
|
||||
REGEX_DUMP_DEBUG_PRINTF(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
|
||||
|
@ -316,7 +316,7 @@ private:
|
||||
|
||||
UVector *fSets; // Any UnicodeSets referenced from the pattern.
|
||||
|
||||
UBool fBadState; // True if some prior error has left this
|
||||
UErrorCode fDeferredStatus; // status if some prior error has left this
|
||||
// RegexPattern in an unusable state.
|
||||
|
||||
int32_t fMinMatchLen; // Minimum Match Length. All matches will have length
|
||||
|
Loading…
Reference in New Issue
Block a user