ICU-105 Regular Expressions, memory cleanup

X-SVN-Rev: 10186
This commit is contained in:
Andy Heninger 2002-11-07 20:06:39 +00:00
parent 39660f8c4f
commit 54d2cd87e5
6 changed files with 54 additions and 121 deletions

View File

@ -25,6 +25,7 @@
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
#include "ucln_in.h"
#include "stdio.h" // TODO: Get rid of this
@ -129,7 +130,7 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
//
if (gRuleSets[kRuleSet_rule_char-128] == NULL) {
// TODO: Make thread safe.
// TODO: Memory Cleanup on ICU shutdown.
ucln_i18n_registerCleanup();
gRuleSets[kRuleSet_rule_char-128] = new UnicodeSet(gRuleSet_rule_char_pattern, status);
gRuleSets[kRuleSet_white_space-128] = (UnicodeSet*) uprv_openRuleWhiteSpaceSet(&status);
gRuleSets[kRuleSet_digit_char-128] = new UnicodeSet(gRuleSet_digit_char_pattern, status);
@ -138,19 +139,7 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
gPropSets[URX_ISSPACE_SET] = new UnicodeSet(gIsSpacePattern, status);
if (U_FAILURE(status)) {
delete gRuleSets[kRuleSet_rule_char-128];
delete gRuleSets[kRuleSet_white_space-128];
delete gRuleSets[kRuleSet_digit_char-128];
delete gUnescapeCharSet;
gRuleSets[kRuleSet_rule_char-128] = NULL;
gRuleSets[kRuleSet_white_space-128] = NULL;
gRuleSets[kRuleSet_digit_char-128] = NULL;
gUnescapeCharSet = NULL;
int i;
for (i=0; i<URX_LAST_SET; i++) {
delete (UnicodeSet *)gPropSets[i];
gPropSets[i] = NULL;
}
RegexCompile::cleanup();
return;
}
}
@ -166,6 +155,29 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
RegexCompile::~RegexCompile()
{
    // Intentionally empty: the destructor body performs no explicit work.
    // The cached global sets are released separately by RegexCompile::cleanup().
}
//----------------------------------------------------------------------------------------
//
// cleanup. Called (indirectly) by u_cleanup to free all cached memory
//
//----------------------------------------------------------------------------------------
void RegexCompile::cleanup() {
    // Free each cached rule-parsing set and clear its slot right away, so the
    // global tables never hold a pointer to an object that has been deleted.
    delete gRuleSets[kRuleSet_rule_char-128];
    gRuleSets[kRuleSet_rule_char-128] = NULL;

    delete gRuleSets[kRuleSet_white_space-128];
    gRuleSets[kRuleSet_white_space-128] = NULL;

    delete gRuleSets[kRuleSet_digit_char-128];
    gRuleSets[kRuleSet_digit_char-128] = NULL;

    delete gUnescapeCharSet;
    gUnescapeCharSet = NULL;

    // Free every cached property set; deleting a NULL entry is harmless, so
    // slots that were never populated need no special handling.
    for (int propIdx = 0; propIdx < URX_LAST_SET; ++propIdx) {
        delete (UnicodeSet *)gPropSets[propIdx];
        gPropSets[propIdx] = NULL;
    }
}
//---------------------------------------------------------------------------------
//
// Compile regex pattern. The state machine for rules parsing is here.

View File

@ -64,6 +64,8 @@ public:
void nextChar(RegexPatternChar &c); // Get the next char from the input stream.
static void cleanup(); // Memory cleanup
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.

View File

@ -87,6 +87,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
// Copy the Unicode Sets.
// Could be made more efficient if the sets were reference counted and shared,
// but I doubt that pattern copying will be particularly common.
fSets->addElement((UnicodeSet *)NULL, status);
for (i=1; i<other.fSets->size(); i++) {
UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
UnicodeSet *newSet = new UnicodeSet(*sourceSet);
@ -94,7 +95,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
fBadState = TRUE;
break;
}
fCompiledPat->addElement(other.fCompiledPat->elementAti(i), status);
fSets->addElement(newSet, status);
}
if (U_FAILURE(status)) {
fBadState = TRUE;
@ -510,6 +511,17 @@ breakFromLoop:
const char RegexPattern::fgClassID = 0;
//----------------------------------------------------------------------------------
//
// regex_cleanup Memory cleanup function, free/delete all
// cached memory. Called by ICU's u_cleanup() function.
//
//----------------------------------------------------------------------------------
U_CFUNC UBool
regex_cleanup(void) {
    // Release all UnicodeSets cached by the regular expression compiler.
    RegexCompile::cleanup();

    // This function unconditionally reports TRUE to its caller.
    return TRUE;
}
U_NAMESPACE_END
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS

View File

@ -35,6 +35,11 @@ static UBool i18n_cleanup(void)
ucol_cleanup();
ucol_bld_cleanup();
#endif
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
regex_cleanup();
#endif
return TRUE;
}

View File

@ -32,6 +32,8 @@ U_CFUNC UBool ucol_cleanup(void);
U_CFUNC UBool ucol_bld_cleanup(void);
U_CFUNC UBool regex_cleanup(void);
#ifdef ICU_DATEFORMATSYMBOLS_USE_DEPRECATES
U_CFUNC UBool dateFormatSymbols_cleanup(void);
#endif

View File

@ -701,108 +701,6 @@ void RegexTest::API_Match() {
delete pat;
}
//
// Replace
//
{
int32_t flags=0;
UParseError pe;
UErrorCode status=U_ZERO_ERROR;
UnicodeString re("abc");
RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
REGEX_CHECK_STATUS;
UnicodeString data = ".abc..abc...abc..";
// 012345678901234567
RegexMatcher *matcher = pat->matcher(data, status);
//
// Plain vanilla matches.
//
UnicodeString dest;
dest = matcher->replaceFirst("yz", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == ".yz..abc...abc..");
dest = matcher->replaceAll("yz", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == ".yz..yz...yz..");
//
// Plain vanilla non-matches.
//
UnicodeString d2 = ".abx..abx...abx..";
matcher->reset(d2);
dest = matcher->replaceFirst("yz", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == ".abx..abx...abx..");
dest = matcher->replaceAll("yz", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == ".abx..abx...abx..");
//
// Empty source string
//
UnicodeString d3 = "";
matcher->reset(d3);
dest = matcher->replaceFirst("yz", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "");
dest = matcher->replaceAll("yz", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "");
//
// Empty substitution string
//
matcher->reset(data); // ".abc..abc...abc.."
dest = matcher->replaceFirst("", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "...abc...abc..");
dest = matcher->replaceAll("", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "........");
//
// match whole string
//
UnicodeString d4 = "abc";
matcher->reset(d4);
dest = matcher->replaceFirst("xyz", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "xyz");
dest = matcher->replaceAll("xyz", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "xyz");
//
// Capture Group, simple case
//
UnicodeString re2("a(..)");
RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status);
REGEX_CHECK_STATUS;
UnicodeString d5 = "abcdefg";
RegexMatcher *matcher2 = pat2->matcher(d5, status);
REGEX_CHECK_STATUS;
dest = matcher2->replaceFirst("$1$1", status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "bcbcdefg");
// TODO: need more thorough testing of capture substitutions.
//
// Non-Grouping parentheses
//
}
}
@ -910,10 +808,10 @@ void RegexTest::API_Replace() {
// TODO: need more thorough testing of capture substitutions.
//
// Non-Grouping parentheses
//
delete matcher2;
delete pat2;
delete matcher;
delete pat;
}
@ -981,6 +879,7 @@ void RegexTest::API_Pattern() {
delete pat1;
delete pat2;
//
// matches convenience API
//
@ -1052,6 +951,7 @@ void RegexTest::API_Pattern() {
REGEX_ASSERT(n==0);
REGEX_ASSERT(fields[0]=="foo");
delete pat1;
}