ICU-105 Regular Expressions, memory cleanup
X-SVN-Rev: 10186
This commit is contained in:
parent
39660f8c4f
commit
54d2cd87e5
@ -25,6 +25,7 @@
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uassert.h"
|
||||
#include "ucln_in.h"
|
||||
|
||||
#include "stdio.h" // TODO: Get rid of this
|
||||
|
||||
@ -129,7 +130,7 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
|
||||
//
|
||||
if (gRuleSets[kRuleSet_rule_char-128] == NULL) {
|
||||
// TODO: Make thread safe.
|
||||
// TODO: Memory Cleanup on ICU shutdown.
|
||||
ucln_i18n_registerCleanup();
|
||||
gRuleSets[kRuleSet_rule_char-128] = new UnicodeSet(gRuleSet_rule_char_pattern, status);
|
||||
gRuleSets[kRuleSet_white_space-128] = (UnicodeSet*) uprv_openRuleWhiteSpaceSet(&status);
|
||||
gRuleSets[kRuleSet_digit_char-128] = new UnicodeSet(gRuleSet_digit_char_pattern, status);
|
||||
@ -138,19 +139,7 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
|
||||
gPropSets[URX_ISSPACE_SET] = new UnicodeSet(gIsSpacePattern, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
delete gRuleSets[kRuleSet_rule_char-128];
|
||||
delete gRuleSets[kRuleSet_white_space-128];
|
||||
delete gRuleSets[kRuleSet_digit_char-128];
|
||||
delete gUnescapeCharSet;
|
||||
gRuleSets[kRuleSet_rule_char-128] = NULL;
|
||||
gRuleSets[kRuleSet_white_space-128] = NULL;
|
||||
gRuleSets[kRuleSet_digit_char-128] = NULL;
|
||||
gUnescapeCharSet = NULL;
|
||||
int i;
|
||||
for (i=0; i<URX_LAST_SET; i++) {
|
||||
delete (UnicodeSet *)gPropSets[i];
|
||||
gPropSets[i] = NULL;
|
||||
}
|
||||
RegexCompile::cleanup();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -166,6 +155,29 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
|
||||
RegexCompile::~RegexCompile() {   // Empty body: nothing is freed here. The cached sets built by the constructor are deleted in RegexCompile::cleanup().
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//
|
||||
//  cleanup.   Called (indirectly) by u_cleanup to free all cached memory.
|
||||
//             Deletes the three gRuleSets entries, gUnescapeCharSet and gPropSets[0 .. URX_LAST_SET-1],
//             then resets each pointer to NULL. Also invoked by the RegexCompile constructor when
//             building the sets fails, so it must tolerate partially initialized state.
|
||||
//----------------------------------------------------------------------------------------
|
||||
void RegexCompile::cleanup() {
|
||||
delete gRuleSets[kRuleSet_rule_char-128];     // delete on a NULL pointer is a no-op, so this is safe even if the set was never built
|
||||
delete gRuleSets[kRuleSet_white_space-128];   // NOTE(review): this entry is created via uprv_openRuleWhiteSpaceSet() and cast to UnicodeSet*; confirm plain delete is the matching release
|
||||
delete gRuleSets[kRuleSet_digit_char-128];
|
||||
delete gUnescapeCharSet;
|
||||
gRuleSets[kRuleSet_rule_char-128] = NULL;     // the constructor tests this slot against NULL to decide whether to (re)build the sets
|
||||
gRuleSets[kRuleSet_white_space-128] = NULL;
|
||||
gRuleSets[kRuleSet_digit_char-128] = NULL;
|
||||
gUnescapeCharSet = NULL;
|
||||
int i;
|
||||
for (i=0; i<URX_LAST_SET; i++) {              // release each property set and clear its slot
|
||||
delete (UnicodeSet *)gPropSets[i];            // entries are cast to UnicodeSet* before deletion
|
||||
gPropSets[i] = NULL;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
//
|
||||
// Compile regex pattern. The state machine for rules parsing is here.
|
||||
|
@ -64,6 +64,8 @@ public:
|
||||
|
||||
void nextChar(RegexPatternChar &c); // Get the next char from the input stream.
|
||||
|
||||
static void cleanup(); // Memory cleanup
|
||||
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
|
@ -87,6 +87,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
|
||||
// Copy the Unicode Sets.
|
||||
// Could be made more efficient if the sets were reference counted and shared,
|
||||
// but I doubt that pattern copying will be particularly common.
|
||||
fSets->addElement((UnicodeSet *)NULL, status);
|
||||
for (i=1; i<other.fSets->size(); i++) {
|
||||
UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
|
||||
UnicodeSet *newSet = new UnicodeSet(*sourceSet);
|
||||
@ -94,7 +95,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
|
||||
fBadState = TRUE;
|
||||
break;
|
||||
}
|
||||
fCompiledPat->addElement(other.fCompiledPat->elementAti(i), status);
|
||||
fSets->addElement(newSet, status);
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
fBadState = TRUE;
|
||||
@ -510,6 +511,17 @@ breakFromLoop:
|
||||
|
||||
const char RegexPattern::fgClassID = 0;
|
||||
|
||||
//----------------------------------------------------------------------------------
|
||||
//
|
||||
// regex_cleanup Memory cleanup function, free/delete all
|
||||
// cached memory. Called by ICU's u_cleanup() function.
//                    Simply forwards to RegexCompile::cleanup() and always returns TRUE.
|
||||
//
|
||||
//----------------------------------------------------------------------------------
|
||||
U_CFUNC UBool
|
||||
regex_cleanup(void) {
|
||||
RegexCompile::cleanup();   // releases the cached UnicodeSets and NULLs their pointers
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
|
@ -35,6 +35,11 @@ static UBool i18n_cleanup(void)
|
||||
ucol_cleanup();
|
||||
ucol_bld_cleanup();
|
||||
#endif
|
||||
|
||||
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
regex_cleanup();
|
||||
#endif
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
@ -32,6 +32,8 @@ U_CFUNC UBool ucol_cleanup(void);
|
||||
|
||||
U_CFUNC UBool ucol_bld_cleanup(void);
|
||||
|
||||
U_CFUNC UBool regex_cleanup(void);
|
||||
|
||||
#ifdef ICU_DATEFORMATSYMBOLS_USE_DEPRECATES
|
||||
U_CFUNC UBool dateFormatSymbols_cleanup(void);
|
||||
#endif
|
||||
|
@ -700,108 +700,6 @@ void RegexTest::API_Match() {
|
||||
delete matcher;
|
||||
delete pat;
|
||||
}
|
||||
|
||||
//
|
||||
// Replace
|
||||
//
|
||||
{
|
||||
int32_t flags=0;
|
||||
UParseError pe;
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
|
||||
UnicodeString re("abc");
|
||||
RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
UnicodeString data = ".abc..abc...abc..";
|
||||
// 012345678901234567
|
||||
RegexMatcher *matcher = pat->matcher(data, status);
|
||||
|
||||
//
|
||||
// Plain vanilla matches.
|
||||
//
|
||||
UnicodeString dest;
|
||||
dest = matcher->replaceFirst("yz", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == ".yz..abc...abc..");
|
||||
|
||||
dest = matcher->replaceAll("yz", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == ".yz..yz...yz..");
|
||||
|
||||
//
|
||||
// Plain vanilla non-matches.
|
||||
//
|
||||
UnicodeString d2 = ".abx..abx...abx..";
|
||||
matcher->reset(d2);
|
||||
dest = matcher->replaceFirst("yz", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == ".abx..abx...abx..");
|
||||
|
||||
dest = matcher->replaceAll("yz", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == ".abx..abx...abx..");
|
||||
|
||||
//
|
||||
// Empty source string
|
||||
//
|
||||
UnicodeString d3 = "";
|
||||
matcher->reset(d3);
|
||||
dest = matcher->replaceFirst("yz", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "");
|
||||
|
||||
dest = matcher->replaceAll("yz", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "");
|
||||
|
||||
//
|
||||
// Empty substitution string
|
||||
//
|
||||
matcher->reset(data); // ".abc..abc...abc.."
|
||||
dest = matcher->replaceFirst("", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "...abc...abc..");
|
||||
|
||||
dest = matcher->replaceAll("", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "........");
|
||||
|
||||
//
|
||||
// match whole string
|
||||
//
|
||||
UnicodeString d4 = "abc";
|
||||
matcher->reset(d4);
|
||||
dest = matcher->replaceFirst("xyz", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "xyz");
|
||||
|
||||
dest = matcher->replaceAll("xyz", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "xyz");
|
||||
|
||||
//
|
||||
// Capture Group, simple case
|
||||
//
|
||||
UnicodeString re2("a(..)");
|
||||
RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
UnicodeString d5 = "abcdefg";
|
||||
RegexMatcher *matcher2 = pat2->matcher(d5, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
dest = matcher2->replaceFirst("$1$1", status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "bcbcdefg");
|
||||
|
||||
// TODO: need more thorough testing of capture substitutions.
|
||||
|
||||
|
||||
//
|
||||
// Non-Grouping parentheses
|
||||
//
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -910,10 +808,10 @@ void RegexTest::API_Replace() {
|
||||
// TODO: need more thorough testing of capture substitutions.
|
||||
|
||||
|
||||
//
|
||||
// Non-Grouping parentheses
|
||||
//
|
||||
|
||||
delete matcher2;
|
||||
delete pat2;
|
||||
delete matcher;
|
||||
delete pat;
|
||||
}
|
||||
|
||||
|
||||
@ -981,6 +879,7 @@ void RegexTest::API_Pattern() {
|
||||
delete pat1;
|
||||
delete pat2;
|
||||
|
||||
|
||||
//
|
||||
// matches convenience API
|
||||
//
|
||||
@ -1052,6 +951,7 @@ void RegexTest::API_Pattern() {
|
||||
REGEX_ASSERT(n==0);
|
||||
REGEX_ASSERT(fields[0]=="foo");
|
||||
|
||||
delete pat1;
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user