ICU-105 Regular Expressions, memory cleanup

X-SVN-Rev: 10186
2002-11-07 20:06:39 +00:00 · 2002-11-07 20:06:39 +00:00 · 54d2cd87e5
commit 54d2cd87e5
parent 39660f8c4f
6 changed files with 54 additions and 121 deletions
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@ -25,6 +25,7 @@
 #include "cmemory.h"
 #include "cstring.h"
 #include "uassert.h"
+#include "ucln_in.h"

 #include "stdio.h"    // TODO:  Get rid of this

@ -129,7 +130,7 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
    //
    if (gRuleSets[kRuleSet_rule_char-128] == NULL) {
        //  TODO:  Make thread safe.
-        //  TODO:  Memory Cleanup on ICU shutdown.
+        ucln_i18n_registerCleanup();
        gRuleSets[kRuleSet_rule_char-128]       = new UnicodeSet(gRuleSet_rule_char_pattern,       status);
        gRuleSets[kRuleSet_white_space-128]     = (UnicodeSet*) uprv_openRuleWhiteSpaceSet(&status);
        gRuleSets[kRuleSet_digit_char-128]      = new UnicodeSet(gRuleSet_digit_char_pattern,      status);
@ -138,19 +139,7 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
        gPropSets[URX_ISSPACE_SET]              = new UnicodeSet(gIsSpacePattern,                  status);

        if (U_FAILURE(status)) {
-            delete gRuleSets[kRuleSet_rule_char-128];
-            delete gRuleSets[kRuleSet_white_space-128];
-            delete gRuleSets[kRuleSet_digit_char-128];
-            delete gUnescapeCharSet;
-            gRuleSets[kRuleSet_rule_char-128]   = NULL;
-            gRuleSets[kRuleSet_white_space-128] = NULL;
-            gRuleSets[kRuleSet_digit_char-128]  = NULL;
-            gUnescapeCharSet = NULL;
-            int i;
-            for (i=0; i<URX_LAST_SET; i++) {
-                delete (UnicodeSet *)gPropSets[i];
-                gPropSets[i] = NULL;
-            }
+            RegexCompile::cleanup();
            return;
        }
    }
@ -166,6 +155,29 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
 RegexCompile::~RegexCompile() {
 }

+//----------------------------------------------------------------------------------------
+//
+//   cleanup.    Delete every UnicodeSet cached in the file-level tables
+//               (gRuleSets, gUnescapeCharSet, gPropSets) and reset each slot
+//               to NULL.  Reached from regex_cleanup() during u_cleanup, and
+//               from the RegexCompile constructor when building the sets fails.
+//
+//----------------------------------------------------------------------------------------
+void RegexCompile::cleanup() {
+    // Sets used by the pattern scanner's state machine.
+    delete gRuleSets[kRuleSet_rule_char-128];
+    gRuleSets[kRuleSet_rule_char-128] = NULL;
+
+    delete gRuleSets[kRuleSet_white_space-128];
+    gRuleSets[kRuleSet_white_space-128] = NULL;
+
+    delete gRuleSets[kRuleSet_digit_char-128];
+    gRuleSets[kRuleSet_digit_char-128] = NULL;
+
+    delete gUnescapeCharSet;
+    gUnescapeCharSet = NULL;
+
+    // Property sets: walk the whole table, empty slots included
+    // (deleting a NULL pointer is a no-op).
+    for (int setIndex = 0; setIndex < URX_LAST_SET; ++setIndex) {
+        delete (UnicodeSet *)gPropSets[setIndex];
+        gPropSets[setIndex] = NULL;
+    }
+}
+
+
 //---------------------------------------------------------------------------------
 //
 //  Compile regex pattern.   The state machine for rules parsing is here.
--- a/icu4c/source/i18n/regexcmp.h
+++ b/icu4c/source/i18n/regexcmp.h
@ -64,6 +64,8 @@ public:

    void        nextChar(RegexPatternChar &c);      // Get the next char from the input stream.

+    static void cleanup();                       // Memory cleanup
+

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
--- a/icu4c/source/i18n/repattrn.cpp
+++ b/icu4c/source/i18n/repattrn.cpp
@ -87,6 +87,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    //  Copy the Unicode Sets.  
    //    Could be made more efficient if the sets were reference counted and shared,
    //    but I doubt that pattern copying will be particularly common. 
+    fSets->addElement((UnicodeSet *)NULL, status);
    for (i=1; i<other.fSets->size(); i++) {
        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
        UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
@ -94,7 +95,7 @@ RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
            fBadState = TRUE;
            break;
        }
-        fCompiledPat->addElement(other.fCompiledPat->elementAti(i), status);
+        fSets->addElement(newSet, status);
    }
    if (U_FAILURE(status)) {
        fBadState = TRUE;
@ -510,6 +511,17 @@ breakFromLoop:

 const char RegexPattern::fgClassID = 0;

+//----------------------------------------------------------------------------------
+//
+//   regex_cleanup      Memory cleanup function, free/delete all cached memory
+//                      by delegating to RegexCompile::cleanup().
+//                      Called from i18n_cleanup() as part of ICU's u_cleanup().
+//                      Always returns TRUE.
+//
+//----------------------------------------------------------------------------------
+U_CFUNC UBool
+regex_cleanup(void) {
+    RegexCompile::cleanup();
+    return TRUE;
+}

 U_NAMESPACE_END
 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
--- a/icu4c/source/i18n/ucln_in.c
+++ b/icu4c/source/i18n/ucln_in.c
@ -35,6 +35,11 @@ static UBool i18n_cleanup(void)
    ucol_cleanup();
    ucol_bld_cleanup();
 #endif
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+    regex_cleanup();
+#endif
+
    return TRUE;
 }

--- a/icu4c/source/i18n/ucln_in.h
+++ b/icu4c/source/i18n/ucln_in.h
@ -32,6 +32,8 @@ U_CFUNC UBool ucol_cleanup(void);

 U_CFUNC UBool ucol_bld_cleanup(void);

+U_CFUNC UBool regex_cleanup(void);
+
 #ifdef ICU_DATEFORMATSYMBOLS_USE_DEPRECATES
 U_CFUNC UBool dateFormatSymbols_cleanup(void);
 #endif
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@ -700,108 +700,6 @@ void RegexTest::API_Match() {
        delete matcher;
        delete pat;
    }
-
-    //
-    //  Replace
-    //
-    {
-        int32_t             flags=0;
-        UParseError         pe;
-        UErrorCode          status=U_ZERO_ERROR;
-
-        UnicodeString       re("abc");
-        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
-        REGEX_CHECK_STATUS;
-        UnicodeString data = ".abc..abc...abc..";
-        //                    012345678901234567
-        RegexMatcher *matcher = pat->matcher(data, status);
-
-        //
-        //  Plain vanilla matches.
-        //
-        UnicodeString  dest;
-        dest = matcher->replaceFirst("yz", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == ".yz..abc...abc..");
-
-        dest = matcher->replaceAll("yz", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == ".yz..yz...yz..");
-
-        //
-        //  Plain vanilla non-matches.
-        //
-        UnicodeString d2 = ".abx..abx...abx..";
-        matcher->reset(d2);
-        dest = matcher->replaceFirst("yz", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == ".abx..abx...abx..");
-
-        dest = matcher->replaceAll("yz", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == ".abx..abx...abx..");
-
-        //
-        // Empty source string
-        //
-        UnicodeString d3 = "";
-        matcher->reset(d3);
-        dest = matcher->replaceFirst("yz", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == "");
-
-        dest = matcher->replaceAll("yz", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == "");
-
-        //
-        // Empty substitution string
-        //
-        matcher->reset(data);              // ".abc..abc...abc.."
-        dest = matcher->replaceFirst("", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == "...abc...abc..");
-
-        dest = matcher->replaceAll("", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == "........");
-
-        //
-        // match whole string
-        //
-        UnicodeString d4 = "abc";
-        matcher->reset(d4);   
-        dest = matcher->replaceFirst("xyz", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == "xyz");
-
-        dest = matcher->replaceAll("xyz", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == "xyz");
-
-        //
-        // Capture Group, simple case
-        //
-        UnicodeString       re2("a(..)");
-        RegexPattern *pat2 = RegexPattern::compile(re2, flags, pe, status);
-        REGEX_CHECK_STATUS;
-        UnicodeString d5 = "abcdefg";
-        RegexMatcher *matcher2 = pat2->matcher(d5, status);
-        REGEX_CHECK_STATUS;
-        dest = matcher2->replaceFirst("$1$1", status);
-        REGEX_CHECK_STATUS;
-        REGEX_ASSERT(dest == "bcbcdefg");
-      
-        // TODO:  need more through testing of capture substitutions.
-
-
-        //
-        //  Non-Grouping parentheses
-        //
-
-    }
-
-
        
 }

@ -910,10 +808,10 @@ void RegexTest::API_Replace() {
    // TODO:  need more through testing of capture substitutions.
    
    
-    //
-    //  Non-Grouping parentheses
-    //
-    
+    delete matcher2;
+    delete pat2;
+    delete matcher;
+    delete pat;
 }


@ -981,6 +879,7 @@ void RegexTest::API_Pattern() {
    delete pat1;
    delete pat2;

+
    //
    //   matches convenience API
    //
@ -1052,6 +951,7 @@ void RegexTest::API_Pattern() {
    REGEX_ASSERT(n==0);
    REGEX_ASSERT(fields[0]=="foo");

+    delete pat1;

 }