ICU-2421 Redo Regex C API changes lost in OSS crash

X-SVN-Rev: 14721
2004-03-22 22:16:21 +00:00 · 2004-03-22 22:16:21 +00:00 · efeb3b480d
commit efeb3b480d
parent 33c376bd78
6 changed files with 1361 additions and 11 deletions
--- a/icu4c/source/i18n/i18n.dsp
+++ b/icu4c/source/i18n/i18n.dsp
@ -1943,6 +1943,57 @@ SOURCE=.\rematch.cpp
 # Begin Source File

 SOURCE=.\repattrn.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\uregex.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\unicode\uregex.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\unicode\uregex.h
+
+"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy    $(InputPath)    ..\..\include\unicode
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unicode\uregex.h
+
+"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy    $(InputPath)    ..\..\include\unicode
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win64 Release"
+
+# Begin Custom Build
+InputPath=.\unicode\uregex.h
+
+"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy    $(InputPath)    ..\..\include\unicode
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win64 Debug"
+
+# Begin Custom Build
+InputPath=.\unicode\uregex.h
+
+"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy    $(InputPath)    ..\..\include\unicode
+
+# End Custom Build
+
+!ENDIF 
+
 # End Source File
 # End Group
 # Begin Group "transforms"
--- a/icu4c/source/i18n/repattrn.cpp
+++ b/icu4c/source/i18n/repattrn.cpp
@ -243,7 +243,7 @@ RegexPattern  *RegexPattern::compile(
    }

    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
-                              UREGEX_DOTALL   | UREGEX_MULTILINE;
+                              UREGEX_DOTALL   | UREGEX_MULTILINE | UREGEX_UWORD;

    if ((flags & ~allFlags) != 0) {
        status = U_REGEX_INVALID_FLAG;
--- a/icu4c/source/i18n/unicode/regex.h
+++ b/icu4c/source/i18n/unicode/regex.h
@ -60,6 +60,7 @@ class UnicodeSet;
 struct REStackFrame;
 struct Regex8BitSet;
 class  RuleBasedBreakIterator;
+class  RegexCImpl;



@ -383,6 +384,7 @@ private:

    friend class RegexCompile;
    friend class RegexMatcher;
+    friend class RegexCImpl;

    //
    //  Implementation Methods
@ -817,6 +819,7 @@ private:
    RegexMatcher(const RegexMatcher &other);
    RegexMatcher &operator =(const RegexMatcher &rhs);
    friend class RegexPattern;
+    friend class RegexCImpl;


    //
@ -857,6 +860,7 @@ private:

    RuleBasedBreakIterator  *fWordBreakItr;

+
 };

 U_NAMESPACE_END
--- a/icu4c/source/i18n/unicode/uregex.h
+++ b/icu4c/source/i18n/unicode/uregex.h
@ -20,6 +20,9 @@

 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

+#include "unicode/parseerr.h"
+
+
 struct URegularExpression;
 /**
  * Structure represeting a compiled regular rexpression, plus the results
@ -132,10 +135,14 @@ U_CAPI void U_EXPORT2
 uregex_close(URegularExpression *regexp);

 /**
- * Make an exact copy of a compiled regular expression.  Cloning a regular
+ * Make a copy of a compiled regular expression.  Cloning a regular
 * expression is faster than opening a second instance from the source
 * form of the expression, and requires less memory.
 * <p>
+ * Note that the current input string and the position of any matched text
+ *  within it are not cloned; only the pattern itself and the
+ *  match mode flags are copied.
+ * <p>
 * Cloning can be particularly useful to threaded applications that perform
 * multiple match operations in parallel.  Each concurrent RE
 * operation requires its own instance of a URegularExpression.
@ -152,7 +159,10 @@ uregex_clone(const URegularExpression *regexp, UErrorCode *status);
 *
 * @param regexp     The compiled regular expression.
 * @param patLength  This output parameter will be set to the length of the
- *                   pattern string.
+ *                   pattern string.  A NULL pointer may be used here if the
+ *                   pattern length is not needed, as would be the case if
+ *                   the pattern is known in advance to be a NUL terminated
+ *                   string.
 * @param status     Receives errors detected by this function.
 * @return a pointer to the pattern string.  The storage for the string is
 *                   owned by the regular expression object, and must not be
@ -160,19 +170,21 @@ uregex_clone(const URegularExpression *regexp, UErrorCode *status);
 *                   will remain valid until the regular expression is closed.
 */
 U_CAPI const UChar * U_EXPORT2 
-uregex_pattern(const    URegularExpression *regexp,
-                        int32_t           **patLength,
-                        UErrorCode         *status);
+uregex_pattern(const  URegularExpression   *regexp,
+                         int32_t           *patLength,
+                         UErrorCode        *status);


 /**
  * Get the match mode flags that were specified when compiling this regular expression.
  * @param status     Receives errors detected by this function.
+  * @param regexp     The compiled regular expression.
  * @param return     The match mode flags
  * @see URegexpFlag
  */
 U_CAPI int32_t U_EXPORT2 
-uregex_flags(UErrorCode *status);
+uregex_flags(const  URegularExpression   *regexp,
+                    UErrorCode           *status);


 /**
@ -206,14 +218,18 @@ uregex_setText(URegularExpression *regexp,
  *   pointer was previously supplied via uregex_setText().
  *
  * @param regexp      The compiled regular expression.
-  * @param textLength  The length of the string is returned in this output parameter.
+  * @param textLength  The length of the string is returned in this output parameter. 
+  *                    A NULL pointer may be used here if the
+  *                    text length is not needed, as would be the case if
+  *                    the text is known in advance to be a NUL terminated
+  *                    string.
  * @param status      Receives errors detected by this function.
  * @return            Poiner to the subject text string currently associated with
  *                    this regular expression.
  */
 U_CAPI const UChar * U_EXPORT2 
 uregex_getText(URegularExpression *regexp,
-               int32_t            **textLength,
+               int32_t            *textLength,
               UErrorCode         *status);

 /**
@ -449,6 +465,17 @@ uregex_replaceFirst(URegularExpression  *regexp,
  *   replacement string is appended to the output string,
  *   including handling any substitutions of captured text.</p>
  *
+  *   <p>A note on preflight computation of buffer size and error handling:
+  *   Calls to uregex_appendReplacement() and uregex_appendTail() are
+  *   designed to be chained, one after another, with the destination
+  *   buffer pointer and buffer capacity updated after each in preparation
+  *   for the next.  If the destination buffer is exhausted partway through such a
+  *   sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned.  Normal
+  *   ICU conventions are for a function to perform no action if it is
+  *   called with an error status, but for this one case, uregex_appendReplacement()
+  *   will operate normally so that buffer size computations will complete
+  *   correctly.</p>
+  *
  *   <p>For simple, prepackaged, non-incremental find-and-replace
  *      operations, see replaceFirst() or replaceAll().</p>
  *
@ -535,7 +562,7 @@ uregex_appendTail(URegularExpression    *regexp,
   *                         extra positions within the destFields array will be
   *                         set to NULL.
   *    @param   destCapacity The capacity of the destBuf.
-   *    @param   requiredCapacty  The actual capacity required of the destBuf.
+   *    @param   requiredCapacity  The actual capacity required of the destBuf.
   *                         If destCapacity is too small, requiredCapacity is the
   *                         total capacity required to hold all of the output.
   *    @param   destFields  An array to be filled with the position of each
@ -572,7 +599,7 @@ U_CAPI int32_t U_EXPORT2
 uregex_split(   URegularExpression      *regexp,
                  UChar                 *destBuf,
                  int32_t                destCapacity,
-                  int32_t              **requiredCapacity,
+                  int32_t               *requiredCapacity,
                  UChar                 *destFields[],
                  int32_t                destFieldsCapacity,
                  UErrorCode            *status);
--- a/icu4c/source/i18n/uregex.cpp
+++ b/icu4c/source/i18n/uregex.cpp
--- a/icu4c/source/test/cintltst/reapits.c
+++ b/icu4c/source/test/cintltst/reapits.c
@ -0,0 +1,176 @@
+/********************************************************************
+ * COPYRIGHT: 
+ * Copyright (c) 2004, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+/********************************************************************************
+*
+* File reapits.c
+*
+*********************************************************************************/
+/*C API TEST FOR Regular Expressions */
+/**
+*   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
+*   try to test the full functionality.  It just calls each function and verifies that it
+*   works on a basic level.
+*
+*   More complete testing of regular expression functionality is done with the C++ tests.
+**/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
+#include <stdlib.h>
+#include <string.h>
+#include "unicode/uloc.h"
+#include "unicode/uregex.h"
+#include "unicode/ustring.h"
+#include "cintltst.h"
+/* Log the file, line and error name (via log_err) when status is a failure code; does not return early. */
+#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
+log_err("Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status));}}
+/* Log the file and line (via log_err) when expr compares equal to FALSE; does not return early. */
+#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
+log_err("Test Failure at file %s, line %d\n", __FILE__, __LINE__);}}
+
+static void TestRegexCAPI(void);
+
+void addURegexTest(TestNode** root);
+
+/* Add TestRegexCAPI to the test tree at root, under the path "regex/TestRegexCAPI". */
+void addURegexTest(TestNode** root)
+{
+    addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
+}
+
+/* Basic smoke test of the uregex C API: open/close, flag validation, openC, clone and pattern(). */
+void TestRegexCAPI(void) {
+    UErrorCode           status = U_ZERO_ERROR;
+    URegularExpression  *re;
+    UChar                pat[200];
+
+    /* Minimalist open/close */
+    u_uastrncpy(pat, "abc*", sizeof(pat)/2);
+    re = uregex_open(pat, -1, 0, 0, &status);
+    TEST_ASSERT_SUCCESS(status);
+    uregex_close(re);
+
+    /* Open with every flag value except UREGEX_CANON_EQ set */
+    status = U_ZERO_ERROR;
+    re = uregex_open(pat, -1, 
+        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
+        0, &status);
+    TEST_ASSERT_SUCCESS(status);
+    uregex_close(re);
+
+    /* Open with an invalid flag */
+    status = U_ZERO_ERROR;
+    re = uregex_open(pat, -1, 0x40000000, 0, &status);
+    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
+    uregex_close(re);
+
+
+    /* openC   open from a C string */
+    {
+        const UChar   *p;
+        int32_t  len;
+        status = U_ZERO_ERROR;
+        re = uregex_openC("abc*", 0, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        p = uregex_pattern(re, &len, &status);
+        TEST_ASSERT_SUCCESS(status);
+        u_uastrncpy(pat, "abc*", sizeof(pat)/2);
+        TEST_ASSERT(u_strcmp(pat, p) == 0);
+        TEST_ASSERT(len==(int32_t)strlen("abc*"));
+
+        uregex_close(re);
+    }
+
+    /*
+     *  clone
+     */
+    {
+        URegularExpression *clone1;
+        URegularExpression *clone2;
+        URegularExpression *clone3;
+        UChar  testString1[30];
+        UChar  testString2[30];
+        UBool  result;
+
+
+        status = U_ZERO_ERROR;
+        re = uregex_openC("abc*", 0, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        clone1 = uregex_clone(re, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(clone1 != NULL);
+
+        status = U_ZERO_ERROR;
+        clone2 = uregex_clone(re, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(clone2 != NULL);
+        uregex_close(re);
+
+        status = U_ZERO_ERROR;
+        clone3 = uregex_clone(clone2, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(clone3 != NULL);
+        /* The capacity passed must be that of the destination buffer, not of pat[]. */
+        u_uastrncpy(testString1, "abcccd", sizeof(testString1)/2);
+        u_uastrncpy(testString2, "xxxabcccd", sizeof(testString2)/2);
+
+        status = U_ZERO_ERROR;
+        uregex_setText(clone1, testString1, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        result = uregex_lookingAt(clone1, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(result==TRUE);
+        
+        status = U_ZERO_ERROR;
+        uregex_setText(clone2, testString2, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        result = uregex_lookingAt(clone2, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(result==FALSE);
+        result = uregex_find(clone2, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(result==TRUE);
+
+        uregex_close(clone1);
+        uregex_close(clone2);
+        uregex_close(clone3);
+
+    }
+
+    /*
+     *  pattern()
+    */
+    {
+        const UChar  *resultPat;
+        int32_t       resultLen;
+        u_uastrncpy(pat, "hello", sizeof(pat)/2);
+        status = U_ZERO_ERROR;
+        re = uregex_open(pat, -1, 0, NULL, &status);
+        resultPat = uregex_pattern(re, &resultLen, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(resultLen == -1);
+        TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
+        uregex_close(re);
+
+        status = U_ZERO_ERROR;
+        re = uregex_open(pat, 3, 0, NULL, &status);
+        resultPat = uregex_pattern(re, &resultLen, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(resultLen == 3);
+        TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
+        TEST_ASSERT(u_strlen(resultPat) == 3);
+        uregex_close(re);
+
+
+    }
+    
+
+}
+
+#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */