From 24bf088281b1cec1753e936ad50ed6106acbb3e2 Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Tue, 19 Nov 2002 19:31:03 +0000
Subject: [PATCH] ICU-105 Regular Expressions, changes from code review

X-SVN-Rev: 10294
---
 icu4c/source/common/putil.c             |   1 -
 icu4c/source/common/unicode/utypes.h    |  21 +-
 icu4c/source/i18n/regexcmp.cpp          |  98 +++--
 icu4c/source/i18n/regexcmp.h            |   2 -
 icu4c/source/i18n/regeximp.h            |  36 +-
 icu4c/source/i18n/rematch.cpp           |  46 ++-
 icu4c/source/i18n/repattrn.cpp          | 190 +++++-----
 icu4c/source/i18n/unicode/regex.h       | 464 +++++++++++++-----------
 icu4c/source/test/intltest/regextst.cpp |  51 ++-
 9 files changed, 556 insertions(+), 353 deletions(-)

diff --git a/icu4c/source/common/putil.c b/icu4c/source/common/putil.c
index 17bc5b4e93..7047a04cce 100644
--- a/icu4c/source/common/putil.c
+++ b/icu4c/source/common/putil.c
@@ -1839,7 +1839,6 @@ _uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
     "U_REGEX_PROPERTY_SYNTAX",
     "U_REGEX_UNIMPLEMENTED",
     "U_REGEX_MISMATCHED_PAREN",
-    "U_REGEX_MATCH_MODE_ERROR"
 };
 
 U_CAPI const char * U_EXPORT2
diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h
index ffb3bc334a..40f92d0d72 100644
--- a/icu4c/source/common/unicode/utypes.h
+++ b/icu4c/source/common/unicode/utypes.h
@@ -500,18 +500,17 @@ typedef enum UErrorCode {
     /*
      * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs
      */
-     U_REGEX_ERROR_START=0x10300,
-     U_REGEX_INTERNAL_ERROR,
-     U_REGEX_RULE_SYNTAX,
-     U_REGEX_INVALID_STATE,
-     U_REGEX_BAD_ESCAPE_SEQUENCE,
-     U_REGEX_PROPERTY_SYNTAX,
-     U_REGEX_UNIMPLEMENTED,
-     U_REGEX_MISMATCHED_PAREN,
-     U_REGEX_MATCH_MODE_ERROR,
-     U_REGEX_ERROR_LIMIT,
+     U_REGEX_ERROR_START=0x10300,          /**< Start of codes indicating Regexp failures */
+     U_REGEX_INTERNAL_ERROR,               /**< An internal error (bug) was detected.     */
+     U_REGEX_RULE_SYNTAX,                  /**< Syntax error in regexp pattern.           */
+     U_REGEX_INVALID_STATE,                /**< RegexMatcher in invalid state for requested operation */
+     U_REGEX_BAD_ESCAPE_SEQUENCE,          /**< Unrecognized backslash escape sequence in pattern */
+     U_REGEX_PROPERTY_SYNTAX,              /**< Incorrect Unicode property                */
+     U_REGEX_UNIMPLEMENTED,                /**< Use of regexp feature that is not yet implemented. */
+     U_REGEX_MISMATCHED_PAREN,             /**< Incorrectly nested parentheses in regexp pattern. */
+     U_REGEX_ERROR_LIMIT,                  /**< This must always be the last value to indicate the limit for regexp errors */
 
-    U_ERROR_LIMIT=U_BRK_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
+    U_ERROR_LIMIT=U_REGEX_ERROR_LIMIT      /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
 } UErrorCode;
 
 /* Use the following to determine if an UErrorCode represents */
diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp
index 895e4d8cef..a38fafe801 100644
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@@ -28,8 +28,6 @@
 #include "ucln_in.h"
 #include "mutex.h"
 
-#include "stdio.h"      // TODO:  Get rid of this
-  
 #include "regeximp.h"
 #include "regexcst.h"   // Contains state table for the regex pattern parser.
                         //   generated by a Perl script.
@@ -40,7 +38,6 @@
 U_NAMESPACE_BEGIN
 
 const char       RegexCompile::fgClassID=0;
-static const int RESCAN_DEBUG = 0;
 
 //----------------------------------------------------------------------------------------
 //
@@ -173,6 +170,7 @@ RegexCompile::RegexCompile(UErrorCode &status) : fParenStack(status)
 
     //
     //  Set up the constant (static) Unicode Sets.
+    //    TODO:  something cleaner for that -128 constant.
     //
     ThreadSafeUnicodeSetInit(&gRuleSets[kRuleSet_rule_char-128],   gRuleSet_rule_char_pattern,  status);
     ThreadSafeUnicodeSetInit(&gRuleSets[kRuleSet_white_space-128], gRuleWhiteSpacePattern,      status);
@@ -282,14 +280,12 @@ void    RegexCompile::compile(
         //    the search will stop there, if not before.
         //
         tableEl = &gRuleParseStateTable[state];
-        if (RESCAN_DEBUG) {
-            printf( "char, line, col = (\'%c\', %d, %d)    state=%s ",
-                fC.fChar, fLineNum, fCharNum, RegexStateNames[state]);
-        }
+        REGEX_SCAN_DEBUG_PRINTF( "char, line, col = (\'%c\', %d, %d)    state=%s ",
+            fC.fChar, fLineNum, fCharNum, RegexStateNames[state]);
 
         for (;;) {    // loop through table rows belonging to this state, looking for one
                       //   that matches the current input char.
-            if (RESCAN_DEBUG) { printf( ".");}
+            REGEX_SCAN_DEBUG_PRINTF( ".");
             if (tableEl->fCharClass < 127 && fC.fQuoted == FALSE &&   tableEl->fCharClass == fC.fChar) {
                 // Table row specified an individual character, not a set, and
                 //   the input character is not quoted, and
@@ -323,7 +319,7 @@ void    RegexCompile::compile(
             // No match on this row, advance to the next  row for this state,
             tableEl++;
         }
-        if (RESCAN_DEBUG) { printf( "\n");}
+        REGEX_SCAN_DEBUG_PRINTF("\n");
 
         //
         // We've found the row of the state table that matches the current input
@@ -340,7 +336,7 @@ void    RegexCompile::compile(
             fStackPtr++;
             if (fStackPtr >= kStackSize) {
                 error(U_REGEX_INTERNAL_ERROR);
-                // printf( "RegexCompile::parse() - state stack overflow.\n");
+                REGEX_SCAN_DEBUG_PRINTF( "RegexCompile::parse() - state stack overflow.\n");
                 fStackPtr--;
             }
             fStack[fStackPtr] = tableEl->fPushState;
@@ -369,6 +365,36 @@ void    RegexCompile::compile(
 
     }
 
+    //
+    // The pattern has now been read and processed, and the compiled code generated.
+    //
+
+    //
+    // Compute the number of digits required for the largest capture group number.
+    //
+    fRXPat->fMaxCaptureDigits = 1;
+    int32_t  n = 10;
+    for (;;) {
+        if (n > fRXPat->fNumCaptureGroups) {
+            break;
+        }
+        fRXPat->fMaxCaptureDigits++;
+        n *= 10;
+    }
+
+    //
+    // A stupid bit of nonsense to prevent code coverage testing from complaining
+    //   about the pattern.dump() debug function.  Go through the motions of dumping,
+    //   even though, without the #define set, it will do nothing.
+    //
+#ifndef REGEX_DUMP_DEBUG
+    static UBool phonyDumpDone = FALSE;
+    if (phonyDumpDone==FALSE) {
+        fRXPat->dump();
+        phonyDumpDone = TRUE;
+    }
+#endif
+
 }
 
 
@@ -1094,27 +1120,39 @@ void        RegexCompile::compileSet(UnicodeSet *theSet)
     if (theSet == NULL) {
         return;
     }
-    if (theSet->size() > 1) {
-        //  The set contains two or more chars.
-        //  Put it into the compiled pattern as a set.
-        int32_t setNumber = fRXPat->fSets->size();
-        fRXPat->fSets->addElement(theSet, *fStatus);
-        int32_t setOp = URX_BUILD(URX_SETREF, setNumber);
-        fRXPat->fCompiledPat->addElement(setOp, *fStatus);
+    int32_t  setSize = theSet->size();
+    UChar32  firstSetChar = theSet->charAt(0);
+    if (firstSetChar == -1) {
+        // Sets that contain only strings, but no individual chars,
+        // will end up here.   TODO:  figure out what to do with sets containing strings.
+        setSize = 0;
     }
-    else
-    {
-        // The set contains only a single code point.  Put it into
-        //   the compiled pattern as a single char operation rather
-        //   than a set, and discard the set itself.
-        UChar32  c = theSet->charAt(0);
-        if (c == -1) {
-            // Set contained no chars.  Stuff an invalid char that can't match.
-            c = 0x1fffff;
+
+    switch (setSize) {
+    case 0:      // Set of no elements.   Always fails to match.  
+        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKTRACK, 0), *fStatus);
+        break;
+        
+    case 1:
+        {
+            // The set contains only a single code point.  Put it into
+            //   the compiled pattern as a single char operation rather
+            //   than a set, and discard the set itself.
+            int32_t  charToken = URX_BUILD(URX_ONECHAR, firstSetChar);
+            fRXPat->fCompiledPat->addElement(charToken, *fStatus);
+            delete theSet;
+        }
+        break;
+        
+    default: 
+        {
+            //  The set contains two or more chars.  (the normal case)
+            //  Put it into the compiled pattern as a set.
+            int32_t setNumber = fRXPat->fSets->size();
+            fRXPat->fSets->addElement(theSet, *fStatus);
+            int32_t setOp = URX_BUILD(URX_SETREF, setNumber);
+            fRXPat->fCompiledPat->addElement(setOp, *fStatus);
         }
-        int32_t  charToken = URX_BUILD(URX_ONECHAR, c);
-        fRXPat->fCompiledPat->addElement(charToken, *fStatus);
-        delete theSet;
     }
 }
 
@@ -1321,7 +1359,7 @@ UnicodeSet *RegexCompile::scanSet() {
     if (U_FAILURE(localStatus)) {
         //  TODO:  Get more accurate position of the error from UnicodeSet's return info.
         //         UnicodeSet appears to not be reporting correctly at this time.
-        printf( "UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
+        REGEX_SCAN_DEBUG_PRINTF( "UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
         error(localStatus);
         delete uset;
         return NULL;
diff --git a/icu4c/source/i18n/regexcmp.h b/icu4c/source/i18n/regexcmp.h
index 7f11cb114a..d809b78ea6 100644
--- a/icu4c/source/i18n/regexcmp.h
+++ b/icu4c/source/i18n/regexcmp.h
@@ -28,8 +28,6 @@
 U_NAMESPACE_BEGIN
 
 
-static const UBool REGEX_DEBUG = TRUE;
-
 //--------------------------------------------------------------------------------
 //
 //  class RegexCompile    Contains the regular expression compiler.
diff --git a/icu4c/source/i18n/regeximp.h b/icu4c/source/i18n/regeximp.h
index 3d28b1849f..b176ed54c0 100644
--- a/icu4c/source/i18n/regeximp.h
+++ b/icu4c/source/i18n/regeximp.h
@@ -13,13 +13,45 @@
 #define _REGEXIMP_H
 
 
+//
+//  debugging support.  Enable one or more of the #defines immediately following
+//
+//#define REGEX_SCAN_DEBUG
+#define REGEX_DUMP_DEBUG
+//#define REGEX_RUN_DEBUG
+//  End of #defines intended to be directly set.
+
+#ifdef REGEX_SCAN_DEBUG
+#define REGEX_SCAN_DEBUG_PRINTF printf
+#else
+#define REGEX_SCAN_DEBUG_PRINTF
+#endif
+
+//  Run-time tracing prints each op through dumpOp(), so REGEX_RUN_DEBUG turns on
+//    the dump printf too.  It is defined exactly once, avoiding a macro redefinition.
+#if defined(REGEX_DUMP_DEBUG) || defined(REGEX_RUN_DEBUG)
+#define REGEX_DUMP_DEBUG_PRINTF printf
+#else
+#define REGEX_DUMP_DEBUG_PRINTF
+#endif
+
+#ifdef REGEX_RUN_DEBUG
+#define REGEX_RUN_DEBUG_PRINTF printf
+#else
+#define REGEX_RUN_DEBUG_PRINTF
+#endif
+
+#if defined(REGEX_SCAN_DEBUG) || defined(REGEX_RUN_DEBUG) || defined(REGEX_DUMP_DEBUG)
+#include <stdio.h>
+#endif
+
+
 //
 //  Opcode types     In the compiled form of the regexp, these are the type, or opcodes,
 //                   of the entries.
 //
 enum {
      URX_RESERVED_OP   = 0,
-     URX_UNUSED1       = 1,
+     URX_BACKTRACK     = 1,
      URX_END           = 2,
      URX_ONECHAR       = 3,    // Value field is the 21 bit unicode char to match
      URX_STRING        = 4,    // Value field is index of string start
@@ -52,7 +84,7 @@ enum {
 //   Used for debug printing only.
 #define URX_OPCODE_NAMES       \
         "URX_RESERVED_OP",     \
-        "URX_UNUSED1",         \
+        "URX_BACKTRACK",       \
         "END",                 \
         "ONECHAR",             \
         "STRING",              \
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp
index 52ac4af64f..10f92708b0 100644
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@@ -280,9 +280,9 @@ UnicodeString RegexMatcher::group(UErrorCode &status) const {
 
 
 
-UnicodeString RegexMatcher::group(int32_t group, UErrorCode &status) const {
-    int32_t  s = start(group, status);
-    int32_t  e = end(group, status);
+UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
+    int32_t  s = start(groupNum, status);
+    int32_t  e = end(groupNum, status);
 
     // Note:  calling start() and end() above will do all necessary checking that
     //        the group number is OK and that a match exists.  status will be set.
@@ -539,6 +539,28 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
     int32_t     opType;                //    the opcode
     int32_t     opValue;               //    and the operand value.
 
+    #ifdef REGEX_RUN_DEBUG
+    {
+        printf("MatchAt(startIdx=%d)\n", startIdx);
+        printf("Original Pattern: ");
+        int i;
+        for (i=0; i<fPattern->fPattern.length(); i++) {
+            printf("%c", fPattern->fPattern.charAt(i));
+        }
+        printf("\n");
+        printf("Input String: ");
+        for (i=0; i<fInput->length(); i++) {
+            UChar c = fInput->charAt(i);
+            if (c<32 || c>256) {
+                c = '.';
+            }
+            printf("%c", c);
+        }
+        printf("\n");
+        printf("\n");
+        printf("PatLoc  inputIdx  char\n");
+    }
+    #endif
 
     if (U_FAILURE(status)) {
         return;
@@ -569,7 +591,10 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
         op      = pat->elementAti(patIdx);
         opType  = URX_TYPE(op);
         opValue = URX_VAL(op);
-        // printf("%d   %d  \"%c\"\n", patIdx, inputIdx, fInput->char32At(inputIdx));
+        #ifdef REGEX_RUN_DEBUG
+            printf("inputIdx=%d   inputChar=%c    ", inputIdx, fInput->char32At(inputIdx));
+            fPattern->dumpOp(patIdx);
+        #endif
         patIdx++;
 
         switch (opType) {
@@ -579,6 +604,14 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
             break;
 
 
+        case URX_BACKTRACK:
+            // Force a backtrack.  In some circumstances, the pattern compiler
+            //   will notice that the pattern can't possibly match anything, and will
+            //   emit one of these at that point.
+            backTrack(inputIdx, patIdx);
+            break;
+
+
         case URX_ONECHAR:
             {
                 UChar32 inputChar = fInput->char32At(inputIdx);
@@ -909,7 +942,12 @@ breakFromLoop:
         fLastMatchEnd = fMatchEnd;
         fMatchStart   = startIdx;
         fMatchEnd     = inputIdx;
+        REGEX_RUN_DEBUG_PRINTF("Match.  start=%d   end=%d\n\n", fMatchStart, fMatchEnd);
         }
+    else
+    {
+        REGEX_RUN_DEBUG_PRINTF("No match\n\n");
+    }
     return;
 }
 
diff --git a/icu4c/source/i18n/repattrn.cpp b/icu4c/source/i18n/repattrn.cpp
index 227bd2d628..46268e6552 100644
--- a/icu4c/source/i18n/repattrn.cpp
+++ b/icu4c/source/i18n/repattrn.cpp
@@ -18,8 +18,6 @@
 #include "regexcmp.h"
 #include "regeximp.h"
 
-#include "stdio.h"    // TODO:  get rid of this...
-
 U_NAMESPACE_BEGIN
 
 //--------------------------------------------------------------------------
@@ -197,7 +195,7 @@ UBool   RegexPattern::operator ==(const RegexPattern &other) const {
 //---------------------------------------------------------------------
 RegexPattern  *RegexPattern::compile(
                              const UnicodeString &regex,
-                             int32_t              flags,
+                             uint32_t             flags,
                              UParseError          &pe,
                              UErrorCode           &status)  {
 
@@ -243,7 +241,7 @@ RegexPattern *RegexPattern::compile( const UnicodeString &regex,
 //   flags
 //
 //---------------------------------------------------------------------
-int32_t RegexPattern::flags() const {
+uint32_t RegexPattern::flags() const {
     return fFlags;
 }
 
@@ -320,8 +318,6 @@ UnicodeString RegexPattern::pattern() const {
 //---------------------------------------------------------------------
 //
 //   split
-//            TODO:  perl returns captured strings intermixed with the
-//                   fields.  Should we do this too?
 //
 //---------------------------------------------------------------------
 int32_t  RegexPattern::split(const UnicodeString &input,
@@ -383,10 +379,28 @@ int32_t  RegexPattern::split(const UnicodeString &input,
             int32_t fieldLen = fMatcher->fMatchStart - nextOutputStringStart;
             dest[i].setTo(input, nextOutputStringStart, fieldLen);
             nextOutputStringStart = fMatcher->fMatchEnd;
+
+            // If the delimiter pattern has capturing parentheses, the captured
+            //  text goes out into the next n destination strings.
+            int32_t groupNum;
+            for (groupNum=1; groupNum<=this->fNumCaptureGroups; groupNum++) {
+                if (i==destCapacity-1) {
+                    break;
+                }
+                i++;
+                dest[i] = fMatcher->group(groupNum, status);
+            }
+
             if (nextOutputStringStart == inputLen) {
                 // The delimiter was at the end of the string.  We're done.
                 break;
             }
+
+            if (i==destCapacity-1) {
+                // We've filled up the last output string with capture group data.
+                //  Give back the last string, to be used for the remainder of the input.
+                i--;
+            }
         }
         else
         {
@@ -410,88 +424,102 @@ int32_t  RegexPattern::split(const UnicodeString &input,
 //---------------------------------------------------------------------
 static const char *opNames[] = {URX_OPCODE_NAMES};
 
-void   RegexPattern::dump() {
+//---------------------------------------------------------------------
+//   dumpOp   Debug function.  Display one operation of the compiled pattern:
+//            its index, hex value, opcode name and, where meaningful, operand.
+//---------------------------------------------------------------------
+void   RegexPattern::dumpOp(int32_t index) const {
+    int32_t op          = fCompiledPat->elementAti(index);
+    int32_t val         = URX_VAL(op);
+    int32_t type        = URX_TYPE(op);
+    int32_t pinnedType  = type;
+    if (pinnedType < 0 || pinnedType >= (int32_t)(sizeof(opNames)/sizeof(char *))) {
+        pinnedType = 0;      // Unknown opcode; don't index outside of opNames.
+    }
+
+    REGEX_DUMP_DEBUG_PRINTF("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
+    switch (type) {
+    case URX_NOP:
+    case URX_DOTANY:
+    case URX_FAIL:
+    case URX_BACKTRACK:
+    case URX_BACKSLASH_A:
+    case URX_BACKSLASH_G:
+    case URX_BACKSLASH_X:
+    case URX_END:
+        // Types with no operand field of interest.
+        break;
+
+    case URX_START_CAPTURE:
+    case URX_END_CAPTURE:
+    case URX_STATIC_SETREF:
+    case URX_STATE_SAVE:
+    case URX_JMP:
+    case URX_BACKSLASH_B:
+    case URX_BACKSLASH_D:
+    case URX_BACKSLASH_W:
+    case URX_BACKSLASH_Z:
+    case URX_CARET:
+    case URX_DOLLAR:
+    case URX_STRING_LEN:
+        // types with an integer operand field.
+        REGEX_DUMP_DEBUG_PRINTF("%d", val);
+        break;
+
+    case URX_ONECHAR:
+        REGEX_DUMP_DEBUG_PRINTF("%c", val<256?val:'?');
+        break;
+
+    case URX_STRING:
+        {
+            int32_t lengthOp       = fCompiledPat->elementAti(index+1);   // length is in the next op.
+            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
+            int32_t length = URX_VAL(lengthOp);
+            int32_t i;
+            for (i=val; i<val+length; i++) {
+                UChar c = fLiteralText[i];
+                if (c < 32 || c >= 256) {c = '.';}
+                REGEX_DUMP_DEBUG_PRINTF("%c", c);
+            }
+        }
+        break;
+
+    case URX_SETREF:
+        {
+            REGEX_DUMP_DEBUG_PRINTF("%d ", val);
+            UnicodeString s;
+            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
+            set->toPattern(s, TRUE);
+            for (int32_t i=0; i<s.length(); i++) {
+                REGEX_DUMP_DEBUG_PRINTF("%c", s.charAt(i));
+            }
+        }
+        break;               // Without this, sets fell through and also printed "??????".
+
+    default:
+        REGEX_DUMP_DEBUG_PRINTF("??????");
+        break;
+    }
+    REGEX_DUMP_DEBUG_PRINTF("\n");
+}
+
+
+void   RegexPattern::dump() const {
     int      index;
     int      i;
-    UChar    c;
-    int32_t  op;
-    int32_t  pinnedType;
-    int32_t  type;
-    int32_t  val;
-    int32_t  stringStart;
 
-
-    printf("Original Pattern:  ");
+    REGEX_DUMP_DEBUG_PRINTF("Original Pattern:  ");
     for (i=0; i<fPattern.length(); i++) {
-        printf("%c", fPattern.charAt(i));
+        REGEX_DUMP_DEBUG_PRINTF("%c", fPattern.charAt(i));
     }
-    printf("\n");
-    printf("Pattern Valid?:     %s\n", fBadState? "no" : "yes");
-    printf("\nIndex   Binary     Type             Operand\n"
+    REGEX_DUMP_DEBUG_PRINTF("\n");
+    REGEX_DUMP_DEBUG_PRINTF("Pattern Valid?:     %s\n", fBadState? "no" : "yes");
+    REGEX_DUMP_DEBUG_PRINTF("\nIndex   Binary     Type             Operand\n"
            "-------------------------------------------\n");
-    for (index = 0; ; index++) {
-        op         = fCompiledPat->elementAti(index);
-        val        = URX_VAL(op);
-        type       = URX_TYPE(op);
-        pinnedType = type;
-        if (pinnedType >= sizeof(opNames)/sizeof(char *)) {
-            pinnedType = 0;
-        }
-
-        printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
-        switch (type) {
-        case URX_NOP:
-        case URX_DOTANY:
-        case URX_FAIL:
-        case URX_BACKSLASH_A:
-        case URX_BACKSLASH_G:
-        case URX_BACKSLASH_X:
-            // Types with no operand field of interest.
-            break;
-
-        case URX_START_CAPTURE:
-        case URX_END_CAPTURE:
-        case URX_SETREF:
-        case URX_STATIC_SETREF:
-        case URX_STATE_SAVE:
-        case URX_JMP:
-        case URX_BACKSLASH_B:
-        case URX_BACKSLASH_D:
-        case URX_BACKSLASH_W:
-        case URX_BACKSLASH_Z:
-        case URX_CARET:
-        case URX_DOLLAR:
-            // types with an integer operand field.
-            printf("%d", val);
-            break;
-
-        case URX_ONECHAR:
-            printf("%c", val<256?val:'?');
-            break;
-
-        case URX_STRING:
-            stringStart = val;
-            break;
-
-        case URX_STRING_LEN:
-            for (i=stringStart; i<stringStart+val; i++) {
-                c = fLiteralText[i];
-                if (c >= 256) {c = '?';};
-                printf("%c", c);
-            }
-            break;
-            
-        case URX_END:
-            goto breakFromLoop;
-            
-        default:
-            printf("??????");
-            break;
-        }
-        printf("\n");
+    for (index = 0; index<fCompiledPat->size(); index++) {
+        dumpOp(index);
     }
-breakFromLoop:
-    printf("\n\n");
+    REGEX_DUMP_DEBUG_PRINTF("\n\n");
 };
 
 const char RegexPattern::fgClassID = 0;
diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h
index 4a4d9f2843..c6d6c4e353 100644
--- a/icu4c/source/i18n/unicode/regex.h
+++ b/icu4c/source/i18n/unicode/regex.h
@@ -81,6 +81,8 @@ enum {
   * to be applied to input text, and a few convenience methods for simple common
   * uses of regular expressions.
   *
+  * <p>Class RegexPattern is not intended to be subclassed.</p>
+  *
   * @draft ICU 2.4
   */
 class U_I18N_API RegexPattern: public UObject {
@@ -192,7 +194,7 @@ public:
     *    @draft ICU 2.4
     */
     static RegexPattern *compile( const UnicodeString &regex,
-        int32_t              flags,
+        uint32_t             flags,
         UParseError          &pe,
         UErrorCode           &status); 
 
@@ -202,7 +204,7 @@ public:
     *     @return  the match mode flags
     *     @draft ICU 2.4
     */
-    virtual int32_t flags() const;
+    virtual uint32_t flags() const;
     
    /*
     *  Creates a RegexMatcher that will match the given input against this pattern.  The
@@ -275,7 +277,7 @@ public:
     //
     //   dump   Debug function, displays the compiled form of a pattern.
     //
-    void dump();
+    void dump() const;
 
     /**
      * ICU "poor man's RTTI", returns a UClassID for the actual class.
@@ -291,14 +293,12 @@ public:
     */
     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
     
-    static const char fgClassID;
-
 private:
     //
     //  Implementation Data
     //
     UnicodeString   fPattern;      // The original pattern string.
-    int32_t         fFlags;        // The flags used when compiling the pattern.
+    uint32_t        fFlags;        // The flags used when compiling the pattern.
                                    //   
     UVector         *fCompiledPat; // The compiled pattern.
     UnicodeString   fLiteralText;  // Any literal string data from the pattern, 
@@ -317,6 +317,12 @@ private:
     UnicodeSet    **fStaticSets;  // Ptr to static (shared) sets for predefined
                                     //   regex character classes, e.g. Word.
 
+    /**
+     * The address of this static class variable serves as this class's ID
+     * for ICU "poor man's RTTI".
+     */
+    static const char fgClassID;
+
     friend class RegexCompile;
     friend class RegexMatcher;
 
@@ -325,6 +331,7 @@ private:
     //
     void        init();            // Common initialization, for use by constructors.
     void        zap();             // Common cleanup
+    void        dumpOp(int32_t index) const;
 
 
 
@@ -343,6 +350,8 @@ private:
   *  input text to which the expression can be applied.  It includes methods
   *  for testing for matches, and for find and replace operations.
   *
+  * <p>Class RegexMatcher is not intended to be subclassed.</p>
+  *
   * @draft ICU 2.4
   */
   class U_I18N_API RegexMatcher: public UObject {
@@ -355,6 +364,227 @@ public:
     */
     virtual ~RegexMatcher();
 
+    
+   /**
+    *   Attempts to match the entire input string against the pattern.
+    *    @param   status     A reference to a UErrorCode to receive any errors. 
+    *    @return TRUE if there is a match
+    *    @draft ICU 2.4
+    */
+    virtual UBool matches(UErrorCode &status);
+    
+    
+    
+   /**
+    *   Attempts to match the input string, starting from the beginning, against the pattern.
+    *   Like the matches() method, this function always starts at the beginning of the input string;
+    *   unlike that function, it does not require that the entire input string be matched.
+    *
+    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
+    *     <code>end()</code>, and <code>group()</code> functions.</p>
+    *
+    *    @param   status     A reference to a UErrorCode to receive any errors. 
+    *    @return  TRUE if there is a match at the start of the input string.
+    *    @draft ICU 2.4
+    */
+    virtual UBool lookingAt(UErrorCode &status);
+    
+    
+   /**
+    *  Find the next pattern match in the input string.
+    *  The find begins searching the input at the location following the end of
+    *  the previous match, or at the start of the string if there is no previous match.
+    *  If a match is found, <code>start(), end()</code> and <code>group()</code>
+    *  will provide more information regarding the match.
+    *  <p>Note that if the input string is changed by the application, 
+    *     use find(startPos, status) instead of find(), because the saved starting
+    *     position may not be valid with the altered input string.</p>
+    *  @return  TRUE if a match is found.
+    *  @draft ICU 2.4
+    */
+    virtual UBool find();
+    
+    
+   /**
+    *   Resets this RegexMatcher and then attempts to find the next substring of the 
+    *   input string that matches the pattern, starting at the specified index. 
+    *
+    *   @param   start     the position in the input string to begin the search
+    *   @param   status    A reference to a UErrorCode to receive any errors.  
+    *   @return  TRUE if a match is found.
+    *   @draft ICU 2.4
+    */
+    virtual UBool find(int32_t start, UErrorCode &status); 
+    
+    
+   /**
+    *   Returns a string containing the text matched by the previous match. 
+    *   If the pattern can match an empty string, an empty string may be returned.
+    *   @param   status      A reference to a UErrorCode to receive any errors.  
+    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
+    *                        has been attempted or the last match failed. 
+    *   @return  a string containing the matched input text.  
+    *   @draft ICU 2.4
+    */
+    virtual UnicodeString group(UErrorCode &status) const;
+    
+    
+   /**
+    *    Returns a string containing the text captured by the given group
+    *    during the previous match operation.  Group(0) is the entire match.
+    *   
+    *    @param groupNum the capture group number
+    *    @param   status     A reference to a UErrorCode to receive any errors.  
+    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
+    *                        has been attempted or the last match failed and
+    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
+    *    @return the captured text
+    *    @draft ICU 2.4
+    */
+    virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const; 
+    
+    
+   /**
+    *   Returns the number of capturing groups in this matcher's pattern.
+    *   @return the number of capture groups
+    *   @draft ICU 2.4
+    */
+    virtual int32_t groupCount() const;
+    
+    
+   /**
+    *   Returns the index in the input string of the start of the text matched 
+    *   during the previous match operation. 
+    *    @param   status      a reference to a UErrorCode to receive any errors. 
+    *    @return              The position in the input string of the start of the last match.
+    *    @draft ICU 2.4
+    */
+    virtual int32_t start(UErrorCode &status) const;
+    
+    
+   /**
+    *   Returns the index in the input string of the start of the text matched by the
+    *    specified capture group during the previous match operation.  Return -1 if
+    *    the capture group exists in the pattern, but was not part of the last match.
+    *
+    *    @param  group       the capture group number
+    *    @param  status      A reference to a UErrorCode to receive any errors.  Possible 
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed, and
+    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
+    *    @return the start position of substring matched by the specified group.
+    *    @draft ICU 2.4
+    */
+    virtual int32_t start(int group, UErrorCode &status) const;
+    
+    
+   /**
+    *    Returns the index in the input string of the character following the
+    *    text matched during the previous match operation.  
+    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible 
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed.
+    *    @return the index of the last character matched, plus one.
+    *   @draft ICU 2.4
+    */
+    virtual int32_t end(UErrorCode &status) const;
+    
+    
+   /**
+    *    Returns the index in the input string of the character following the
+    *    text matched by the specified capture group during the previous match operation.
+    *    @param group  the capture group number
+    *    @param   status      A reference to a UErrorCode to receive any errors.  Possible 
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed and
+    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
+    *    @return  the index of the last character, plus one, of the text 
+    *              captured by the specified group during the previous match operation.
+    *              Return -1 if the capture group was not part of the match.
+    *    @draft ICU 2.4
+    */
+    virtual int32_t end(int group, UErrorCode &status) const; 
+    
+    
+   /**
+    *   Resets this matcher.  The effect is to remove any memory of previous matches,
+    *       and to cause subsequent find() operations to begin at the beginning of
+    *       the input string.
+    *
+    *   @return this RegexMatcher.
+    *   @draft ICU 2.4
+    */
+    virtual RegexMatcher &reset();
+    
+    
+   /**
+    *   Resets this matcher with a new input string.  This allows instances of RegexMatcher
+    *     to be reused, which is more efficient than creating a new RegexMatcher for
+    *     each input string to be processed.
+    *   @return this RegexMatcher.
+    *   @draft ICU 2.4
+    */
+    virtual RegexMatcher &reset(const UnicodeString &input);  
+    
+    
+   /**
+    *   Returns the input string being matched.  The returned string is not a copy,
+    *   but the live input string.  It should not be altered or deleted.
+    *   @return the input string
+    *   @draft ICU 2.4
+    */
+    virtual const UnicodeString &input() const; 
+    
+    
+   /**
+    *    Returns the pattern that is interpreted by this matcher.
+    *    @return  the RegexPattern for this RegexMatcher
+    *    @draft ICU 2.4
+    */
+    virtual const RegexPattern &pattern() const;
+    
+    
+   /**
+    *    Replaces every substring of the input that matches the pattern
+    *    with the given replacement string.  This is a convenience function that
+    *    provides a complete find-and-replace-all operation.
+    *
+    *    This method first resets this matcher. It then scans the input string
+    *    looking for matches of the pattern. Input that is not part of any 
+    *    match is left unchanged; each match is replaced in the result by the
+    *    replacement string. The replacement string may contain references to
+    *    capture groups. 
+    *
+    *    @param   replacement a string containing the replacement text.
+    *    @param   status      a reference to a UErrorCode to receive any errors. 
+    *    @return              a string containing the results of the find and replace.
+    *    @draft ICU 2.4
+    */
+    virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status); 
+    
+    
+   /**
+    * Replaces the first substring of the input that matches
+    * the pattern with the replacement string.   This is a convenience
+    * function that provides a complete find-and-replace operation.
+    *
+    * <p>This function first resets this RegexMatcher. It then scans the input string
+    * looking for a match of the pattern. Input that is not part
+    * of the match is appended directly to the result string; the match is replaced
+    * in the result by the replacement string. The replacement string may contain
+    * references to captured groups.</p>
+    *
+    * <p>The state of the matcher (the position at which a subsequent find()
+    *    would begin) after completing a replaceFirst() is not specified.  The
+    *    RegexMatcher should be reset before doing additional find() operations.</p>
+    * 
+    *    @param   replacement a string containing the replacement text.
+    *    @param   status      a reference to a UErrorCode to receive any errors. 
+    *    @return              a string containing the results of the find and replace.
+    *    @draft ICU 2.4
+    */
+    virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status); 
+    
    /**
     *   Implements a replace operation intended to be used as part of an
     *   incremental find-and-replace.
@@ -398,219 +628,6 @@ public:
     */
     virtual UnicodeString &appendTail(UnicodeString &dest); 
     
-    
-   /**
-    *    Returns the index in the input string of the character following the
-    *    text matched during the previous match operation.  
-    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible 
-    *                        errors are  U_REGEX_INVALID_STATE if no match has been
-    *                        attempted or the last match failed.
-    *    @return the index of the last character matched, plus one.
-    *   @draft ICU 2.4
-    */
-    virtual int32_t end(UErrorCode &status) const;
-    
-    
-   /**
-    *    Returns the index in the input string of the character following the
-    *    text matched by the specified capture group during the previous match operation.
-    *    @param group  the capture group number
-    *    @param   status      A reference to a UErrorCode to receive any errors.  Possible 
-    *                        errors are  U_REGEX_INVALID_STATE if no match has been
-    *                        attempted or the last match failed and
-    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
-    *    @return  the index of the last character, plus one, of the text 
-    *              captured by the specifed group during the previous match operation. 
-    *              Return -1 if the capture group was not part of the match.
-    *    @draft ICU 2.4
-    */
-    virtual int32_t end(int group, UErrorCode &status) const; 
-    
-    
-   /**
-    *  Find the next pattern match in the input string.
-    *  The find begins searching the input at the location following the end of
-    *  the previous match, or at the start of the string if there is no previous match.
-    *  If a match is found, <code>start(), end()</code> and <code>group()</code>
-    *  will provide more information regarding the match.
-    *  @return  TRUE if a match is found.
-    *  @draft ICU 2.4
-    */
-    virtual UBool find();
-    
-    
-   /**
-    *   Resets this RegexMatcher and then attempts to find the next substring of the 
-    *   input string that matches the pattern, starting at the specified index. 
-    *
-    *   @param status the position in the input string to begin the search
-    *   @param   status      A reference to a UErrorCode to receive any errors.  
-    *   @return  TRUE if a match is found.
-    *   @draft ICU 2.4
-    */
-    virtual UBool find(int32_t start, UErrorCode &status); 
-    
-    
-   /*
-    *   Returns a string containing the text matched by the previous match. 
-    *   If the pattern can match an empty string, an empty string may be returned.
-    *   @param   status      A reference to a UErrorCode to receive any errors.  
-    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
-    *                        has been attempted or the last match failed. 
-    *   @return  a string containing the matched input text.  
-    *   @draft ICU 2.4
-    */
-    virtual UnicodeString group(UErrorCode &status) const;
-    
-    
-   /**
-    *    Returns a string containing the text captured by the given group
-    *    during the previous match operation.  Group(0) is the entire match.
-    *   
-    *    @param group the capture group number
-    *    @param   status     A reference to a UErrorCode to receive any errors.  
-    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
-    *                        has been attempted or the last match failed and
-    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
-    *    @return the captured text
-    *    @draft ICU 2.4
-    */
-    virtual UnicodeString group(int32_t group, UErrorCode &status) const; 
-    
-    
-   /**
-    *   Returns the number of capturing groups in this matcher's pattern.
-    *   @return the number of capture groups
-    *   @draft ICU 2.4
-    */
-    virtual int32_t groupCount() const;
-    
-    
-   /**
-    *   Returns the input string being matched.  The returned string is not a copy,
-    *   but the live input string.  It should not be altered or deleted.
-    *   @return the input string
-    *   @draft ICU 2.4
-    */
-    virtual const UnicodeString &input() const; 
-    
-    
-   /**
-    *   Attempts to match the input string, starting from the beginning, against the pattern.
-    *   Like the matches() method, this function always starts at the beginning of the input string;
-    *   unlike that function, it does not require that the entire input string be matched.
-    *
-    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
-    *     <code>end()</code>, and <code>group()</code> functions.</p>
-    *
-    *    @param   status     A reference to a UErrorCode to receive any errors. 
-    *    @return  TRUE if there is a match at the start of the input string.
-    *    @draft ICU 2.4
-    */
-    virtual UBool lookingAt(UErrorCode &status);
-    
-    
-   /**
-    *   Attempts to match the entire input string against the pattern.
-    *    @param   status     A reference to a UErrorCode to receive any errors. 
-    *    @return TRUE if there is a match
-    *    @draft ICU 2.4
-    */
-    virtual UBool matches(UErrorCode &status);
-    
-    
-   /**
-    *    Returns the pattern that is interpreted by this matcher.
-    *    @return  the RegexPattern for this RegexMatcher
-    *    @draft ICU 2.4
-    */
-    virtual const RegexPattern &pattern() const;
-    
-    
-   /**
-    *    Replaces every substring of the input that matches the pattern
-    *    with the given replacement string.  This is a convenience function that
-    *    provides a complete find-and-replace-all operation.
-    *
-    *    This method first resets this matcher. It then scans the input string
-    *    looking for matches of the pattern. Input that is not part of any 
-    *    match is left unchanged; each match is replaced in the result by the
-    *    replacement string. The replacement string may contain references to
-    *    capture groups. 
-    *
-    *    @param   replacement a string containing the replacement text.
-    *    @param   status      a reference to a UErrorCode to receive any errors. 
-    *    @return              a string containing the results of the find and replace.
-    *    @draft ICU 2.4
-    */
-    virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status); 
-    
-    
-   /**
-    * Replaces the first substring of the input that matches
-    * the pattern with the replacement string.   This is a convenience
-    * function that provides a complete find-and-replace operation.
-    *
-    * This function first resets this RegexMatcher. It then scans the input string
-    * looking for a match of the pattern. Input that is not part
-    * of the match is appended directly to the result string; the match is replaced
-    * in the result by the replacement string. The replacement string may contain
-    * references to captured groups.
-    * 
-    *    @param   replacement a string containing the replacement text.
-    *    @param   status      a reference to a UErrorCode to receive any errors. 
-    *    @return              a string containing the results of the find and replace.
-    *    @draft ICU 2.4
-    */
-    virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status); 
-    
-    
-   /**
-    *   Resets this matcher.  The effect is to remove any memory of previous matches,
-    *       and to cause subsequent find() operations to begin at the beginning of
-    *       the input string.
-    *
-    *   @return this RegexMatcher.
-    *   @draft ICU 2.4
-    */
-    virtual RegexMatcher &reset();
-    
-    
-   /**
-    *   Resets this matcher with a new input string.  This allows instances of RegexMatcher
-    *     to be reused, which is more efficient than creating a new RegexMatcher for
-    *     each input string to be processed.
-    *   @return this RegexMatcher.
-    *   @draft ICU 2.4
-    */
-    virtual RegexMatcher &reset(const UnicodeString &input);  
-    
-    
-   /**
-    *   Returns the index in the input string of the start of the text matched 
-    *   during the previous match operation. 
-    *    @param   status      a reference to a UErrorCode to receive any errors. 
-    *    @return              The position in the input string of the start of the last match.
-    *    @draft ICU 2.4
-    */
-    virtual int32_t start(UErrorCode &status) const;
-    
-    
-   /**
-    *   Returns the index in the input string of the start of the text matched by the
-    *    specified capture group during the previous match operation.  Return -1 if
-    *    the capture group exists in the pattern, but was not part of the last match.
-    *
-    *    @param  group       the capture group number
-    *    @param  status      A reference to a UErrorCode to receive any errors.  Possible 
-    *                        errors are  U_REGEX_INVALID_STATE if no match has been
-    *                        attempted or the last match failed, and
-    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
-    *    @return the start position of substring matched by the specified group.
-    *    @draft ICU 2.4
-    */
-    virtual int32_t start(int group, UErrorCode &status) const;
-    
 
     /**
      * ICU "poor man's RTTI", returns a UClassID for the actual class.
@@ -626,8 +643,6 @@ public:
     */
     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
     
-    static const char fgClassID;
-
 private:
     // Constructors and other object boilerplate are private.
     // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
@@ -658,6 +673,13 @@ private:
     UVector             *fCaptureStarts;
     UVector             *fCaptureEnds;
 
+    /**
+     * The address of this static class variable serves as this class's ID
+     * for ICU "poor man's RTTI".
+     */
+    static const char   fgClassID;
+
+
 };  
 
 U_NAMESPACE_END
diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp
index e97651e62b..4618deda6c 100644
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@@ -368,7 +368,7 @@ void RegexTest::Basic() {
 //
 #if 0
     {
-    REGEX_FIND("(?:ABC)+", "<0>ABCABCABC</0>D");
+    REGEX_FIND("[{ab}]", "a");
     }
     exit(1);
 #endif
@@ -436,6 +436,9 @@ void RegexTest::Basic() {
     REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, FALSE);   // note that * matches 0 occurences.
     REGEX_TESTLM("[a][b][[:Zs:]]*", "ab   ", TRUE, TRUE);
 
+    // Set contains only a string, no individual chars.
+    REGEX_TESTLM("[{ab}]", "a", FALSE, FALSE);
+
     //
     //   OR operator in patterns
     //
@@ -975,6 +978,52 @@ void RegexTest::API_Pattern() {
 
     delete pat1;
 
+    //  split, with a pattern with (capture)
+    pat1 = RegexPattern::compile("<(\\w*)>",  pe, status);
+    REGEX_CHECK_STATUS;
+
+    n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(n==6);
+    REGEX_ASSERT(fields[0]=="");
+    REGEX_ASSERT(fields[1]=="a");
+    REGEX_ASSERT(fields[2]=="Now is ");
+    REGEX_ASSERT(fields[3]=="b");
+    REGEX_ASSERT(fields[4]=="the time");
+    REGEX_ASSERT(fields[5]=="c");
+    REGEX_ASSERT(fields[6]=="");
+
+    n = pat1->split("  <a>Now is <b>the time<c>", fields, 10, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(n==6);
+    REGEX_ASSERT(fields[0]=="  ");
+    REGEX_ASSERT(fields[1]=="a");
+    REGEX_ASSERT(fields[2]=="Now is ");
+    REGEX_ASSERT(fields[3]=="b");
+    REGEX_ASSERT(fields[4]=="the time");
+    REGEX_ASSERT(fields[5]=="c");
+    REGEX_ASSERT(fields[6]=="");
+
+    n = pat1->split("  <a>Now is <b>the time<c>", fields, 4, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(n==4);
+    REGEX_ASSERT(fields[0]=="  ");
+    REGEX_ASSERT(fields[1]=="a");
+    REGEX_ASSERT(fields[2]=="Now is ");
+    REGEX_ASSERT(fields[3]=="the time<c>");
+    delete pat1;
+
+    pat1 = RegexPattern::compile("([-,])",  pe, status);
+    REGEX_CHECK_STATUS;
+    n = pat1->split("1-10,20", fields, 10, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(n==5);
+    REGEX_ASSERT(fields[0]=="1");
+    REGEX_ASSERT(fields[1]=="-");
+    REGEX_ASSERT(fields[2]=="10");
+    REGEX_ASSERT(fields[3]==",");
+    REGEX_ASSERT(fields[4]=="20");
+    delete pat1;
 }