ICU-10463 Regular Expressions, rework debug conditionals to fix build failures on clang, and to somewhat simplify.

X-SVN-Rev: 34565
2013-10-14 22:11:21 +00:00 · 2013-10-14 22:11:21 +00:00 · 10dd7ed47b
commit 10dd7ed47b
parent f1df548fc4
6 changed files with 289 additions and 321 deletions
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@ -109,7 +109,7 @@ void    RegexCompile::compile(
    fRXPat->fPatternString = new UnicodeString(pat);
    UText patternText = UTEXT_INITIALIZER;
    utext_openConstUnicodeString(&patternText, fRXPat->fPatternString, &e);
-    
+
    if (U_SUCCESS(e)) {
        compile(&patternText, pp, e);
        utext_close(&patternText);
@ -568,13 +568,13 @@ UBool RegexCompile::doParseActions(int32_t action)

            op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
            fRXPat->fCompiledPat->addElement(op, *fStatus);
-            
+
            op = URX_BUILD(URX_LA_END, dataLoc);
            fRXPat->fCompiledPat->addElement(op, *fStatus);

            op = URX_BUILD(URX_BACKTRACK, 0);
            fRXPat->fCompiledPat->addElement(op, *fStatus);
-            
+
            op = URX_BUILD(URX_NOP, 0);
            fRXPat->fCompiledPat->addElement(op, *fStatus);
            fRXPat->fCompiledPat->addElement(op, *fStatus);
@ -1147,7 +1147,7 @@ UBool RegexCompile::doParseActions(int32_t action)
            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
                op = URX_CARET_M;
            } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
-                op = URX_CARET;   // Only testing true start of input. 
+                op = URX_CARET;   // Only testing true start of input.
            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
                op = URX_CARET_M_UNIX;
            }
@ -1281,7 +1281,7 @@ UBool RegexCompile::doParseActions(int32_t action)
            literalChar(c);
        }
        break;
-        
+

    case doBackRef:
        // BackReference.  Somewhat unusual in that the front-end can not completely parse
@ -1643,7 +1643,7 @@ UBool RegexCompile::doParseActions(int32_t action)
        compileSet(theSet);
        break;
        }
-        
+
    case doSetIntersection2:
        // Have scanned something like [abc&&
        setPushOp(setIntersection2);
@ -1654,7 +1654,7 @@ UBool RegexCompile::doParseActions(int32_t action)
        //    This operation is the highest precedence set operation, so we can always do
        //    it immediately, without waiting to see what follows.  It is necessary to perform
        //    any pending '-' or '&' operation first, because these have the same precedence
-        //    as union-ing in a literal' 
+        //    as union-ing in a literal'
        {
            setEval(setUnion);
            UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
@ -1749,7 +1749,7 @@ UBool RegexCompile::doParseActions(int32_t action)
            }  // else error.  scanProp() reported the error status already.
        }
        break;
-        
+
    case doSetProp:
        //  Scanned a \p \P within [brackets].
        {
@ -1771,7 +1771,7 @@ UBool RegexCompile::doParseActions(int32_t action)
        //        and ICU UnicodeSet behavior.
        {
        if (fLastSetLiteral > fC.fChar) {
-            error(U_REGEX_INVALID_RANGE);  
+            error(U_REGEX_INVALID_RANGE);
        }
        UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
        s->add(fLastSetLiteral, fC.fChar);
@ -1830,7 +1830,7 @@ void    RegexCompile::fixLiterals(UBool split) {
    int32_t indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.length(), -1);
    UChar32 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint);

-    // Split:  We need to  ensure that the last item in the compiled pattern 
+    // Split:  We need to  ensure that the last item in the compiled pattern
    //     refers only to the last literal scanned in the pattern, so that
    //     quantifiers (*, +, etc.) affect only it, and not a longer string.
    //     Split before case folding for case insensitive matches.
@ -1856,7 +1856,7 @@ void    RegexCompile::fixLiterals(UBool split) {

    if (indexOfLastCodePoint == 0) {
        // Single character, emit a URX_ONECHAR op to match it.
-        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) && 
+        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) &&
                 u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) {
            op = URX_BUILD(URX_ONECHAR_I, lastCodePoint);
        } else {
@ -1875,7 +1875,7 @@ void    RegexCompile::fixLiterals(UBool split) {
        fRXPat->fCompiledPat->addElement(op, *fStatus);
        op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length());
        fRXPat->fCompiledPat->addElement(op, *fStatus);
-        
+
        // Add this string into the accumulated strings of the compiled pattern.
        fRXPat->fLiteralText.append(fLiteralChars);
    }
@ -2449,7 +2449,7 @@ void   RegexCompile::matchStartType() {
        case URX_STO_INP_LOC:
        case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
        case URX_BACKREF_I:
-                
+
        case URX_STO_SP:          // Setup for atomic or possessive blocks.  Doesn't change what can match.
        case URX_LD_SP:
            break;
@ -2762,7 +2762,7 @@ void   RegexCompile::matchStartType() {
            {
                // Look-around.  Scan forward until the matching look-ahead end,
                //   without processing the look-around block.  This is overly pessimistic.
-                
+
                // Keep track of the nesting depth of look-around blocks.  Boilerplate code for
                //   lookahead contains two LA_END instructions, so count goes up by two
                //   for each LA_START.
@ -3322,7 +3322,7 @@ int32_t   RegexCompile::maxMatchLength(int32_t start, int32_t end) {
            //        compiled (folded) string.  Folding may add code points, but
            //        not remove them.
            //
-            //        There is a potential problem if a supplemental code point 
+            //        There is a potential problem if a supplemental code point
            //        case-folds to a BMP code point.  In this case our compiled string
            //        could be shorter (in code units) than a matching user string.
            //
@ -3353,7 +3353,7 @@ int32_t   RegexCompile::maxMatchLength(int32_t start, int32_t end) {
                    loc = loopEndLoc;
                    break;
                }
-                
+
                int32_t maxLoopCount = fRXPat->fCompiledPat->elementAti(loc+3);
                if (maxLoopCount == -1) {
                    // Unbounded Loop. No upper bound on match length.
@ -3471,7 +3471,7 @@ void RegexCompile::stripNOPs() {
            d++;
        }
    }
-    
+
    UnicodeString caseStringBuffer;

    // Make a second pass over the code, removing the NOPs by moving following
@ -3518,7 +3518,7 @@ void RegexCompile::stripNOPs() {
                op    = URX_BUILD(opType, where);
                fRXPat->fCompiledPat->setElementAt(op, dst);
                dst++;
-                
+
                fRXPat->fNeedsAltInput = TRUE;
                break;
            }
@ -3609,7 +3609,7 @@ void RegexCompile::error(UErrorCode e) {
            fParseErr->line   = (int32_t)fLineNum;
            fParseErr->offset = (int32_t)fCharNum;
        }
-        
+
        UErrorCode status = U_ZERO_ERROR; // throwaway status for extracting context

        // Fill in the context.
@ -3663,7 +3663,7 @@ UChar32  RegexCompile::nextCharLL() {
        fPeekChar = -1;
        return ch;
    }
-    
+
    // assume we're already in the right place
    ch = UTEXT_NEXT32(fRXPat->fPattern);
    if (ch == U_SENTINEL) {
@ -3719,7 +3719,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {

    if (fQuoteMode) {
        c.fQuoted = TRUE;
-        if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) || 
+        if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) ||
            c.fChar == (UChar32)-1) {
            fQuoteMode = FALSE;  //  Exit quote mode,
            nextCharLL();        // discard the E
@ -3780,11 +3780,11 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
                //
                nextCharLL();                 // get & discard the peeked char.
                c.fQuoted = TRUE;
-                
+
                if (UTEXT_FULL_TEXT_IN_CHUNK(fRXPat->fPattern, fPatternLength)) {
                    int32_t endIndex = (int32_t)pos;
                    c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, (int32_t)fPatternLength, (void *)fRXPat->fPattern->chunkContents);
-                    
+
                    if (endIndex == pos) {
                        error(U_REGEX_BAD_ESCAPE_SEQUENCE);
                    }
@ -3793,7 +3793,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
                } else {
                    int32_t offset = 0;
                    struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(fRXPat->fPattern);
-                    
+
                    UTEXT_SETNATIVEINDEX(fRXPat->fPattern, pos);
                    c.fChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context);

@ -3836,8 +3836,8 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
                        c.fChar >>= 3;
                    }
                }
-                c.fQuoted = TRUE; 
-            } 
+                c.fQuoted = TRUE;
+            }
            else if (peekCharLL() == chQ) {
                //  "\Q"  enter quote mode, which will continue until "\E"
                fQuoteMode = TRUE;
@ -3885,7 +3885,7 @@ UChar32  RegexCompile::scanNamedChar() {
        error(U_REGEX_PROPERTY_SYNTAX);
        return 0;
    }
-    
+
    UnicodeString  charName;
    for (;;) {
        nextChar(fC);
@ -3898,7 +3898,7 @@ UChar32  RegexCompile::scanNamedChar() {
        }
        charName.append(fC.fChar);
    }
-    
+
    char name[100];
    if (!uprv_isInvariantUString(charName.getBuffer(), charName.length()) ||
         (uint32_t)charName.length()>=sizeof(name)) {
@ -4006,7 +4006,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {

    // Scan for a closing ].   A little tricky because there are some perverse
    //   edge cases possible.  "[:abc\Qdef:] \E]"  is a valid non-property expression,
-    //   ending on the second closing ]. 
+    //   ending on the second closing ].

    UnicodeString propName;
    UBool         negated  = FALSE;
@ -4017,7 +4017,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {
       negated = TRUE;
       nextChar(fC);
    }
-    
+
    // Scan for the closing ":]", collecting the property name along the way.
    UBool  sawPropSetTerminator = FALSE;
    for (;;) {
@ -4035,7 +4035,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {
            break;
        }
    }
-    
+
    if (sawPropSetTerminator) {
        uset = createSetForProperty(propName, negated);
    }
@ -4068,7 +4068,7 @@ static inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) {
 //  Create a Unicode Set from a Unicode Property expression.
 //     This is common code underlying both \p{...} ane [:...:] expressions.
 //     Includes trying the Java "properties" that aren't supported as
-//     normal ICU UnicodeSet properties 
+//     normal ICU UnicodeSet properties
 //
 static const UChar posSetPrefix[] = {0x5b, 0x5c, 0x70, 0x7b, 0}; // "[\p{"
 static const UChar negSetPrefix[] = {0x5b, 0x5c, 0x50, 0x7b, 0}; // "[\P{"
@ -4076,7 +4076,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
    UnicodeString   setExpr;
    UnicodeSet      *set;
    uint32_t        usetFlags = 0;
-    
+
    if (U_FAILURE(*fStatus)) {
        return NULL;
    }
@ -4101,13 +4101,13 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
    }
    delete set;
    set = NULL;
-    
+
    //
    //  The property as it was didn't work.

-    //  Do [:word:]. It is not recognized as a property by UnicodeSet.  "word" not standard POSIX 
+    //  Do [:word:]. It is not recognized as a property by UnicodeSet.  "word" not standard POSIX
    //     or standard Java, but many other regular expression packages do recognize it.
-    
+
    if (propName.caseCompare(UNICODE_STRING_SIMPLE("word"), 0) == 0) {
        *fStatus = U_ZERO_ERROR;
        set = new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET]));
@ -4127,7 +4127,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
    //       InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols.
    //
    //       Note on Spaces:  either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols"
-    //                        is accepted by Java.  The property part of the name is compared 
+    //                        is accepted by Java.  The property part of the name is compared
    //                        case-insenstively.  The spaces must be exactly as shown, either
    //                        all there, or all omitted, with exactly one at each position
    //                        if they are present.  From checking against JDK 1.6
@ -4146,7 +4146,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
    else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
        mPropName = UNICODE_STRING_SIMPLE("javaValidCodePoint");
    }
-    
+
    //    See if the property looks like a Java "InBlockName", which
    //    we will recast as "Block=BlockName"
    //
@ -4270,7 +4270,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
        set = NULL;
    }
    error(*fStatus);
-    return NULL; 
+    return NULL;
 }


--- a/icu4c/source/i18n/regeximp.h
+++ b/icu4c/source/i18n/regeximp.h
@ -1,5 +1,5 @@
 //
-//   Copyright (C) 2002-2012 International Business Machines Corporation
+//   Copyright (C) 2002-2013 International Business Machines Corporation
 //   and others. All rights reserved.
 //
 //   file:  regeximp.h
@ -22,11 +22,11 @@

 U_NAMESPACE_BEGIN

-// For debugging, define REGEX_DEBUG 
+// For debugging, define REGEX_DEBUG
 // To define with configure,
-//   ./runConfigureICU --enable-debug --disable-release Linux CPPFLAGS="-DREGEX_DEBUG"
+//   CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release Linux

-#ifdef REGEX_DEBUG   
+#ifdef REGEX_DEBUG
 //
 //  debugging options.  Enable one or more of the three #defines immediately following
 //
@ -46,19 +46,6 @@ U_NAMESPACE_BEGIN
 #define REGEX_SCAN_DEBUG_PRINTF(a)
 #endif

-#ifdef REGEX_DUMP_DEBUG
-#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
-#else
-#define REGEX_DUMP_DEBUG_PRINTF(a)
-#endif
-
-#ifdef REGEX_RUN_DEBUG
-#define REGEX_RUN_DEBUG_PRINTF(a) printf a
-#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
-#else
-#define REGEX_RUN_DEBUG_PRINTF(a)
-#endif
-

 //
 //  Opcode types     In the compiled form of the regexp, these are the type, or opcodes,
@ -373,9 +360,9 @@ class CaseFoldingUTextIterator: public UMemory {
        CaseFoldingUTextIterator(UText &text);
        ~CaseFoldingUTextIterator();

-        UChar32 next();           // Next case folded character 
+        UChar32 next();           // Next case folded character

-        UBool   inExpansion();    // True if last char returned from next() and the 
+        UBool   inExpansion();    // True if last char returned from next() and the
                                  //  next to be returned both originated from a string
                                  //  folding of the same code point from the orignal UText.
      private:
@ -398,9 +385,9 @@ class CaseFoldingUCharIterator: public UMemory {
        CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit);
        ~CaseFoldingUCharIterator();

-        UChar32 next();           // Next case folded character 
+        UChar32 next();           // Next case folded character

-        UBool   inExpansion();    // True if last char returned from next() and the 
+        UBool   inExpansion();    // True if last char returned from next() and the
                                  //  next to be returned both originated from a string
                                  //  folding of the same code point from the orignal UText.

--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@ -2720,7 +2720,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
    int32_t     opType;                //    the opcode
    int32_t     opValue;               //    and the operand value.

-    #ifdef REGEX_RUN_DEBUG
+#ifdef REGEX_RUN_DEBUG
    if (fTraceDebug)
    {
        printf("MatchAt(startIdx=%ld)\n", startIdx);
@ -2730,7 +2730,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
            if (c<32 || c>256) {
                c = '.';
            }
-            REGEX_DUMP_DEBUG_PRINTF(("%c", c));
+            printf("%c", c);

            c = UTEXT_NEXT32(fPattern->fPattern);
        }
@ -2748,7 +2748,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
        printf("\n");
        printf("\n");
    }
-    #endif
+#endif

    if (U_FAILURE(status)) {
        return;
@ -2778,23 +2778,17 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
    //  One iteration of the loop per pattern operation performed.
    //
    for (;;) {
-#if 0
-        if (_heapchk() != _HEAPOK) {
-            fprintf(stderr, "Heap Trouble\n");
-        }
-#endif
-
        op      = (int32_t)pat[fp->fPatIdx];
        opType  = URX_TYPE(op);
        opValue = URX_VAL(op);
-        #ifdef REGEX_RUN_DEBUG
+#ifdef REGEX_RUN_DEBUG
        if (fTraceDebug) {
            UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
            printf("inputIdx=%ld   inputChar=%x   sp=%3ld   activeLimit=%ld  ", fp->fInputIdx,
                UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
            fPattern->dumpOp(fp->fPatIdx);
        }
-        #endif
+#endif
        fp->fPatIdx++;

        switch (opType) {
@ -4188,16 +4182,17 @@ breakFromLoop:
        fLastMatchEnd = fMatchEnd;
        fMatchStart   = startIdx;
        fMatchEnd     = fp->fInputIdx;
-        if (fTraceDebug) {
-            REGEX_RUN_DEBUG_PRINTF(("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd));
-        }
-    }
-    else
-    {
-        if (fTraceDebug) {
-            REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
+    }
+
+#ifdef REGEX_RUN_DEBUG
+    if (fTraceDebug) {
+        if (isMatch) {
+            printf("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd);
+        } else {
+            printf("No match\n\n");
        }
    }
+#endif

    fFrame = fp;                // The active stack frame when the engine stopped.
                                //   Contains the capture group results that we need to
@ -4228,8 +4223,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
    int32_t     opValue;               //    and the operand value.

 #ifdef REGEX_RUN_DEBUG
-    if (fTraceDebug)
-    {
+    if (fTraceDebug) {
        printf("MatchAt(startIdx=%d)\n", startIdx);
        printf("Original Pattern: ");
        UChar32 c = utext_next32From(fPattern->fPattern, 0);
@ -4237,7 +4231,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
            if (c<32 || c>256) {
                c = '.';
            }
-            REGEX_DUMP_DEBUG_PRINTF(("%c", c));
+            printf("%c", c);

            c = UTEXT_NEXT32(fPattern->fPattern);
        }
@ -4287,12 +4281,6 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
    //  One iteration of the loop per pattern operation performed.
    //
    for (;;) {
-#if 0
-        if (_heapchk() != _HEAPOK) {
-            fprintf(stderr, "Heap Trouble\n");
-        }
-#endif
-
        op      = (int32_t)pat[fp->fPatIdx];
        opType  = URX_TYPE(op);
        opValue = URX_VAL(op);
@ -5627,20 +5615,21 @@ breakFromLoop:
        fLastMatchEnd = fMatchEnd;
        fMatchStart   = startIdx;
        fMatchEnd     = fp->fInputIdx;
-        if (fTraceDebug) {
-            REGEX_RUN_DEBUG_PRINTF(("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd));
-        }
-    }
-    else
-    {
-        if (fTraceDebug) {
-            REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
-        }
    }

+#ifdef REGEX_RUN_DEBUG
+    if (fTraceDebug) {
+        if (isMatch) {
+            printf("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd);
+        } else {
+            printf("No match\n\n");
+        }
+    }
+#endif
+
    fFrame = fp;                // The active stack frame when the engine stopped.
-    //   Contains the capture group results that we need to
-    //    access later.
+                                //   Contains the capture group results that we need to
+                                //    access later.

    return;
 }
--- a/icu4c/source/i18n/repattrn.cpp
+++ b/icu4c/source/i18n/repattrn.cpp
@ -3,7 +3,7 @@
 //
 /*
 ***************************************************************************
-*   Copyright (C) 2002-2012 International Business Machines Corporation   *
+*   Copyright (C) 2002-2013 International Business Machines Corporation   *
 *   and others. All rights reserved.                                      *
 ***************************************************************************
 */
@ -275,21 +275,21 @@ RegexPattern::compile(const UnicodeString &regex,
    if (U_FAILURE(status)) {
        return NULL;
    }
-    
+
    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
    UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
    UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
-    
+
    if ((flags & ~allFlags) != 0) {
        status = U_REGEX_INVALID_FLAG;
        return NULL;
    }
-    
+
    if ((flags & UREGEX_CANON_EQ) != 0) {
        status = U_REGEX_UNIMPLEMENTED;
        return NULL;
    }
-    
+
    RegexPattern *This = new RegexPattern;
    if (This == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
@ -301,15 +301,15 @@ RegexPattern::compile(const UnicodeString &regex,
        return NULL;
    }
    This->fFlags = flags;
-    
+
    RegexCompile     compiler(This, status);
    compiler.compile(regex, pe, status);
-    
+
    if (U_FAILURE(status)) {
        delete This;
        This = NULL;
    }
-    
+
    return This;
 }

@ -355,7 +355,7 @@ RegexPattern::compile(UText                *regex,

    RegexCompile     compiler(This, status);
    compiler.compile(regex, pe, status);
-    
+
    if (U_FAILURE(status)) {
        delete This;
        This = NULL;
@ -538,12 +538,12 @@ UnicodeString RegexPattern::pattern() const {
        int64_t nativeLen = utext_nativeLength(fPattern);
        int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
        UnicodeString result;
-        
+
        status = U_ZERO_ERROR;
        UChar *resultChars = result.getBuffer(len16);
        utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
        result.releaseBuffer(len16);
-        
+
        return result;
    }
 }
@ -622,8 +622,9 @@ int32_t  RegexPattern::split(UText *input,
 //           Debugging function only.
 //
 //---------------------------------------------------------------------
-#if defined(REGEX_DEBUG)
 void   RegexPattern::dumpOp(int32_t index) const {
+    (void)index;  // Suppress warnings in non-debug build.
+#if defined(REGEX_DEBUG)
    static const char * const opNames[] = {URX_OPCODE_NAMES};
    int32_t op          = fCompiledPat->elementAti(index);
    int32_t val         = URX_VAL(op);
@ -633,7 +634,7 @@ void   RegexPattern::dumpOp(int32_t index) const {
        pinnedType = 0;
    }

-    REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
+    printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
    switch (type) {
    case URX_NOP:
    case URX_DOTANY:
@ -682,12 +683,12 @@ void   RegexPattern::dumpOp(int32_t index) const {
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
        // types with an integer operand field.
-        REGEX_DUMP_DEBUG_PRINTF(("%d", val));
+        printf("%d", val);
        break;

    case URX_ONECHAR:
    case URX_ONECHAR_I:
-        REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
+        printf("%c", val<256?val:'?');
        break;

    case URX_STRING:
@ -700,7 +701,7 @@ void   RegexPattern::dumpOp(int32_t index) const {
            for (i=val; i<val+length; i++) {
                UChar c = fLiteralText[i];
                if (c < 32 || c >= 256) {c = '.';}
-                REGEX_DUMP_DEBUG_PRINTF(("%c", c));
+                printf("%c", c);
            }
        }
        break;
@ -712,7 +713,7 @@ void   RegexPattern::dumpOp(int32_t index) const {
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
            set->toPattern(s, TRUE);
            for (int32_t i=0; i<s.length(); i++) {
-                REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
+                printf("%c", s.charAt(i));
            }
        }
        break;
@ -722,89 +723,89 @@ void   RegexPattern::dumpOp(int32_t index) const {
        {
            UnicodeString s;
            if (val & URX_NEG_SET) {
-                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
+                printf("NOT ");
                val &= ~URX_NEG_SET;
            }
            UnicodeSet *set = fStaticSets[val];
            set->toPattern(s, TRUE);
            for (int32_t i=0; i<s.length(); i++) {
-                REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
+                printf("%c", s.charAt(i));
            }
        }
        break;


    default:
-        REGEX_DUMP_DEBUG_PRINTF(("??????"));
+        printf("??????");
        break;
    }
-    REGEX_DUMP_DEBUG_PRINTF(("\n"));
-}
+    printf("\n");
 #endif
+}


-#if defined(REGEX_DEBUG)
 U_CAPI void  U_EXPORT2
-RegexPatternDump(const RegexPattern *This) {
+RegexPattern::dumpPattern() const {
+#if defined(REGEX_DEBUG)
    int      index;
    int      i;

-    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
-    UChar32 c = utext_next32From(This->fPattern, 0);
+    printf("Original Pattern:  ");
+    UChar32 c = utext_next32From(fPattern, 0);
    while (c != U_SENTINEL) {
        if (c<32 || c>256) {
            c = '.';
        }
-        REGEX_DUMP_DEBUG_PRINTF(("%c", c));
-        
-        c = UTEXT_NEXT32(This->fPattern);
-    }
-    REGEX_DUMP_DEBUG_PRINTF(("\n"));
-    REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
-    REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
-    if (This->fStartType == START_STRING) {
-        REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
-        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
-            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
-        }
-        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
+        printf("%c", c);

-    } else if (This->fStartType == START_SET) {
-        int32_t numSetChars = This->fInitialChars->size();
+        c = UTEXT_NEXT32(fPattern);
+    }
+    printf("\n");
+    printf("   Min Match Length:  %d\n", fMinMatchLen);
+    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
+    if (fStartType == START_STRING) {
+        printf("    Initial match string: \"");
+        for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
+            printf("%c", fLiteralText[i]);   // TODO:  non-printables, surrogates.
+        }
+        printf("\"\n");
+
+    } else if (fStartType == START_SET) {
+        int32_t numSetChars = fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
-        REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
+        printf("     Match First Chars : ");
        for (i=0; i<numSetChars; i++) {
-            UChar32 c = This->fInitialChars->charAt(i);
+            UChar32 c = fInitialChars->charAt(i);
            if (0x20<c && c <0x7e) {
-                REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
+                printf("%c ", c);
            } else {
-                REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
+                printf("%#x ", c);
            }
        }
-        if (numSetChars < This->fInitialChars->size()) {
-            REGEX_DUMP_DEBUG_PRINTF((" ..."));
+        if (numSetChars < fInitialChars->size()) {
+            printf(" ...");
        }
-        REGEX_DUMP_DEBUG_PRINTF(("\n"));
+        printf("\n");

-    } else if (This->fStartType == START_CHAR) {
-        REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
-        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
-                REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
+    } else if (fStartType == START_CHAR) {
+        printf("    First char of Match : ");
+        if (0x20 < fInitialChar && fInitialChar<0x7e) {
+                printf("%c\n", fInitialChar);
            } else {
-                REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
+                printf("%#x\n", fInitialChar);
            }
    }

-    REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
-           "-------------------------------------------\n"));
-    for (index = 0; index<This->fCompiledPat->size(); index++) {
-        This->dumpOp(index);
+    printf("\nIndex   Binary     Type             Operand\n" \
+           "-------------------------------------------\n");
+    for (index = 0; index<fCompiledPat->size(); index++) {
+        dumpOp(index);
    }
-    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
-}
+    printf("\n\n");
 #endif
+}



--- a/icu4c/source/i18n/unicode/regex.h
+++ b/icu4c/source/i18n/unicode/regex.h
@ -68,21 +68,6 @@ class  UVector;
 class  UVector32;
 class  UVector64;

-#ifndef U_HIDE_INTERNAL_API
-/**
- *   RBBIPatternDump   Debug function, displays the compiled form of a pattern.
- *   @internal
- */
-#ifdef REGEX_DEBUG
-U_INTERNAL void U_EXPORT2
-    RegexPatternDump(const RegexPattern *pat);
-#else
-    #undef RegexPatternDump
-    #define RegexPatternDump(pat)
-#endif
-#endif  /* U_HIDE_INTERNAL_API */
-
-

 /**
  * Class <code>RegexPattern</code> represents a compiled regular expression.  It includes
@ -613,11 +598,17 @@ private:
    //
    void        init();            // Common initialization, for use by constructors.
    void        zap();             // Common cleanup
-#ifdef REGEX_DEBUG
-    void        dumpOp(int32_t index) const;
-    friend     void U_EXPORT2 RegexPatternDump(const RegexPattern *);
-#endif

+    void        dumpOp(int32_t index) const;
+
+  public:
+#ifndef U_HIDE_INTERNAL_API
+    /**
+      * Dump a compiled pattern to stdout. Internal debug function; does nothing unless built with REGEX_DEBUG defined.
+      * @internal
+      */
+    void        dumpPattern() const;
+#endif
 };


--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp