ICU-10463 Regular Expressions, rework debug conditionals to fix build failures on clang, and to somewhat simplify.

X-SVN-Rev: 34565
This commit is contained in:
Andy Heninger 2013-10-14 22:11:21 +00:00
parent f1df548fc4
commit 10dd7ed47b
6 changed files with 289 additions and 321 deletions

View File

@ -109,7 +109,7 @@ void RegexCompile::compile(
fRXPat->fPatternString = new UnicodeString(pat);
UText patternText = UTEXT_INITIALIZER;
utext_openConstUnicodeString(&patternText, fRXPat->fPatternString, &e);
if (U_SUCCESS(e)) {
compile(&patternText, pp, e);
utext_close(&patternText);
@ -568,13 +568,13 @@ UBool RegexCompile::doParseActions(int32_t action)
op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
fRXPat->fCompiledPat->addElement(op, *fStatus);
op = URX_BUILD(URX_LA_END, dataLoc);
fRXPat->fCompiledPat->addElement(op, *fStatus);
op = URX_BUILD(URX_BACKTRACK, 0);
fRXPat->fCompiledPat->addElement(op, *fStatus);
op = URX_BUILD(URX_NOP, 0);
fRXPat->fCompiledPat->addElement(op, *fStatus);
fRXPat->fCompiledPat->addElement(op, *fStatus);
@ -1147,7 +1147,7 @@ UBool RegexCompile::doParseActions(int32_t action)
} else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
op = URX_CARET_M;
} else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
op = URX_CARET; // Only testing true start of input.
op = URX_CARET; // Only testing true start of input.
} else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
op = URX_CARET_M_UNIX;
}
@ -1281,7 +1281,7 @@ UBool RegexCompile::doParseActions(int32_t action)
literalChar(c);
}
break;
case doBackRef:
// BackReference. Somewhat unusual in that the front-end can not completely parse
@ -1643,7 +1643,7 @@ UBool RegexCompile::doParseActions(int32_t action)
compileSet(theSet);
break;
}
case doSetIntersection2:
// Have scanned something like [abc&&
setPushOp(setIntersection2);
@ -1654,7 +1654,7 @@ UBool RegexCompile::doParseActions(int32_t action)
// This operation is the highest precedence set operation, so we can always do
// it immediately, without waiting to see what follows. It is necessary to perform
// any pending '-' or '&' operation first, because these have the same precedence
// as union-ing in a literal'
// as union-ing in a literal'
{
setEval(setUnion);
UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
@ -1749,7 +1749,7 @@ UBool RegexCompile::doParseActions(int32_t action)
} // else error. scanProp() reported the error status already.
}
break;
case doSetProp:
// Scanned a \p \P within [brackets].
{
@ -1771,7 +1771,7 @@ UBool RegexCompile::doParseActions(int32_t action)
// and ICU UnicodeSet behavior.
{
if (fLastSetLiteral > fC.fChar) {
error(U_REGEX_INVALID_RANGE);
error(U_REGEX_INVALID_RANGE);
}
UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
s->add(fLastSetLiteral, fC.fChar);
@ -1830,7 +1830,7 @@ void RegexCompile::fixLiterals(UBool split) {
int32_t indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.length(), -1);
UChar32 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint);
// Split: We need to ensure that the last item in the compiled pattern
// Split: We need to ensure that the last item in the compiled pattern
// refers only to the last literal scanned in the pattern, so that
// quantifiers (*, +, etc.) affect only it, and not a longer string.
// Split before case folding for case insensitive matches.
@ -1856,7 +1856,7 @@ void RegexCompile::fixLiterals(UBool split) {
if (indexOfLastCodePoint == 0) {
// Single character, emit a URX_ONECHAR op to match it.
if ((fModeFlags & UREGEX_CASE_INSENSITIVE) &&
if ((fModeFlags & UREGEX_CASE_INSENSITIVE) &&
u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) {
op = URX_BUILD(URX_ONECHAR_I, lastCodePoint);
} else {
@ -1875,7 +1875,7 @@ void RegexCompile::fixLiterals(UBool split) {
fRXPat->fCompiledPat->addElement(op, *fStatus);
op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length());
fRXPat->fCompiledPat->addElement(op, *fStatus);
// Add this string into the accumulated strings of the compiled pattern.
fRXPat->fLiteralText.append(fLiteralChars);
}
@ -2449,7 +2449,7 @@ void RegexCompile::matchStartType() {
case URX_STO_INP_LOC:
case URX_BACKREF: // BackRef. Must assume that it might be a zero length match
case URX_BACKREF_I:
case URX_STO_SP: // Setup for atomic or possessive blocks. Doesn't change what can match.
case URX_LD_SP:
break;
@ -2762,7 +2762,7 @@ void RegexCompile::matchStartType() {
{
// Look-around. Scan forward until the matching look-ahead end,
// without processing the look-around block. This is overly pessimistic.
// Keep track of the nesting depth of look-around blocks. Boilerplate code for
// lookahead contains two LA_END instructions, so count goes up by two
// for each LA_START.
@ -3322,7 +3322,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
// compiled (folded) string. Folding may add code points, but
// not remove them.
//
// There is a potential problem if a supplemental code point
// There is a potential problem if a supplemental code point
// case-folds to a BMP code point. In this case our compiled string
// could be shorter (in code units) than a matching user string.
//
@ -3353,7 +3353,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
loc = loopEndLoc;
break;
}
int32_t maxLoopCount = fRXPat->fCompiledPat->elementAti(loc+3);
if (maxLoopCount == -1) {
// Unbounded Loop. No upper bound on match length.
@ -3471,7 +3471,7 @@ void RegexCompile::stripNOPs() {
d++;
}
}
UnicodeString caseStringBuffer;
// Make a second pass over the code, removing the NOPs by moving following
@ -3518,7 +3518,7 @@ void RegexCompile::stripNOPs() {
op = URX_BUILD(opType, where);
fRXPat->fCompiledPat->setElementAt(op, dst);
dst++;
fRXPat->fNeedsAltInput = TRUE;
break;
}
@ -3609,7 +3609,7 @@ void RegexCompile::error(UErrorCode e) {
fParseErr->line = (int32_t)fLineNum;
fParseErr->offset = (int32_t)fCharNum;
}
UErrorCode status = U_ZERO_ERROR; // throwaway status for extracting context
// Fill in the context.
@ -3663,7 +3663,7 @@ UChar32 RegexCompile::nextCharLL() {
fPeekChar = -1;
return ch;
}
// assume we're already in the right place
ch = UTEXT_NEXT32(fRXPat->fPattern);
if (ch == U_SENTINEL) {
@ -3719,7 +3719,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
if (fQuoteMode) {
c.fQuoted = TRUE;
if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) ||
if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) ||
c.fChar == (UChar32)-1) {
fQuoteMode = FALSE; // Exit quote mode,
nextCharLL(); // discard the E
@ -3780,11 +3780,11 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
//
nextCharLL(); // get & discard the peeked char.
c.fQuoted = TRUE;
if (UTEXT_FULL_TEXT_IN_CHUNK(fRXPat->fPattern, fPatternLength)) {
int32_t endIndex = (int32_t)pos;
c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, (int32_t)fPatternLength, (void *)fRXPat->fPattern->chunkContents);
if (endIndex == pos) {
error(U_REGEX_BAD_ESCAPE_SEQUENCE);
}
@ -3793,7 +3793,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
} else {
int32_t offset = 0;
struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(fRXPat->fPattern);
UTEXT_SETNATIVEINDEX(fRXPat->fPattern, pos);
c.fChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context);
@ -3836,8 +3836,8 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
c.fChar >>= 3;
}
}
c.fQuoted = TRUE;
}
c.fQuoted = TRUE;
}
else if (peekCharLL() == chQ) {
// "\Q" enter quote mode, which will continue until "\E"
fQuoteMode = TRUE;
@ -3885,7 +3885,7 @@ UChar32 RegexCompile::scanNamedChar() {
error(U_REGEX_PROPERTY_SYNTAX);
return 0;
}
UnicodeString charName;
for (;;) {
nextChar(fC);
@ -3898,7 +3898,7 @@ UChar32 RegexCompile::scanNamedChar() {
}
charName.append(fC.fChar);
}
char name[100];
if (!uprv_isInvariantUString(charName.getBuffer(), charName.length()) ||
(uint32_t)charName.length()>=sizeof(name)) {
@ -4006,7 +4006,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {
// Scan for a closing ]. A little tricky because there are some perverse
// edge cases possible. "[:abc\Qdef:] \E]" is a valid non-property expression,
// ending on the second closing ].
// ending on the second closing ].
UnicodeString propName;
UBool negated = FALSE;
@ -4017,7 +4017,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {
negated = TRUE;
nextChar(fC);
}
// Scan for the closing ":]", collecting the property name along the way.
UBool sawPropSetTerminator = FALSE;
for (;;) {
@ -4035,7 +4035,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {
break;
}
}
if (sawPropSetTerminator) {
uset = createSetForProperty(propName, negated);
}
@ -4068,7 +4068,7 @@ static inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) {
// Create a Unicode Set from a Unicode Property expression.
// This is common code underlying both \p{...} and [:...:] expressions.
// Includes trying the Java "properties" that aren't supported as
// normal ICU UnicodeSet properties
// normal ICU UnicodeSet properties
//
static const UChar posSetPrefix[] = {0x5b, 0x5c, 0x70, 0x7b, 0}; // "[\p{"
static const UChar negSetPrefix[] = {0x5b, 0x5c, 0x50, 0x7b, 0}; // "[\P{"
@ -4076,7 +4076,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
UnicodeString setExpr;
UnicodeSet *set;
uint32_t usetFlags = 0;
if (U_FAILURE(*fStatus)) {
return NULL;
}
@ -4101,13 +4101,13 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
}
delete set;
set = NULL;
//
// The property as it was didn't work.
// Do [:word:]. It is not recognized as a property by UnicodeSet. "word" not standard POSIX
// Do [:word:]. It is not recognized as a property by UnicodeSet. "word" not standard POSIX
// or standard Java, but many other regular expression packages do recognize it.
if (propName.caseCompare(UNICODE_STRING_SIMPLE("word"), 0) == 0) {
*fStatus = U_ZERO_ERROR;
set = new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET]));
@ -4127,7 +4127,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
// InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols.
//
// Note on Spaces: either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols"
// is accepted by Java. The property part of the name is compared
// is accepted by Java. The property part of the name is compared
// case-insensitively. The spaces must be exactly as shown, either
// all there, or all omitted, with exactly one at each position
// if they are present. From checking against JDK 1.6
@ -4146,7 +4146,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
mPropName = UNICODE_STRING_SIMPLE("javaValidCodePoint");
}
// See if the property looks like a Java "InBlockName", which
// we will recast as "Block=BlockName"
//
@ -4270,7 +4270,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
set = NULL;
}
error(*fStatus);
return NULL;
return NULL;
}

View File

@ -1,5 +1,5 @@
//
// Copyright (C) 2002-2012 International Business Machines Corporation
// Copyright (C) 2002-2013 International Business Machines Corporation
// and others. All rights reserved.
//
// file: regeximp.h
@ -22,11 +22,11 @@
U_NAMESPACE_BEGIN
// For debugging, define REGEX_DEBUG
// For debugging, define REGEX_DEBUG
// To define with configure,
// ./runConfigureICU --enable-debug --disable-release Linux CPPFLAGS="-DREGEX_DEBUG"
// CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release Linux
#ifdef REGEX_DEBUG
#ifdef REGEX_DEBUG
//
// debugging options. Enable one or more of the three #defines immediately following
//
@ -46,19 +46,6 @@ U_NAMESPACE_BEGIN
#define REGEX_SCAN_DEBUG_PRINTF(a)
#endif
#ifdef REGEX_DUMP_DEBUG
#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
#else
#define REGEX_DUMP_DEBUG_PRINTF(a)
#endif
#ifdef REGEX_RUN_DEBUG
#define REGEX_RUN_DEBUG_PRINTF(a) printf a
#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
#else
#define REGEX_RUN_DEBUG_PRINTF(a)
#endif
//
// Opcode types In the compiled form of the regexp, these are the type, or opcodes,
@ -373,9 +360,9 @@ class CaseFoldingUTextIterator: public UMemory {
CaseFoldingUTextIterator(UText &text);
~CaseFoldingUTextIterator();
UChar32 next(); // Next case folded character
UChar32 next(); // Next case folded character
UBool inExpansion(); // True if last char returned from next() and the
UBool inExpansion(); // True if last char returned from next() and the
// next to be returned both originated from a string
// folding of the same code point from the original UText.
private:
@ -398,9 +385,9 @@ class CaseFoldingUCharIterator: public UMemory {
CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit);
~CaseFoldingUCharIterator();
UChar32 next(); // Next case folded character
UChar32 next(); // Next case folded character
UBool inExpansion(); // True if last char returned from next() and the
UBool inExpansion(); // True if last char returned from next() and the
// next to be returned both originated from a string
// folding of the same code point from the original UText.

View File

@ -2720,7 +2720,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
int32_t opType; // the opcode
int32_t opValue; // and the operand value.
#ifdef REGEX_RUN_DEBUG
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug)
{
printf("MatchAt(startIdx=%ld)\n", startIdx);
@ -2730,7 +2730,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
if (c<32 || c>256) {
c = '.';
}
REGEX_DUMP_DEBUG_PRINTF(("%c", c));
printf("%c", c);
c = UTEXT_NEXT32(fPattern->fPattern);
}
@ -2748,7 +2748,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
printf("\n");
printf("\n");
}
#endif
#endif
if (U_FAILURE(status)) {
return;
@ -2778,23 +2778,17 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
// One iteration of the loop per pattern operation performed.
//
for (;;) {
#if 0
if (_heapchk() != _HEAPOK) {
fprintf(stderr, "Heap Trouble\n");
}
#endif
op = (int32_t)pat[fp->fPatIdx];
opType = URX_TYPE(op);
opValue = URX_VAL(op);
#ifdef REGEX_RUN_DEBUG
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug) {
UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
printf("inputIdx=%ld inputChar=%x sp=%3ld activeLimit=%ld ", fp->fInputIdx,
UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
fPattern->dumpOp(fp->fPatIdx);
}
#endif
#endif
fp->fPatIdx++;
switch (opType) {
@ -4188,16 +4182,17 @@ breakFromLoop:
fLastMatchEnd = fMatchEnd;
fMatchStart = startIdx;
fMatchEnd = fp->fInputIdx;
if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF(("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd));
}
}
else
{
if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
}
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug) {
if (isMatch) {
printf("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd);
} else {
printf("No match\n\n");
}
}
#endif
fFrame = fp; // The active stack frame when the engine stopped.
// Contains the capture group results that we need to
@ -4228,8 +4223,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
int32_t opValue; // and the operand value.
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug)
{
if (fTraceDebug) {
printf("MatchAt(startIdx=%d)\n", startIdx);
printf("Original Pattern: ");
UChar32 c = utext_next32From(fPattern->fPattern, 0);
@ -4237,7 +4231,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
if (c<32 || c>256) {
c = '.';
}
REGEX_DUMP_DEBUG_PRINTF(("%c", c));
printf("%c", c);
c = UTEXT_NEXT32(fPattern->fPattern);
}
@ -4287,12 +4281,6 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
// One iteration of the loop per pattern operation performed.
//
for (;;) {
#if 0
if (_heapchk() != _HEAPOK) {
fprintf(stderr, "Heap Trouble\n");
}
#endif
op = (int32_t)pat[fp->fPatIdx];
opType = URX_TYPE(op);
opValue = URX_VAL(op);
@ -5627,20 +5615,21 @@ breakFromLoop:
fLastMatchEnd = fMatchEnd;
fMatchStart = startIdx;
fMatchEnd = fp->fInputIdx;
if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF(("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd));
}
}
else
{
if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
}
}
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug) {
if (isMatch) {
printf("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd);
} else {
printf("No match\n\n");
}
}
#endif
fFrame = fp; // The active stack frame when the engine stopped.
// Contains the capture group results that we need to
// access later.
// Contains the capture group results that we need to
// access later.
return;
}

View File

@ -3,7 +3,7 @@
//
/*
***************************************************************************
* Copyright (C) 2002-2012 International Business Machines Corporation *
* Copyright (C) 2002-2013 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
@ -275,21 +275,21 @@ RegexPattern::compile(const UnicodeString &regex,
if (U_FAILURE(status)) {
return NULL;
}
const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
if ((flags & ~allFlags) != 0) {
status = U_REGEX_INVALID_FLAG;
return NULL;
}
if ((flags & UREGEX_CANON_EQ) != 0) {
status = U_REGEX_UNIMPLEMENTED;
return NULL;
}
RegexPattern *This = new RegexPattern;
if (This == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
@ -301,15 +301,15 @@ RegexPattern::compile(const UnicodeString &regex,
return NULL;
}
This->fFlags = flags;
RegexCompile compiler(This, status);
compiler.compile(regex, pe, status);
if (U_FAILURE(status)) {
delete This;
This = NULL;
}
return This;
}
@ -355,7 +355,7 @@ RegexPattern::compile(UText *regex,
RegexCompile compiler(This, status);
compiler.compile(regex, pe, status);
if (U_FAILURE(status)) {
delete This;
This = NULL;
@ -538,12 +538,12 @@ UnicodeString RegexPattern::pattern() const {
int64_t nativeLen = utext_nativeLength(fPattern);
int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
UnicodeString result;
status = U_ZERO_ERROR;
UChar *resultChars = result.getBuffer(len16);
utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
result.releaseBuffer(len16);
return result;
}
}
@ -622,8 +622,9 @@ int32_t RegexPattern::split(UText *input,
// Debugging function only.
//
//---------------------------------------------------------------------
#if defined(REGEX_DEBUG)
void RegexPattern::dumpOp(int32_t index) const {
(void)index; // Suppress warnings in non-debug build.
#if defined(REGEX_DEBUG)
static const char * const opNames[] = {URX_OPCODE_NAMES};
int32_t op = fCompiledPat->elementAti(index);
int32_t val = URX_VAL(op);
@ -633,7 +634,7 @@ void RegexPattern::dumpOp(int32_t index) const {
pinnedType = 0;
}
REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType]));
printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
switch (type) {
case URX_NOP:
case URX_DOTANY:
@ -682,12 +683,12 @@ void RegexPattern::dumpOp(int32_t index) const {
case URX_LOOP_C:
case URX_LOOP_DOT_I:
// types with an integer operand field.
REGEX_DUMP_DEBUG_PRINTF(("%d", val));
printf("%d", val);
break;
case URX_ONECHAR:
case URX_ONECHAR_I:
REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
printf("%c", val<256?val:'?');
break;
case URX_STRING:
@ -700,7 +701,7 @@ void RegexPattern::dumpOp(int32_t index) const {
for (i=val; i<val+length; i++) {
UChar c = fLiteralText[i];
if (c < 32 || c >= 256) {c = '.';}
REGEX_DUMP_DEBUG_PRINTF(("%c", c));
printf("%c", c);
}
}
break;
@ -712,7 +713,7 @@ void RegexPattern::dumpOp(int32_t index) const {
UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
set->toPattern(s, TRUE);
for (int32_t i=0; i<s.length(); i++) {
REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
printf("%c", s.charAt(i));
}
}
break;
@ -722,89 +723,89 @@ void RegexPattern::dumpOp(int32_t index) const {
{
UnicodeString s;
if (val & URX_NEG_SET) {
REGEX_DUMP_DEBUG_PRINTF(("NOT "));
printf("NOT ");
val &= ~URX_NEG_SET;
}
UnicodeSet *set = fStaticSets[val];
set->toPattern(s, TRUE);
for (int32_t i=0; i<s.length(); i++) {
REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
printf("%c", s.charAt(i));
}
}
break;
default:
REGEX_DUMP_DEBUG_PRINTF(("??????"));
printf("??????");
break;
}
REGEX_DUMP_DEBUG_PRINTF(("\n"));
}
printf("\n");
#endif
}
#if defined(REGEX_DEBUG)
U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
RegexPattern::dumpPattern() const {
#if defined(REGEX_DEBUG)
int index;
int i;
REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
UChar32 c = utext_next32From(This->fPattern, 0);
printf("Original Pattern: ");
UChar32 c = utext_next32From(fPattern, 0);
while (c != U_SENTINEL) {
if (c<32 || c>256) {
c = '.';
}
REGEX_DUMP_DEBUG_PRINTF(("%c", c));
c = UTEXT_NEXT32(This->fPattern);
}
REGEX_DUMP_DEBUG_PRINTF(("\n"));
REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));
if (This->fStartType == START_STRING) {
REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates.
}
REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
printf("%c", c);
} else if (This->fStartType == START_SET) {
int32_t numSetChars = This->fInitialChars->size();
c = UTEXT_NEXT32(fPattern);
}
printf("\n");
printf(" Min Match Length: %d\n", fMinMatchLen);
printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
if (fStartType == START_STRING) {
printf(" Initial match string: \"");
for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
printf("%c", fLiteralText[i]); // TODO: non-printables, surrogates.
}
printf("\"\n");
} else if (fStartType == START_SET) {
int32_t numSetChars = fInitialChars->size();
if (numSetChars > 20) {
numSetChars = 20;
}
REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
printf(" Match First Chars : ");
for (i=0; i<numSetChars; i++) {
UChar32 c = This->fInitialChars->charAt(i);
UChar32 c = fInitialChars->charAt(i);
if (0x20<c && c <0x7e) {
REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
printf("%c ", c);
} else {
REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
printf("%#x ", c);
}
}
if (numSetChars < This->fInitialChars->size()) {
REGEX_DUMP_DEBUG_PRINTF((" ..."));
if (numSetChars < fInitialChars->size()) {
printf(" ...");
}
REGEX_DUMP_DEBUG_PRINTF(("\n"));
printf("\n");
} else if (This->fStartType == START_CHAR) {
REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
} else if (fStartType == START_CHAR) {
printf(" First char of Match : ");
if (0x20 < fInitialChar && fInitialChar<0x7e) {
printf("%c\n", fInitialChar);
} else {
REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
printf("%#x\n", fInitialChar);
}
}
REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \
"-------------------------------------------\n"));
for (index = 0; index<This->fCompiledPat->size(); index++) {
This->dumpOp(index);
printf("\nIndex Binary Type Operand\n" \
"-------------------------------------------\n");
for (index = 0; index<fCompiledPat->size(); index++) {
dumpOp(index);
}
REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
printf("\n\n");
#endif
}

View File

@ -68,21 +68,6 @@ class UVector;
class UVector32;
class UVector64;
#ifndef U_HIDE_INTERNAL_API
/**
* RBBIPatternDump Debug function, displays the compiled form of a pattern.
* @internal
*/
#ifdef REGEX_DEBUG
U_INTERNAL void U_EXPORT2
RegexPatternDump(const RegexPattern *pat);
#else
#undef RegexPatternDump
#define RegexPatternDump(pat)
#endif
#endif /* U_HIDE_INTERNAL_API */
/**
* Class <code>RegexPattern</code> represents a compiled regular expression. It includes
@ -613,11 +598,17 @@ private:
//
void init(); // Common initialization, for use by constructors.
void zap(); // Common cleanup
#ifdef REGEX_DEBUG
void dumpOp(int32_t index) const;
friend void U_EXPORT2 RegexPatternDump(const RegexPattern *);
#endif
void dumpOp(int32_t index) const;
public:
#ifndef U_HIDE_INTERNAL_API
/**
* Dump a compiled pattern. Internal debug function.
* @internal
*/
void dumpPattern() const;
#endif
};

File diff suppressed because it is too large Load Diff