ICU-10463 Regular Expressions: rework the debug conditionals to fix build failures on clang and to simplify them somewhat.

X-SVN-Rev: 34565
This commit is contained in:
Andy Heninger 2013-10-14 22:11:21 +00:00
parent f1df548fc4
commit 10dd7ed47b
6 changed files with 289 additions and 321 deletions

View File

@ -1,5 +1,5 @@
// //
// Copyright (C) 2002-2012 International Business Machines Corporation // Copyright (C) 2002-2013 International Business Machines Corporation
// and others. All rights reserved. // and others. All rights reserved.
// //
// file: regeximp.h // file: regeximp.h
@ -24,7 +24,7 @@ U_NAMESPACE_BEGIN
// For debugging, define REGEX_DEBUG // For debugging, define REGEX_DEBUG
// To define with configure, // To define with configure,
// ./runConfigureICU --enable-debug --disable-release Linux CPPFLAGS="-DREGEX_DEBUG" // CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release Linux
#ifdef REGEX_DEBUG #ifdef REGEX_DEBUG
// //
@ -46,19 +46,6 @@ U_NAMESPACE_BEGIN
#define REGEX_SCAN_DEBUG_PRINTF(a) #define REGEX_SCAN_DEBUG_PRINTF(a)
#endif #endif
#ifdef REGEX_DUMP_DEBUG
#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
#else
#define REGEX_DUMP_DEBUG_PRINTF(a)
#endif
#ifdef REGEX_RUN_DEBUG
#define REGEX_RUN_DEBUG_PRINTF(a) printf a
#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
#else
#define REGEX_RUN_DEBUG_PRINTF(a)
#endif
// //
// Opcode types In the compiled form of the regexp, these are the type, or opcodes, // Opcode types In the compiled form of the regexp, these are the type, or opcodes,

View File

@ -2730,7 +2730,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
if (c<32 || c>256) { if (c<32 || c>256) {
c = '.'; c = '.';
} }
REGEX_DUMP_DEBUG_PRINTF(("%c", c)); printf("%c", c);
c = UTEXT_NEXT32(fPattern->fPattern); c = UTEXT_NEXT32(fPattern->fPattern);
} }
@ -2778,12 +2778,6 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
// One iteration of the loop per pattern operation performed. // One iteration of the loop per pattern operation performed.
// //
for (;;) { for (;;) {
#if 0
if (_heapchk() != _HEAPOK) {
fprintf(stderr, "Heap Trouble\n");
}
#endif
op = (int32_t)pat[fp->fPatIdx]; op = (int32_t)pat[fp->fPatIdx];
opType = URX_TYPE(op); opType = URX_TYPE(op);
opValue = URX_VAL(op); opValue = URX_VAL(op);
@ -4188,16 +4182,17 @@ breakFromLoop:
fLastMatchEnd = fMatchEnd; fLastMatchEnd = fMatchEnd;
fMatchStart = startIdx; fMatchStart = startIdx;
fMatchEnd = fp->fInputIdx; fMatchEnd = fp->fInputIdx;
}
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug) { if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF(("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd)); if (isMatch) {
} printf("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd);
} } else {
else printf("No match\n\n");
{
if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
} }
} }
#endif
fFrame = fp; // The active stack frame when the engine stopped. fFrame = fp; // The active stack frame when the engine stopped.
// Contains the capture group results that we need to // Contains the capture group results that we need to
@ -4228,8 +4223,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
int32_t opValue; // and the operand value. int32_t opValue; // and the operand value.
#ifdef REGEX_RUN_DEBUG #ifdef REGEX_RUN_DEBUG
if (fTraceDebug) if (fTraceDebug) {
{
printf("MatchAt(startIdx=%d)\n", startIdx); printf("MatchAt(startIdx=%d)\n", startIdx);
printf("Original Pattern: "); printf("Original Pattern: ");
UChar32 c = utext_next32From(fPattern->fPattern, 0); UChar32 c = utext_next32From(fPattern->fPattern, 0);
@ -4237,7 +4231,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
if (c<32 || c>256) { if (c<32 || c>256) {
c = '.'; c = '.';
} }
REGEX_DUMP_DEBUG_PRINTF(("%c", c)); printf("%c", c);
c = UTEXT_NEXT32(fPattern->fPattern); c = UTEXT_NEXT32(fPattern->fPattern);
} }
@ -4287,12 +4281,6 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
// One iteration of the loop per pattern operation performed. // One iteration of the loop per pattern operation performed.
// //
for (;;) { for (;;) {
#if 0
if (_heapchk() != _HEAPOK) {
fprintf(stderr, "Heap Trouble\n");
}
#endif
op = (int32_t)pat[fp->fPatIdx]; op = (int32_t)pat[fp->fPatIdx];
opType = URX_TYPE(op); opType = URX_TYPE(op);
opValue = URX_VAL(op); opValue = URX_VAL(op);
@ -5627,16 +5615,17 @@ breakFromLoop:
fLastMatchEnd = fMatchEnd; fLastMatchEnd = fMatchEnd;
fMatchStart = startIdx; fMatchStart = startIdx;
fMatchEnd = fp->fInputIdx; fMatchEnd = fp->fInputIdx;
}
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug) { if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF(("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd)); if (isMatch) {
} printf("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd);
} } else {
else printf("No match\n\n");
{
if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
} }
} }
#endif
fFrame = fp; // The active stack frame when the engine stopped. fFrame = fp; // The active stack frame when the engine stopped.
// Contains the capture group results that we need to // Contains the capture group results that we need to

View File

@ -3,7 +3,7 @@
// //
/* /*
*************************************************************************** ***************************************************************************
* Copyright (C) 2002-2012 International Business Machines Corporation * * Copyright (C) 2002-2013 International Business Machines Corporation *
* and others. All rights reserved. * * and others. All rights reserved. *
*************************************************************************** ***************************************************************************
*/ */
@ -622,8 +622,9 @@ int32_t RegexPattern::split(UText *input,
// Debugging function only. // Debugging function only.
// //
//--------------------------------------------------------------------- //---------------------------------------------------------------------
#if defined(REGEX_DEBUG)
void RegexPattern::dumpOp(int32_t index) const { void RegexPattern::dumpOp(int32_t index) const {
(void)index; // Suppress warnings in non-debug build.
#if defined(REGEX_DEBUG)
static const char * const opNames[] = {URX_OPCODE_NAMES}; static const char * const opNames[] = {URX_OPCODE_NAMES};
int32_t op = fCompiledPat->elementAti(index); int32_t op = fCompiledPat->elementAti(index);
int32_t val = URX_VAL(op); int32_t val = URX_VAL(op);
@ -633,7 +634,7 @@ void RegexPattern::dumpOp(int32_t index) const {
pinnedType = 0; pinnedType = 0;
} }
REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType])); printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
switch (type) { switch (type) {
case URX_NOP: case URX_NOP:
case URX_DOTANY: case URX_DOTANY:
@ -682,12 +683,12 @@ void RegexPattern::dumpOp(int32_t index) const {
case URX_LOOP_C: case URX_LOOP_C:
case URX_LOOP_DOT_I: case URX_LOOP_DOT_I:
// types with an integer operand field. // types with an integer operand field.
REGEX_DUMP_DEBUG_PRINTF(("%d", val)); printf("%d", val);
break; break;
case URX_ONECHAR: case URX_ONECHAR:
case URX_ONECHAR_I: case URX_ONECHAR_I:
REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?')); printf("%c", val<256?val:'?');
break; break;
case URX_STRING: case URX_STRING:
@ -700,7 +701,7 @@ void RegexPattern::dumpOp(int32_t index) const {
for (i=val; i<val+length; i++) { for (i=val; i<val+length; i++) {
UChar c = fLiteralText[i]; UChar c = fLiteralText[i];
if (c < 32 || c >= 256) {c = '.';} if (c < 32 || c >= 256) {c = '.';}
REGEX_DUMP_DEBUG_PRINTF(("%c", c)); printf("%c", c);
} }
} }
break; break;
@ -712,7 +713,7 @@ void RegexPattern::dumpOp(int32_t index) const {
UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
set->toPattern(s, TRUE); set->toPattern(s, TRUE);
for (int32_t i=0; i<s.length(); i++) { for (int32_t i=0; i<s.length(); i++) {
REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i))); printf("%c", s.charAt(i));
} }
} }
break; break;
@ -722,89 +723,89 @@ void RegexPattern::dumpOp(int32_t index) const {
{ {
UnicodeString s; UnicodeString s;
if (val & URX_NEG_SET) { if (val & URX_NEG_SET) {
REGEX_DUMP_DEBUG_PRINTF(("NOT ")); printf("NOT ");
val &= ~URX_NEG_SET; val &= ~URX_NEG_SET;
} }
UnicodeSet *set = fStaticSets[val]; UnicodeSet *set = fStaticSets[val];
set->toPattern(s, TRUE); set->toPattern(s, TRUE);
for (int32_t i=0; i<s.length(); i++) { for (int32_t i=0; i<s.length(); i++) {
REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i))); printf("%c", s.charAt(i));
} }
} }
break; break;
default: default:
REGEX_DUMP_DEBUG_PRINTF(("??????")); printf("??????");
break; break;
} }
REGEX_DUMP_DEBUG_PRINTF(("\n")); printf("\n");
}
#endif #endif
}
#if defined(REGEX_DEBUG)
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern *This) { RegexPattern::dumpPattern() const {
#if defined(REGEX_DEBUG)
int index; int index;
int i; int i;
REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: ")); printf("Original Pattern: ");
UChar32 c = utext_next32From(This->fPattern, 0); UChar32 c = utext_next32From(fPattern, 0);
while (c != U_SENTINEL) { while (c != U_SENTINEL) {
if (c<32 || c>256) { if (c<32 || c>256) {
c = '.'; c = '.';
} }
REGEX_DUMP_DEBUG_PRINTF(("%c", c)); printf("%c", c);
c = UTEXT_NEXT32(This->fPattern); c = UTEXT_NEXT32(fPattern);
} }
REGEX_DUMP_DEBUG_PRINTF(("\n")); printf("\n");
REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen)); printf(" Min Match Length: %d\n", fMinMatchLen);
REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType))); printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
if (This->fStartType == START_STRING) { if (fStartType == START_STRING) {
REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \"")); printf(" Initial match string: \"");
for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) { for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates. printf("%c", fLiteralText[i]); // TODO: non-printables, surrogates.
} }
REGEX_DUMP_DEBUG_PRINTF(("\"\n")); printf("\"\n");
} else if (This->fStartType == START_SET) { } else if (fStartType == START_SET) {
int32_t numSetChars = This->fInitialChars->size(); int32_t numSetChars = fInitialChars->size();
if (numSetChars > 20) { if (numSetChars > 20) {
numSetChars = 20; numSetChars = 20;
} }
REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : ")); printf(" Match First Chars : ");
for (i=0; i<numSetChars; i++) { for (i=0; i<numSetChars; i++) {
UChar32 c = This->fInitialChars->charAt(i); UChar32 c = fInitialChars->charAt(i);
if (0x20<c && c <0x7e) { if (0x20<c && c <0x7e) {
REGEX_DUMP_DEBUG_PRINTF(("%c ", c)); printf("%c ", c);
} else { } else {
REGEX_DUMP_DEBUG_PRINTF(("%#x ", c)); printf("%#x ", c);
} }
} }
if (numSetChars < This->fInitialChars->size()) { if (numSetChars < fInitialChars->size()) {
REGEX_DUMP_DEBUG_PRINTF((" ...")); printf(" ...");
} }
REGEX_DUMP_DEBUG_PRINTF(("\n")); printf("\n");
} else if (This->fStartType == START_CHAR) { } else if (fStartType == START_CHAR) {
REGEX_DUMP_DEBUG_PRINTF((" First char of Match : ")); printf(" First char of Match : ");
if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) { if (0x20 < fInitialChar && fInitialChar<0x7e) {
REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar)); printf("%c\n", fInitialChar);
} else { } else {
REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar)); printf("%#x\n", fInitialChar);
} }
} }
REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \ printf("\nIndex Binary Type Operand\n" \
"-------------------------------------------\n")); "-------------------------------------------\n");
for (index = 0; index<This->fCompiledPat->size(); index++) { for (index = 0; index<fCompiledPat->size(); index++) {
This->dumpOp(index); dumpOp(index);
}
REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
} }
printf("\n\n");
#endif #endif
}

View File

@ -68,21 +68,6 @@ class UVector;
class UVector32; class UVector32;
class UVector64; class UVector64;
#ifndef U_HIDE_INTERNAL_API
/**
* RBBIPatternDump Debug function, displays the compiled form of a pattern.
* @internal
*/
#ifdef REGEX_DEBUG
U_INTERNAL void U_EXPORT2
RegexPatternDump(const RegexPattern *pat);
#else
#undef RegexPatternDump
#define RegexPatternDump(pat)
#endif
#endif /* U_HIDE_INTERNAL_API */
/** /**
* Class <code>RegexPattern</code> represents a compiled regular expression. It includes * Class <code>RegexPattern</code> represents a compiled regular expression. It includes
@ -613,11 +598,17 @@ private:
// //
void init(); // Common initialization, for use by constructors. void init(); // Common initialization, for use by constructors.
void zap(); // Common cleanup void zap(); // Common cleanup
#ifdef REGEX_DEBUG
void dumpOp(int32_t index) const;
friend void U_EXPORT2 RegexPatternDump(const RegexPattern *);
#endif
void dumpOp(int32_t index) const;
public:
#ifndef U_HIDE_INTERNAL_API
/**
* Dump a compiled pattern. Internal debug function.
* @internal
*/
void dumpPattern() const;
#endif
}; };

View File

@ -378,7 +378,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
line, u_errorName(status)); line, u_errorName(status));
return FALSE; return FALSE;
} }
if (line==376) { RegexPatternDump(REPattern);} if (line==376) { REPattern->dumpPattern();}
UnicodeString inputString(inputText); UnicodeString inputString(inputText);
UnicodeString unEscapedInput = inputString.unescape(); UnicodeString unEscapedInput = inputString.unescape();
@ -414,7 +414,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
} }
if (retVal == FALSE) { if (retVal == FALSE) {
RegexPatternDump(REPattern); REPattern->dumpPattern();
} }
delete REPattern; delete REPattern;
@ -490,7 +490,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
} }
if (retVal == FALSE) { if (retVal == FALSE) {
RegexPatternDump(REPattern); REPattern->dumpPattern();
} }
delete REPattern; delete REPattern;
@ -583,7 +583,7 @@ void RegexTest::Basic() {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
RegexPattern *pattern; RegexPattern *pattern;
pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status); pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status);
RegexPatternDump(pattern); pattern->dumpPattern();
RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status); RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status);
UBool result = m->find(); UBool result = m->find();
printf("result = %d\n", result); printf("result = %d\n", result);
@ -3406,7 +3406,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
} }
if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag
RegexPatternDump(callerPattern); callerPattern->dumpPattern();
} }
if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag