scuffed-code/icu4c/source/test/intltest/regextst.cpp

/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 2002, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

//
//   regextst.cpp
//
//      ICU Regular Expressions test, part of intltest.
//

#include "unicode/utypes.h"
#include "intltest.h"
#include "regextst.h"


// Constructor: the body is empty, there is nothing to set up here.
RegexTest::RegexTest()
{
}


// Destructor: the body is empty, there is nothing to release here.
RegexTest::~RegexTest()
{
}


//
//  runIndexedTest   Map a test index to its name and, when exec is set, run it.
//
//      index   Zero-based position of the requested test.
//      exec    When true the selected test is executed; otherwise only
//              its name is reported.
//      name    Receives the test's name, or "" when index does not
//              select a test.
//
void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
    typedef void (RegexTest::*TestMethod)();
    struct TestEntry {
        const char  *label;
        TestMethod   method;
    };
    static const TestEntry testTable[] = {
        { "API_Match",   &RegexTest::API_Match   },
        { "Basic",       &RegexTest::Basic       },
        { "API_Replace", &RegexTest::API_Replace },
        { "API_Pattern", &RegexTest::API_Pattern }
    };
    const int32_t testCount = (int32_t)(sizeof(testTable) / sizeof(testTable[0]));

    if (exec) {
        logln("TestSuite RegexTest: ");
    }

    if (index < 0 || index >= testCount) {
        name = "";      // the empty name is needed to end the caller's loop
        return;
    }

    const TestEntry &selected = testTable[index];
    name = selected.label;
    if (exec) {
        (this->*selected.method)();
    }
}


//---------------------------------------------------------------------------
//
//    REGEX_TESTLM       Macro + invocation function to simplify writing quick tests
//                       for the lookingAt() and matches() functions.
//
//       usage:
//          REGEX_TESTLM("pattern",  "input text",  lookingAt expected, matches expected);
//
//          The expected results are UBool - TRUE or FALSE.
//          The input text is unescaped.  The pattern is not.
//            
//
//---------------------------------------------------------------------------
// REGEX_CHECK_STATUS
//    If the variable named `status` at the point of use holds a failure code,
//    report it together with the current line number and return from the
//    enclosing function.  The expansion contains a bare `return;`, so it only
//    fits in functions that return void.
#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {errln("RegexTest failure at line %d.  status=%d\n", \
__LINE__, status); return;}}

// REGEX_ASSERT(expr)
//    Report a failure, with the current line number, when expr compares equal
//    to FALSE.  Execution continues with the next statement either way.
#define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("RegexTest failure at line %d.\n", __LINE__);};}

// REGEX_ASSERT_FAIL(expr, errcode)
//    Evaluate expr and report a failure unless `status` equals errcode afterwards.
//    The expansion declares its own `status`, initialized to U_ZERO_ERROR, so a
//    `status` written inside expr refers to that inner variable and the caller's
//    own status variable is left untouched.
#define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr);\
if (status!=errcode) {errln("RegexTest failure at line %d.\n", __LINE__);};}

// REGEX_TESTLM(pat, text, looking, match)
//    Forward to doRegexLMTest(), adding the line number of the call site.
//    The expansion already ends with a ';'.
#define REGEX_TESTLM(pat, text, looking, match) doRegexLMTest(pat, text, looking, match, __LINE__);

//---------------------------------------------------------------------------
//
//    doRegexLMTest      Worker function behind the REGEX_TESTLM macro.
//
//       pat       Pattern source.  Compiled as given, with flags 0.
//       text      Input text.  unescape() is applied to it before matching.
//       looking   Expected return value of lookingAt().
//       match     Expected return value of matches().
//       line      Line number quoted in any failure message.
//
//       Returns TRUE when the pattern compiles, a matcher can be created, and
//       both lookingAt() and matches() succeed with the expected values;
//       FALSE otherwise.  When one of the two match calls misbehaves, the
//       compiled pattern is dumped.
//
//---------------------------------------------------------------------------
UBool RegexTest::doRegexLMTest(char *pat, char *text, UBool looking, UBool match, int line) {
    UErrorCode          status     = U_ZERO_ERROR;
    UParseError         pe;
    RegexPattern        *REPattern = NULL;
    RegexMatcher        *REMatcher = NULL;
    UBool               retVal     = TRUE;

    UnicodeString patString(pat);
    REPattern = RegexPattern::compile(patString, 0, pe, status);
    if (U_FAILURE(status)) {
        errln("RegexTest failure in RegexPattern::compile() at line %d.  Status = %d\n", line, status);
        delete REPattern;       // deleting NULL is a no-op; covers a non-NULL result too
        return FALSE;
    }

    // Only the input text is unescaped; the pattern is used exactly as written.
    UnicodeString unEscapedInput = UnicodeString(text).unescape();
    REMatcher = REPattern->matcher(unEscapedInput, status);
    if (U_FAILURE(status)) {
        errln("RegexTest failure in REPattern::matcher() at line %d.  Status = %d\n", line, status);
        // Release what has been allocated so far; the compiled pattern
        // used to be leaked on this path.
        delete REMatcher;
        delete REPattern;
        return FALSE;
    }

    UBool actualmatch;
    actualmatch = REMatcher->lookingAt(status);
    if (U_FAILURE(status)) {
        errln("RegexTest failure in lookingAt() at line %d.  Status = %d\n", line, status);
        retVal =  FALSE;
    }
    if (actualmatch != looking) {
        errln("RegexTest: wrong return from lookingAt() at line %d.\n", line);
        retVal = FALSE;
    }

    // Start matches() from a clean status so a lookingAt() failure is not carried over.
    status = U_ZERO_ERROR;
    actualmatch = REMatcher->matches(status);
    if (U_FAILURE(status)) {
        errln("RegexTest failure in matches() at line %d.  Status = %d\n", line, status);
        retVal = FALSE;
    }
    if (actualmatch != match) {
        errln("RegexTest: wrong return from matches() at line %d.\n", line);
        retVal = FALSE;
    }

    if (retVal == FALSE) {
        REPattern->dump();
    }

    // Matcher first, then the pattern it was created from (same order as in API_Match).
    delete REMatcher;
    delete REPattern;
    return retVal;
}
    

//---------------------------------------------------------------------------
//
//      API_Match
//
//---------------------------------------------------------------------------
void RegexTest::API_Match() {
    UParseError         pe;
    UErrorCode          status=U_ZERO_ERROR;
    int32_t             flags = 0;

    //
    // Debug - slide failing test cases early
    //
#if 0
    {
    }
    return;
#endif

    //
    // Simple pattern compilation
    //
    {
        UnicodeString       re("abc");
        RegexPattern        *pat2;
        pat2 = RegexPattern::compile(re, flags, pe, status);
        REGEX_CHECK_STATUS;
        
        UnicodeString inStr1 = "abcdef this is a test";
        UnicodeString instr2 = "not abc";
        UnicodeString empty  = "";
        
        
        //
        // Matcher creation and reset.
        //
        RegexMatcher *m1 = pat2->matcher(inStr1, status);
        REGEX_CHECK_STATUS;
        REGEX_ASSERT(m1->lookingAt(status) == TRUE); 
        REGEX_ASSERT(m1->input() == inStr1);
        m1->reset(instr2);
        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
        REGEX_ASSERT(m1->input() == instr2);
        m1->reset(inStr1);
        REGEX_ASSERT(m1->input() == inStr1);
        REGEX_ASSERT(m1->lookingAt(status) == TRUE);
        m1->reset(empty);
        REGEX_ASSERT(m1->lookingAt(status) == FALSE);
        REGEX_ASSERT(m1->input() == empty);
        REGEX_ASSERT(&m1->pattern() == pat2);
        delete m1;
        delete pat2;
    }


    //
    // Capture Group. 
    //     RegexMatcher::start();
    //     RegexMatcher::end();
    //     RegexMatcher::groupCount();
    //
    {
        int32_t             flags=0;
        UParseError         pe;
        UErrorCode          status=U_ZERO_ERROR;

        UnicodeString       re("01(23(45)67)(.*)");
        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
        REGEX_CHECK_STATUS;
        UnicodeString data = "0123456789";
        
        RegexMatcher *matcher = pat->matcher(data, status);
        REGEX_CHECK_STATUS;
        REGEX_ASSERT(matcher->lookingAt(status) == TRUE); 
        int  matchStarts[] = {0,  2, 4, 8};
        int  matchEnds[]   = {10, 8, 6, 10};
        int i;
        for (i=0; i<4; i++) {
            int32_t actualStart = matcher->start(i, status);
            REGEX_CHECK_STATUS;
            if (actualStart != matchStarts[i]) {
                errln("RegexTest failure at line %d, index %d.  Expected %d, got %d\n",
                    __LINE__, i, matchStarts[i], actualStart);
            }
            int32_t actualEnd = matcher->end(i, status);
            REGEX_CHECK_STATUS;
            if (actualEnd != matchEnds[i]) {
                errln("RegexTest failure at line %d index %d.  Expected %d, got %d\n",
                    __LINE__, i, matchEnds[i], actualEnd);
            }
        }

        REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));
        REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));

        REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
        REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
        matcher->reset();
        REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);

        matcher->lookingAt(status);
        REGEX_ASSERT(matcher->group(status)    == "0123456789");
        REGEX_ASSERT(matcher->group(0, status) == "0123456789");
        REGEX_ASSERT(matcher->group(1, status) == "234567"    );
        REGEX_ASSERT(matcher->group(2, status) == "45"        );
        REGEX_ASSERT(matcher->group(3, status) == "89"        );
        REGEX_CHECK_STATUS;
        REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
        REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);
        matcher->reset();
        REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);

        delete matcher;
        delete pat;

    }

    //
    //  find
    //
    {
        int32_t             flags=0;
        UParseError         pe;
        UErrorCode          status=U_ZERO_ERROR;

        UnicodeString       re("abc");
        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
        REGEX_CHECK_STATUS;
        UnicodeString data = ".abc..abc...abc..";
        //                    012345678901234567
        
        RegexMatcher *matcher = pat->matcher(data, status);
        REGEX_CHECK_STATUS;
        REGEX_ASSERT(matcher->find());
        REGEX_ASSERT(matcher->start(status) == 1);
        REGEX_ASSERT(matcher->find());
        REGEX_ASSERT(matcher->start(status) == 6);
        REGEX_ASSERT(matcher->find());
        REGEX_ASSERT(matcher->start(status) == 12);
        REGEX_ASSERT(matcher->find() == FALSE);
        REGEX_ASSERT(matcher->find() == FALSE);

        matcher->reset();
        REGEX_ASSERT(matcher->find());
        REGEX_ASSERT(matcher->start(status) == 1);

        REGEX_ASSERT(matcher->find(0, status));
        REGEX_ASSERT(matcher->start(status) == 1);
        REGEX_ASSERT(matcher->find(1, status));
        REGEX_ASSERT(matcher->start(status) == 1);
        REGEX_ASSERT(matcher->find(2, status));
        REGEX_ASSERT(matcher->start(status) == 6);
        REGEX_ASSERT(matcher->find(12, status));
        REGEX_ASSERT(matcher->start(status) == 12);
        REGEX_ASSERT(matcher->find(13, status) == FALSE);
        REGEX_ASSERT(matcher->find(16, status) == FALSE);
        REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE);
        REGEX_CHECK_STATUS;

        REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
        REGEX_ASSERT_FAIL(matcher->find(17, status), U_INDEX_OUTOFBOUNDS_ERROR);

        REGEX_ASSERT(matcher->groupCount() == 0);

        delete matcher;
        delete pat;
    }
        
}


//---------------------------------------------------------------------------
//
//      Basic      Check for basic functionality of
//                          regex pattern matching.
//
//---------------------------------------------------------------------------
void RegexTest::Basic() {
    // Each REGEX_TESTLM line below is
    //     (pattern, input text, expected lookingAt(), expected matches()).
    // The macro records the line of each call, so the cases are kept as
    // individual statements rather than being folded into a table.


//
// Debug - slide failing test cases early
//
#if 0
    {
    }
    return;
#endif


    //
    // Pattern with parentheses
    //
    REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE,  FALSE);
    REGEX_TESTLM("st(abc)ring", "stabcring",       TRUE,  TRUE);
    REGEX_TESTLM("st(abc)ring", "stabcrung",       FALSE, FALSE);

    //
    // Patterns with *
    //
    REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE);
    REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE);

    REGEX_TESTLM("a*", "",  TRUE, TRUE);
    REGEX_TESTLM("a*", "b", TRUE, FALSE);


    //
    //  Patterns with "."
    //
    REGEX_TESTLM(".", "abc", TRUE, FALSE);
    REGEX_TESTLM("...", "abc", TRUE, TRUE);
    REGEX_TESTLM("....", "abc", FALSE, FALSE);
    REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE);
    REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE);
    REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE);

    //
    //  Patterns with * applied to chars at end of literal string
    //
    REGEX_TESTLM("abc*", "ab", TRUE, TRUE);
    REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE);

    //
    //  Supplemental chars match as single chars, not a pair of surrogates.
    //
    REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE);
    REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE);
    REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE);


    //
    //  UnicodeSets in the pattern
    //
    REGEX_TESTLM("[1-6]", "1", TRUE, TRUE);
    REGEX_TESTLM("[1-6]", "3", TRUE, TRUE);
    REGEX_TESTLM("[1-6]", "7", FALSE, FALSE);
    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
    // NOTE(review): the next case is an exact repeat of the one above.
    REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);
    REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE);

    REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE);
    REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE);
    REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE);
    REGEX_TESTLM("[\\p{Nd}]*", "a123456", TRUE, FALSE);   // note that * matches 0 occurrences.
    REGEX_TESTLM("[a][b][[:Zs:]]*", "ab   ", TRUE, TRUE);

    //
    //   OR operator in patterns
    //
    REGEX_TESTLM("(a|b)", "a", TRUE, TRUE);
    REGEX_TESTLM("(a|b)", "b", TRUE, TRUE);
    REGEX_TESTLM("(a|b)", "c", FALSE, FALSE);
    REGEX_TESTLM("a|b", "b", TRUE, TRUE);

    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabc", TRUE, TRUE);
    REGEX_TESTLM("(a|b|c)*", "aabcaaccbcabdc", TRUE, FALSE);
    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "ac", TRUE, TRUE);
    REGEX_TESTLM("(a(b|c|d)(x|y|z)*|123)", "123", TRUE, TRUE);
    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "123", TRUE, TRUE);
    REGEX_TESTLM("(a|(1|2)*)(b|c|d)(x|y|z)*|123", "222211111czzzzw", TRUE, FALSE);

    //
    //  +
    //
    REGEX_TESTLM("ab+", "abbc", TRUE, FALSE);
    REGEX_TESTLM("ab+c", "ac", FALSE, FALSE);
    REGEX_TESTLM("b+", "", FALSE, FALSE);
    REGEX_TESTLM("(abc|def)+", "defabc", TRUE, TRUE);
    REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE);
    REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE);

    //
    //   ?
    //
    REGEX_TESTLM("ab?", "ab", TRUE, TRUE);
    REGEX_TESTLM("ab?", "a", TRUE, TRUE);
    REGEX_TESTLM("ab?", "ac", TRUE, FALSE);
    REGEX_TESTLM("ab?", "abb", TRUE, FALSE);
    REGEX_TESTLM("a(b|c)?d", "abd", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "acd", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "ad", TRUE, TRUE);
    REGEX_TESTLM("a(b|c)?d", "abcd", FALSE, FALSE);
    REGEX_TESTLM("a(b|c)?d", "ab", FALSE, FALSE);

}


//---------------------------------------------------------------------------
//
//      API_Replace
//
//---------------------------------------------------------------------------
void RegexTest::API_Replace()
{
    // Placeholder: this test currently has no body.
}


//---------------------------------------------------------------------------
//
//      API_Pattern
//
//---------------------------------------------------------------------------
void RegexTest::API_Pattern() {
    // Exercises RegexPattern itself: construction, comparison, assignment,
    // copying, clone(), flags(), the static matches() helper, and split().
    RegexPattern        pata;    // Test default constructor to not crash.
    RegexPattern        patb;

    REGEX_ASSERT(pata == patb);
    REGEX_ASSERT(pata == pata);

    UnicodeString re1("abc[a-l][m-z]");
    UnicodeString re2("def");
    UErrorCode    status = U_ZERO_ERROR;
    UParseError   pe;

    RegexPattern        *pat1 = RegexPattern::compile(re1, 0, pe, status);
    RegexPattern        *pat2 = RegexPattern::compile(re2, 0, pe, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(*pat1 == *pat1);
    REGEX_ASSERT(*pat1 != pata);

    // Assign
    patb = *pat1;
    REGEX_ASSERT(patb == *pat1);

    // Copy Construct
    RegexPattern patc(*pat1);
    REGEX_ASSERT(patc == *pat1);
    REGEX_ASSERT(patb == patc);
    // Compare the patterns, not the pointers: two separately allocated
    // objects always have different addresses, which made the former
    // pointer comparison vacuous.
    REGEX_ASSERT(*pat1 != *pat2);
    patb = *pat2;
    REGEX_ASSERT(patb != patc);
    REGEX_ASSERT(patb == *pat2);

    // Compile with no flags.
    RegexPattern         *pat1a = RegexPattern::compile(re1, pe, status);
    REGEX_CHECK_STATUS;          // do not dereference pat1a after a failed compile
    REGEX_ASSERT(*pat1a == *pat1);

    // Compile with different flags should be not equal
    RegexPattern        *pat1b = RegexPattern::compile(re1, UREGEX_CASE_INSENSITIVE, pe, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(*pat1b != *pat1a);
    REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);
    REGEX_ASSERT(pat1a->flags() == 0);

    // clone
    RegexPattern *pat1c = pat1b->clone();
    REGEX_ASSERT(*pat1b == *pat1c);
    REGEX_ASSERT(*pat1a != *pat1c);


    // TODO:  Actually do some matches with the cloned/copied/assigned patterns.


    delete pat1c;
    delete pat1b;
    delete pat1a;
    delete pat1;
    delete pat2;

    //
    //   matches convenience API
    //
    REGEX_ASSERT(RegexPattern::matches(".*", "random input", pe, status) == TRUE);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
    REGEX_CHECK_STATUS;
    // With a failure status on entry, matches() is expected to return FALSE
    // and leave the status as it was.
    status = U_INDEX_OUTOFBOUNDS_ERROR;
    REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);


    //
    // Split()
    //
    status = U_ZERO_ERROR;
    pat1 = RegexPattern::compile(" +",  pe, status);
    REGEX_CHECK_STATUS;
    UnicodeString  fields[10];

    int32_t n;
    n = pat1->split("Now is the time", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==4);
    REGEX_ASSERT(fields[0]=="Now");
    REGEX_ASSERT(fields[1]=="is");
    REGEX_ASSERT(fields[2]=="the");
    REGEX_ASSERT(fields[3]=="time");
    REGEX_ASSERT(fields[4]=="");

    // Capacity 2: the remainder of the input lands in the last slot.
    n = pat1->split("Now is the time", fields, 2, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==2);
    REGEX_ASSERT(fields[0]=="Now");
    REGEX_ASSERT(fields[1]=="is the time");
    REGEX_ASSERT(fields[2]=="the");   // left over from previous test

    // Capacity 1: the whole input is the single field; fields[1] must stay untouched.
    fields[1] = "*";
    n = pat1->split("Now is the time", fields, 1, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==1);
    REGEX_ASSERT(fields[0]=="Now is the time");
    REGEX_ASSERT(fields[1]=="*");

    // Leading, repeated and trailing delimiters.
    n = pat1->split("    Now       is the time   ", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==5);
    REGEX_ASSERT(fields[0]=="");
    REGEX_ASSERT(fields[1]=="Now");
    REGEX_ASSERT(fields[2]=="is");
    REGEX_ASSERT(fields[3]=="the");
    REGEX_ASSERT(fields[4]=="time");
    REGEX_ASSERT(fields[5]=="");

    // Input consisting of delimiters only.
    n = pat1->split("     ", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==1);
    REGEX_ASSERT(fields[0]=="");

    // Empty input: no fields, and the output array is left alone.
    fields[0] = "foo";
    n = pat1->split("", fields, 10, status);
    REGEX_CHECK_STATUS;
    REGEX_ASSERT(n==0);
    REGEX_ASSERT(fields[0]=="foo");

    // Release the pattern compiled for the split() tests; it used to be leaked.
    delete pat1;

}
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00			`/********************************************************************`
			`* COPYRIGHT:`
			`* Copyright (c) 2002, International Business Machines Corporation and`
			`* others. All Rights Reserved.`
			`********************************************************************/`

			`//`
			`// regex.cpp`
			`//`
			`// ICU Regular Expressions test, part of intltest.`
			`//`

			`#include "unicode/utypes.h"`
			`#include "intltest.h"`
			`#include "regextst.h"`


			`RegexTest::RegexTest()`
			`{`
			`};`


			`RegexTest::~RegexTest()`
			`{`
			`};`



			`void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /par/ )`
			`{`
			`if (exec) logln("TestSuite RegexTest: ");`
			`switch (index) {`

ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`case 0: name = "API_Match";`
			`if (exec) API_Match();`
			`break;`
			`case 1: name = "Basic";`
			`if (exec) Basic();`
			`break;`
			`case 2: name = "API_Replace";`
			`if (exec) API_Replace();`
			`break;`
			`case 3: name = "API_Pattern";`
			`if (exec) API_Pattern();`
			`break;`
			`default: name = "";`
			`break; //needed to end loop`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00			`}`
			`}`


			`//---------------------------------------------------------------------------`
			`//`
			`// REGEX_TESTLM Macro + invocation function to simplify writing quick tests`
			`// for the LookingAt() and Match() functions.`
			`//`
			`// usage:`
			`// REGEX_TESTLM("pattern", "input text", lookingAt expected, matches expected);`
			`//`
			`// The expected results are UBool - TRUE or FALSE.`
			`// The input text is unescaped. The pattern is not.`
			`//`
			`//`
			`//---------------------------------------------------------------------------`
			`#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {errln("RegexTest failure at line %d. status=%d\n", \`
			`__LINE__, status); return;}}`

			`#define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("RegexTest failure at line %d.\n", __LINE__);};}`

ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`#define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr);\`
			`if (status!=errcode) {errln("RegexTest failure at line %d.\n", __LINE__);};}`

ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00			`#define REGEX_TESTLM(pat, text, looking, match) doRegexLMTest(pat, text, looking, match, __LINE__);`

			`UBool RegexTest::doRegexLMTest(char pat, char text, UBool looking, UBool match, int line) {`
			`const UnicodeString pattern(pat);`
			`const UnicodeString inputText(text);`
			`UErrorCode status = U_ZERO_ERROR;`
			`UParseError pe;`
			`RegexPattern *REPattern = NULL;`
			`RegexMatcher *REMatcher = NULL;`
			`UBool retVal = TRUE;`

			`UnicodeString patString(pat);`
			`REPattern = RegexPattern::compile(patString, 0, pe, status);`
			`if (U_FAILURE(status)) {`
			`errln("RegexTest failure in RegexPattern::compile() at line %d. Status = %d\n", line, status);`
			`return FALSE;`
			`}`

			`UnicodeString inputString(inputText);`
			`UnicodeString unEscapedInput = inputString.unescape();`
			`REMatcher = REPattern->matcher(unEscapedInput, status);`
			`if (U_FAILURE(status)) {`
			`errln("RegexTest failure in REPattern::matcher() at line %d. Status = %d\n", line, status);`
			`return FALSE;`
			`}`

			`UBool actualmatch;`
			`actualmatch = REMatcher->lookingAt(status);`
			`if (U_FAILURE(status)) {`
			`errln("RegexTest failure in lookingAt() at line %d. Status = %d\n", line, status);`
			`retVal = FALSE;`
			`}`
			`if (actualmatch != looking) {`
			`errln("RegexTest: wrong return from lookingAt() at line %d.\n", line);`
			`retVal = FALSE;`
			`}`

			`status = U_ZERO_ERROR;`
			`actualmatch = REMatcher->matches(status);`
			`if (U_FAILURE(status)) {`
			`errln("RegexTest failure in matches() at line %d. Status = %d\n", line, status);`
			`retVal = FALSE;`
			`}`
			`if (actualmatch != match) {`
			`errln("RegexTest: wrong return from matches() at line %d.\n", line);`
			`retVal = FALSE;`
			`}`

			`if (retVal == FALSE) {`
			`REPattern->dump();`
			`}`

			`delete REPattern;`
			`delete REMatcher;`
			`return retVal;`
			`}`


			`//---------------------------------------------------------------------------`
			`//`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`// API_Match`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00			`//`
			`//---------------------------------------------------------------------------`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`void RegexTest::API_Match() {`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00			`UParseError pe;`
			`UErrorCode status=U_ZERO_ERROR;`
			`int32_t flags = 0;`

			`//`
			`// Debug - slide failing test cases early`
			`//`
			`#if 0`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`{`
			`}`
			`return;`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00			`#endif`

			`//`
			`// Simple pattern compilation`
			`//`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`{`
			`UnicodeString re("abc");`
			`RegexPattern *pat2;`
			`pat2 = RegexPattern::compile(re, flags, pe, status);`
			`REGEX_CHECK_STATUS;`

			`UnicodeString inStr1 = "abcdef this is a test";`
			`UnicodeString instr2 = "not abc";`
			`UnicodeString empty = "";`


			`//`
			`// Matcher creation and reset.`
			`//`
			`RegexMatcher *m1 = pat2->matcher(inStr1, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(m1->lookingAt(status) == TRUE);`
			`REGEX_ASSERT(m1->input() == inStr1);`
			`m1->reset(instr2);`
			`REGEX_ASSERT(m1->lookingAt(status) == FALSE);`
			`REGEX_ASSERT(m1->input() == instr2);`
			`m1->reset(inStr1);`
			`REGEX_ASSERT(m1->input() == inStr1);`
			`REGEX_ASSERT(m1->lookingAt(status) == TRUE);`
			`m1->reset(empty);`
			`REGEX_ASSERT(m1->lookingAt(status) == FALSE);`
			`REGEX_ASSERT(m1->input() == empty);`
			`REGEX_ASSERT(&m1->pattern() == pat2);`
			`delete m1;`
			`delete pat2;`
			`}`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00

ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`//`
			`// Capture Group.`
			`// RegexMatcher::start();`
			`// RegexMatcher::end();`
			`// RegexMatcher::groupCount();`
			`//`
			`{`
			`int32_t flags=0;`
			`UParseError pe;`
			`UErrorCode status=U_ZERO_ERROR;`

			`UnicodeString re("01(23(45)67)(.*)");`
			`RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);`
			`REGEX_CHECK_STATUS;`
			`UnicodeString data = "0123456789";`

			`RegexMatcher *matcher = pat->matcher(data, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(matcher->lookingAt(status) == TRUE);`
			`int matchStarts[] = {0, 2, 4, 8};`
			`int matchEnds[] = {10, 8, 6, 10};`
			`int i;`
			`for (i=0; i<4; i++) {`
			`int32_t actualStart = matcher->start(i, status);`
			`REGEX_CHECK_STATUS;`
			`if (actualStart != matchStarts[i]) {`
			`errln("RegexTest failure at line %d, index %d. Expected %d, got %d\n",`
			`__LINE__, i, matchStarts[i], actualStart);`
			`}`
			`int32_t actualEnd = matcher->end(i, status);`
			`REGEX_CHECK_STATUS;`
			`if (actualEnd != matchEnds[i]) {`
			`errln("RegexTest failure at line %d index %d. Expected %d, got %d\n",`
			`__LINE__, i, matchEnds[i], actualEnd);`
			`}`
			`}`

			`REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));`
			`REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));`

			`REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);`
			`REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);`
			`matcher->reset();`
			`REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);`

			`matcher->lookingAt(status);`
			`REGEX_ASSERT(matcher->group(status) == "0123456789");`
			`REGEX_ASSERT(matcher->group(0, status) == "0123456789");`
			`REGEX_ASSERT(matcher->group(1, status) == "234567" );`
			`REGEX_ASSERT(matcher->group(2, status) == "45" );`
			`REGEX_ASSERT(matcher->group(3, status) == "89" );`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);`
			`REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);`
			`matcher->reset();`
			`REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);`

			`delete matcher;`
			`delete pat;`

			`}`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00
			`//`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`// find`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00			`//`
ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`{`
			`int32_t flags=0;`
			`UParseError pe;`
			`UErrorCode status=U_ZERO_ERROR;`

			`UnicodeString re("abc");`
			`RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);`
			`REGEX_CHECK_STATUS;`
			`UnicodeString data = ".abc..abc...abc..";`
			`// 012345678901234567`

			`RegexMatcher *matcher = pat->matcher(data, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(matcher->find());`
			`REGEX_ASSERT(matcher->start(status) == 1);`
			`REGEX_ASSERT(matcher->find());`
			`REGEX_ASSERT(matcher->start(status) == 6);`
			`REGEX_ASSERT(matcher->find());`
			`REGEX_ASSERT(matcher->start(status) == 12);`
			`REGEX_ASSERT(matcher->find() == FALSE);`
			`REGEX_ASSERT(matcher->find() == FALSE);`

			`matcher->reset();`
			`REGEX_ASSERT(matcher->find());`
			`REGEX_ASSERT(matcher->start(status) == 1);`

			`REGEX_ASSERT(matcher->find(0, status));`
			`REGEX_ASSERT(matcher->start(status) == 1);`
			`REGEX_ASSERT(matcher->find(1, status));`
			`REGEX_ASSERT(matcher->start(status) == 1);`
			`REGEX_ASSERT(matcher->find(2, status));`
			`REGEX_ASSERT(matcher->start(status) == 6);`
			`REGEX_ASSERT(matcher->find(12, status));`
			`REGEX_ASSERT(matcher->start(status) == 12);`
			`REGEX_ASSERT(matcher->find(13, status) == FALSE);`
			`REGEX_ASSERT(matcher->find(16, status) == FALSE);`
			`REGEX_ASSERT_FAIL(matcher->start(status), U_REGEX_INVALID_STATE);`
			`REGEX_CHECK_STATUS;`

			`REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);`
			`REGEX_ASSERT_FAIL(matcher->find(17, status), U_INDEX_OUTOFBOUNDS_ERROR);`

			`REGEX_ASSERT(matcher->groupCount() == 0);`

			`delete matcher;`
			`delete pat;`
			`}`

			`}`



			`//---------------------------------------------------------------------------`
			`//`
			`// Basic Check for basic functionality of`
			`// regex pattern matching.`
			`//`
			`//---------------------------------------------------------------------------`
			`void RegexTest::Basic() {`


			`//`
			`// Debug - slide failing test cases early`
			`//`
			`#if 0`
			`{`
			`}`
			`return;`
			`#endif`

ICU-105 Regular Expressions initial check in X-SVN-Rev: 10050 2002-10-22 00:09:32 +00:00
			`//`
			`// Pattern with parentheses`
			`//`
			`REGEX_TESTLM("st(abc)ring", "stabcring thing", TRUE, FALSE);`
			`REGEX_TESTLM("st(abc)ring", "stabcring", TRUE, TRUE);`
			`REGEX_TESTLM("st(abc)ring", "stabcrung", FALSE, FALSE);`

			`//`
			`// Patterns with *`
			`//`
			`REGEX_TESTLM("st(abc)*ring", "string", TRUE, TRUE);`
			`REGEX_TESTLM("st(abc)*ring", "stabcring", TRUE, TRUE);`
			`REGEX_TESTLM("st(abc)*ring", "stabcabcring", TRUE, TRUE);`
			`REGEX_TESTLM("st(abc)*ring", "stabcabcdring", FALSE, FALSE);`
			`REGEX_TESTLM("st(abc)*ring", "stabcabcabcring etc.", TRUE, FALSE);`

			`REGEX_TESTLM("a*", "", TRUE, TRUE);`
			`REGEX_TESTLM("a*", "b", TRUE, FALSE);`


			`//`
			`// Patterns with "."`
			`//`
			`REGEX_TESTLM(".", "abc", TRUE, FALSE);`
			`REGEX_TESTLM("...", "abc", TRUE, TRUE);`
			`REGEX_TESTLM("....", "abc", FALSE, FALSE);`
			`REGEX_TESTLM(".*", "abcxyz123", TRUE, TRUE);`
			`REGEX_TESTLM("ab.*xyz", "abcdefghij", FALSE, FALSE);`
			`REGEX_TESTLM("ab.*xyz", "abcdefg...wxyz", TRUE, TRUE);`
			`REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz", TRUE, TRUE);`
			`REGEX_TESTLM("ab.*xyz", "abcde...wxyz...abc..xyz...", TRUE, FALSE);`

			`//`
			`// Patterns with * applied to chars at end of literal string`
			`//`
			`REGEX_TESTLM("abc*", "ab", TRUE, TRUE);`
			`REGEX_TESTLM("abc*", "abccccc", TRUE, TRUE);`

			`//`
			`// Supplemental chars match as single chars, not a pair of surrogates.`
			`//`
			`REGEX_TESTLM(".", "\\U00011000", TRUE, TRUE);`
			`REGEX_TESTLM("...", "\\U00011000x\\U00012002", TRUE, TRUE);`
			`REGEX_TESTLM("...", "\\U00011000x\\U00012002y", TRUE, FALSE);`


			`//`
			`// UnicodeSets in the pattern`
			`//`
			`REGEX_TESTLM("[1-6]", "1", TRUE, TRUE);`
			`REGEX_TESTLM("[1-6]", "3", TRUE, TRUE);`
			`REGEX_TESTLM("[1-6]", "7", FALSE, FALSE);`
			`REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);`
			`REGEX_TESTLM("a[1-6]", "a3", TRUE, TRUE);`
			`REGEX_TESTLM("a[1-6]b", "a3b", TRUE, TRUE);`

			`REGEX_TESTLM("a[0-9]*b", "a123b", TRUE, TRUE);`
			`REGEX_TESTLM("a[0-9]*b", "abc", TRUE, FALSE);`
			`REGEX_TESTLM("[\\p{Nd}]*", "123456", TRUE, TRUE);`
			`REGEX_TESTLM("[\\p{Nd}]", "a123456", TRUE, FALSE); // note that matches 0 occurences.`
			`REGEX_TESTLM("[a][b][[:Zs:]]*", "ab ", TRUE, TRUE);`

			`//`
			`// OR operator in patterns`
			`//`
			`REGEX_TESTLM("(a\|b)", "a", TRUE, TRUE);`
			`REGEX_TESTLM("(a\|b)", "b", TRUE, TRUE);`
			`REGEX_TESTLM("(a\|b)", "c", FALSE, FALSE);`
			`REGEX_TESTLM("a\|b", "b", TRUE, TRUE);`

			`REGEX_TESTLM("(a\|b\|c)*", "aabcaaccbcabc", TRUE, TRUE);`
			`REGEX_TESTLM("(a\|b\|c)*", "aabcaaccbcabdc", TRUE, FALSE);`
			`REGEX_TESTLM("(a(b\|c\|d)(x\|y\|z)*\|123)", "ac", TRUE, TRUE);`
			`REGEX_TESTLM("(a(b\|c\|d)(x\|y\|z)*\|123)", "123", TRUE, TRUE);`
			`REGEX_TESTLM("(a\|(1\|2))(b\|c\|d)(x\|y\|z)\|123", "123", TRUE, TRUE);`
			`REGEX_TESTLM("(a\|(1\|2))(b\|c\|d)(x\|y\|z)\|123", "222211111czzzzw", TRUE, FALSE);`

			`//`
			`// +`
			`//`
			`REGEX_TESTLM("ab+", "abbc", TRUE, FALSE);`
			`REGEX_TESTLM("ab+c", "ac", FALSE, FALSE);`
			`REGEX_TESTLM("b+", "", FALSE, FALSE);`
			`REGEX_TESTLM("(abc\|def)+", "defabc", TRUE, TRUE);`
			`REGEX_TESTLM(".+y", "zippity dooy dah ", TRUE, FALSE);`
			`REGEX_TESTLM(".+y", "zippity dooy", TRUE, TRUE);`

			`//`
			`// ?`
			`//`
			`REGEX_TESTLM("ab?", "ab", TRUE, TRUE);`
			`REGEX_TESTLM("ab?", "a", TRUE, TRUE);`
			`REGEX_TESTLM("ab?", "ac", TRUE, FALSE);`
			`REGEX_TESTLM("ab?", "abb", TRUE, FALSE);`
			`REGEX_TESTLM("a(b\|c)?d", "abd", TRUE, TRUE);`
			`REGEX_TESTLM("a(b\|c)?d", "acd", TRUE, TRUE);`
			`REGEX_TESTLM("a(b\|c)?d", "ad", TRUE, TRUE);`
			`REGEX_TESTLM("a(b\|c)?d", "abcd", FALSE, FALSE);`
			`REGEX_TESTLM("a(b\|c)?d", "ab", FALSE, FALSE);`

			`};`

// ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00

			//---------------------------------------------------------------------------
			//
			//      API_Replace
			//
			//          Placeholder test case. The body is currently empty, so invoking
			//          it performs no checks.
			//
			//---------------------------------------------------------------------------
			void RegexTest::API_Replace()
			{
			    // No test cases yet.
			}


			`//---------------------------------------------------------------------------`
			`//`
			`// API_Pattern`
			`//`
			`//---------------------------------------------------------------------------`
			`void RegexTest::API_Pattern() {`
			`RegexPattern pata; // Test default constructor to not crash.`
			`RegexPattern patb;`

			`REGEX_ASSERT(pata == patb);`
			`REGEX_ASSERT(pata == pata);`

			`UnicodeString re1("abc[a-l][m-z]");`
			`UnicodeString re2("def");`
			`UErrorCode status = U_ZERO_ERROR;`
			`UParseError pe;`

			`RegexPattern *pat1 = RegexPattern::compile(re1, 0, pe, status);`
			`RegexPattern *pat2 = RegexPattern::compile(re2, 0, pe, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(pat1 == pat1);`
			`REGEX_ASSERT(*pat1 != pata);`

			`// Assign`
			`patb = *pat1;`
			`REGEX_ASSERT(patb == *pat1);`

			`// Copy Construct`
			`RegexPattern patc(*pat1);`
			`REGEX_ASSERT(patc == *pat1);`
			`REGEX_ASSERT(patb == patc);`
			`REGEX_ASSERT(pat1 != pat2);`
			`patb = *pat2;`
			`REGEX_ASSERT(patb != patc);`
			`REGEX_ASSERT(patb == *pat2);`

			`// Compile with no flags.`
			`RegexPattern *pat1a = RegexPattern::compile(re1, pe, status);`
			`REGEX_ASSERT(pat1a == pat1);`

			`// Compile with different flags should be not equal`
			`RegexPattern *pat1b = RegexPattern::compile(re1, UREGEX_CASE_INSENSITIVE, pe, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(pat1b != pat1a);`
			`REGEX_ASSERT(pat1b->flags() == UREGEX_CASE_INSENSITIVE);`
			`REGEX_ASSERT(pat1a->flags() == 0);`

			`// clone`
			`RegexPattern *pat1c = pat1b->clone();`
			`REGEX_ASSERT(pat1b == pat1c);`
			`REGEX_ASSERT(pat1a != pat1c);`


			`// TODO: Actually do some matches with the cloned/copied/assigned patterns.`



			`delete pat1c;`
			`delete pat1b;`
			`delete pat1a;`
			`delete pat1;`
			`delete pat2;`

			`//`
			`// matches convenience API`
			`//`
			`REGEX_ASSERT(RegexPattern::matches(".*", "random input", pe, status) == TRUE);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);`
			`REGEX_CHECK_STATUS;`
			`status = U_INDEX_OUTOFBOUNDS_ERROR;`
			`REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);`
			`REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);`

ICU-105 Regular Expressions, ongoing development X-SVN-Rev: 10053 2002-10-23 16:38:10 +00:00
			`//`
			`// Split()`
			`//`
			`status = U_ZERO_ERROR;`
			`pat1 = RegexPattern::compile(" +", pe, status);`
			`REGEX_CHECK_STATUS;`
			`UnicodeString fields[10];`

			`int32_t n;`
			`n = pat1->split("Now is the time", fields, 10, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(n==4);`
			`REGEX_ASSERT(fields[0]=="Now");`
			`REGEX_ASSERT(fields[1]=="is");`
			`REGEX_ASSERT(fields[2]=="the");`
			`REGEX_ASSERT(fields[3]=="time");`
			`REGEX_ASSERT(fields[4]=="");`

			`n = pat1->split("Now is the time", fields, 2, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(n==2);`
			`REGEX_ASSERT(fields[0]=="Now");`
			`REGEX_ASSERT(fields[1]=="is the time");`
			`REGEX_ASSERT(fields[2]=="the"); // left over from previous test`

			`fields[1] = "*";`
			`n = pat1->split("Now is the time", fields, 1, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(n==1);`
			`REGEX_ASSERT(fields[0]=="Now is the time");`
			`REGEX_ASSERT(fields[1]=="*");`

			`n = pat1->split(" Now is the time ", fields, 10, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(n==5);`
			`REGEX_ASSERT(fields[0]=="");`
			`REGEX_ASSERT(fields[1]=="Now");`
			`REGEX_ASSERT(fields[2]=="is");`
			`REGEX_ASSERT(fields[3]=="the");`
			`REGEX_ASSERT(fields[4]=="time");`
			`REGEX_ASSERT(fields[5]=="");`

			`n = pat1->split(" ", fields, 10, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(n==1);`
			`REGEX_ASSERT(fields[0]=="");`

			`fields[0] = "foo";`
			`n = pat1->split("", fields, 10, status);`
			`REGEX_CHECK_STATUS;`
			`REGEX_ASSERT(n==0);`
			`REGEX_ASSERT(fields[0]=="foo");`


ICU-105 Regular Expressions initial check in X-SVN-Rev: 10051 2002-10-23 01:14:17 +00:00			`}`