From 4524f0f633362b111cb17ff6dd785f7e62453b84 Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Tue, 30 Oct 2001 18:14:57 +0000 Subject: [PATCH] ICU-1406 make quantified segments behave like perl counterparts X-SVN-Rev: 6494 --- icu4c/source/test/intltest/transtst.cpp | 74 ++++++++++++++++++++++--- icu4c/source/test/intltest/transtst.h | 6 ++ 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index d7c591126e..0a2d1b88b6 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -126,10 +126,11 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(44,TestCompoundFilterID); TESTCASE(45,TestPropertySet); TESTCASE(46,TestNewEngine); - TESTCASE(47,TestDevanagariLatinRT); - TESTCASE(48,TestTeluguLatinRT); - TESTCASE(49,TestCompoundLatinRT); - TESTCASE(50,TestSanskritLatinRT); + TESTCASE(47,TestQuantifiedSegment); + TESTCASE(48,TestDevanagariLatinRT); + TESTCASE(49,TestTeluguLatinRT); + TESTCASE(50,TestCompoundLatinRT); + TESTCASE(51,TestSanskritLatinRT); default: name = ""; break; } } @@ -1657,9 +1658,11 @@ void TransliteratorTest::TestQuantifier() { "c abc ababc", "d d abd"); + // NOTE: The (ab)+ when referenced just yields a single "ab", + // not the full sequence of them. This accords with perl behavior. expect("(ab)+ {x} > '(' $1 ')';", "x abx ababxy", - "x ab(ab) abab(abab)y"); + "x ab(ab) abab(ab)y"); expect("b+ > x;", "ac abc abbc abbbc", @@ -1677,12 +1680,11 @@ void TransliteratorTest::TestQuantifier() { "qa qab qaba qababc", "xa x xa xc"); - // Oddity -- "(foo)* > $1" causes $1 to match the run of "foo"s - // In perl, it only matches the first occurrence, so the output - // is "()a (ab) (ab)a (ab)c". + // NOTE: The (ab)+ when referenced just yields a single "ab", + // not the full sequence of them. This accords with perl behavior. expect("q(ab)* > '(' $1 ')';", "qa qab qaba qababc", - "()a (ab) (ab)a (abab)c"); + "()a (ab) (ab)a (ab)c"); // 'foo'+ and 'foo'* -- the quantifier should apply to the entire // quoted string @@ -2145,6 +2147,60 @@ void TransliteratorTest::TestNewEngine() { expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha"); } +/** + * Test quantified segment behavior. We want: + * ([abc])+ > x $1 x; applied to "cba" produces "xax" + */ +void TransliteratorTest::TestQuantifiedSegment(void) { + // The normal case + expect("([abc]+) > x $1 x;", "cba", "xcbax"); + + // The tricky case; the quantifier is around the segment + expect("([abc])+ > x $1 x;", "cba", "xax"); + + // Tricky case in reverse direction + expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); + + // Check post-context segment + expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); + + // Test toRule/toPattern for non-quantified segment. + // Careful with spacing here. + UnicodeString r("([a-c]){q} > x $1 x;"); + UParseError pe; + UErrorCode ec = U_ZERO_ERROR; + Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec); + if (U_FAILURE(ec)) { + errln("FAIL: createFromRules"); + delete t; + return; + } + UnicodeString rr; + t->toRules(rr, true); + if (r != rr) { + errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); + } else { + logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); + } + delete t; + + // Test toRule/toPattern for quantified segment. + // Careful with spacing here. + r = "([a-c])+{q} > x $1 x;"; + t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec); + if (U_FAILURE(ec)) { + errln("FAIL: createFromRules"); + delete t; + return; + } + t->toRules(rr, true); + if (r != rr) { + errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); + } else { + logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); + } +} + //====================================================================== // Ram's tests //====================================================================== diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h index bb13ddb12e..cf6a1db6bf 100644 --- a/icu4c/source/test/intltest/transtst.h +++ b/icu4c/source/test/intltest/transtst.h @@ -220,6 +220,12 @@ class TransliteratorTest : public IntlTest { */ void TestNewEngine(void); + /** + * Test quantified segment behavior. We want: + * ([abc])+ > x $1 x; applied to "cba" produces "xax" + */ + void TestQuantifiedSegment(void); + /* Devanagari-Latin rules Test */ void TestDevanagariLatinRT(void);