ICU-1406 make quantified segments behave like perl counterparts

X-SVN-Rev: 6494
This commit is contained in:
Alan Liu 2001-10-30 18:14:57 +00:00
parent 2c2b11dfe8
commit 4524f0f633
2 changed files with 71 additions and 9 deletions

View File

@ -126,10 +126,11 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE(44,TestCompoundFilterID);
TESTCASE(45,TestPropertySet);
TESTCASE(46,TestNewEngine);
TESTCASE(47,TestDevanagariLatinRT);
TESTCASE(48,TestTeluguLatinRT);
TESTCASE(49,TestCompoundLatinRT);
TESTCASE(50,TestSanskritLatinRT);
TESTCASE(47,TestQuantifiedSegment);
TESTCASE(48,TestDevanagariLatinRT);
TESTCASE(49,TestTeluguLatinRT);
TESTCASE(50,TestCompoundLatinRT);
TESTCASE(51,TestSanskritLatinRT);
default: name = ""; break;
}
}
@ -1657,9 +1658,11 @@ void TransliteratorTest::TestQuantifier() {
"c abc ababc",
"d d abd");
// NOTE: The (ab)+ when referenced just yields a single "ab",
// not the full sequence of them. This accords with perl behavior.
expect("(ab)+ {x} > '(' $1 ')';",
"x abx ababxy",
"x ab(ab) abab(abab)y");
"x ab(ab) abab(ab)y");
expect("b+ > x;",
"ac abc abbc abbbc",
@ -1677,12 +1680,11 @@ void TransliteratorTest::TestQuantifier() {
"qa qab qaba qababc",
"xa x xa xc");
// Oddity -- "(foo)* > $1" causes $1 to match the run of "foo"s
// In perl, it only matches the first occurrence, so the output
// is "()a (ab) (ab)a (ab)c".
// NOTE: The (ab)* when referenced yields at most a single "ab",
// not the full sequence of them. This accords with perl behavior.
expect("q(ab)* > '(' $1 ')';",
"qa qab qaba qababc",
"()a (ab) (ab)a (abab)c");
"()a (ab) (ab)a (ab)c");
// 'foo'+ and 'foo'* -- the quantifier should apply to the entire
// quoted string
@ -2145,6 +2147,60 @@ void TransliteratorTest::TestNewEngine() {
expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
}
/**
 * Test quantified segment behavior. We want:
 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
 *
 * That is, when the quantifier is applied to the segment itself, a
 * reference to the segment yields a single match of the segment rather
 * than the whole quantified run. The test also checks that rules with a
 * plain and with a quantified segment survive a
 * createFromRules()/toRules() round trip unchanged.
 */
void TransliteratorTest::TestQuantifiedSegment(void) {
    // The normal case: the quantifier is inside the segment, so $1 is
    // the entire run.
    expect("([abc]+) > x $1 x;", "cba", "xcbax");

    // The tricky case; the quantifier is around the segment
    expect("([abc])+ > x $1 x;", "cba", "xax");

    // Tricky case in reverse direction
    expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");

    // Check post-context segment
    expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");

    // Test toRule/toPattern for non-quantified segment.
    // Careful with spacing here: the comparison below is an exact
    // string match against what toRules() emits.
    UnicodeString r("([a-c]){q} > x $1 x;");
    UParseError pe;
    UErrorCode ec = U_ZERO_ERROR;
    Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
    if (U_FAILURE(ec)) {
        errln("FAIL: createFromRules");
        delete t;
        return;
    }
    UnicodeString rr;
    t->toRules(rr, true);
    if (r != rr) {
        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
    } else {
        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
    }
    delete t;

    // Test toRule/toPattern for quantified segment.
    // Careful with spacing here.
    r = "([a-c])+{q} > x $1 x;";
    t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
    if (U_FAILURE(ec)) {
        errln("FAIL: createFromRules");
        delete t;
        return;
    }
    t->toRules(rr, true);
    if (r != rr) {
        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
    } else {
        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
    }
    // Release the second transliterator as well; it was previously
    // leaked on this path.
    delete t;
}
//======================================================================
// Ram's tests
//======================================================================

View File

@ -220,6 +220,12 @@ class TransliteratorTest : public IntlTest {
*/
void TestNewEngine(void);
/**
 * Test quantified segment behavior. We want:
 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
 *
 * In other words, when a quantifier is applied to a segment, a
 * reference to that segment yields a single match of the segment,
 * not the entire quantified run.
 */
void TestQuantifiedSegment(void);
/* Devanagari-Latin rules Test */
void TestDevanagariLatinRT(void);