ICU-1825 allow filters in &func calls; fix canonicalization of &func

X-SVN-Rev: 8478
This commit is contained in:
Alan Liu 2002-04-17 16:38:18 +00:00
parent 13f66fd2a7
commit 8ed4fea84a
5 changed files with 158 additions and 44 deletions

View File

@ -501,14 +501,16 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
case FUNCTION:
{
int32_t iref = pos;
UnicodeString id = TransliteratorIDParser::parseBasicID(rule, iref);
TransliteratorIDParser::SingleID* single =
TransliteratorIDParser::parseFilterID(rule, iref);
// The next character MUST be a segment open
if (id.length() == 0 ||
if (single == NULL ||
!ICU_Utility::parseChar(rule, iref, SEGMENT_OPEN)) {
return syntaxError(U_INVALID_FUNCTION, rule, start);
}
Transliterator *t = TransliteratorParser::createBasicInstance(id, &id);
Transliterator *t = single->createInstance();
delete single;
if (t == NULL) {
return syntaxError(U_INVALID_FUNCTION, rule, start);
}

View File

@ -85,37 +85,6 @@ Transliterator* TransliteratorIDParser::SingleID::createInstance() {
return t;
}
/**
 * Parse a basic ID from the given string and rebuild it as
 * "source-target/variant".  The source (with its separator) is emitted
 * only if the parsed specs recorded one, and the variant (with its
 * separator) only if it is non-empty.  Filters are not accepted here:
 * the underlying parse is invoked with filter support switched off.
 * @param id the id to be parsed
 * @param pos INPUT-OUTPUT parameter.  On input, the position of the
 * first character to parse.  It is advanced only by the underlying
 * three-argument parseFilterID() call; this function never modifies it
 * directly.
 * @return the reassembled ID, or EMPTY if the underlying parse yields
 * no specs.  Note that failure is reported through the EMPTY value and
 * not through a null pointer; callers test the result's length.
 */
UnicodeString TransliteratorIDParser::parseBasicID(const UnicodeString& id, int32_t& pos) {
    Specs* parsed = parseFilterID(id, pos, FALSE);
    if (parsed == NULL) {
        // Nothing could be parsed
        return EMPTY;
    }

    UnicodeString canonical;

    // Optional "source-" prefix
    if (parsed->sawSource) {
        canonical.append(parsed->source).append(TARGET_SEP);
    }

    // Mandatory target
    canonical.append(parsed->target);

    // Optional "/variant" suffix
    if (parsed->variant.length() > 0) {
        canonical.append(VARIANT_SEP).append(parsed->variant);
    }

    delete parsed;
    return canonical;
}
/**
* Parse a single ID, that is, an ID of the general form
@ -207,6 +176,33 @@ TransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos,
return single;
}
/**
 * Parse a filter ID, that is, an ID of the general form
 * "[f1] s1-t1/v1", with the filters optional, and the variants optional.
 * @param id the id to be parsed
 * @param pos INPUT-OUTPUT parameter.  On input, the position of
 * the first character to parse.  On output, the position after
 * the last character parsed.  If the underlying parse yields no
 * specs, pos is restored to its input value.
 * @return a SingleID object carrying the parsed filter, or NULL if
 * the parse fails or the SingleID could not be created.  The caller
 * is responsible for deleting the returned object.
 */
TransliteratorIDParser::SingleID*
TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) {
    int32_t start = pos;

    Specs* specs = parseFilterID(id, pos, TRUE);
    if (specs == NULL) {
        // Parse failed: leave the caller's position untouched
        pos = start;
        return NULL;
    }

    // Assemble return results
    SingleID* single = specsToID(specs, FORWARD);
    if (single != NULL) {
        // Guard against specsToID() failing to produce an object;
        // dereferencing a NULL result here would crash.
        single->filter = specs->filter;
    }
    delete specs;
    return single;
}
/**
* Parse a global filter of the form "[f]" or "([f])", depending
* on 'withParens'.

View File

@ -98,19 +98,15 @@ class TransliteratorIDParser {
};
/**
* Parse a basic ID from the given string. A basic ID contains
* only a single source, target, and variant. It does not contain
* a filter or an explicit inverse.
* Parse a filter ID, that is, an ID of the general form
* "[f1] s1-t1/v1", with the filters optional, and the variants optional.
* @param id the id to be parsed
* @param pos INPUT-OUTPUT parameter. On input, the position of
* the first character to parse. On output, the position after
* the last character parsed. If the parse fails pos will be
* unchanged.
* @return the parsed ID in canonical format, or null on parse
* failure. If the parsed ID did not contain a source, the return
* ID will not.
* the last character parsed.
* @return a SingleID object or null if the parse fails
*/
static UnicodeString parseBasicID(const UnicodeString& id, int32_t& pos);
static SingleID* parseFilterID(const UnicodeString& id, int32_t& pos);
/**
* Parse a single ID, that is, an ID of the general form

View File

@ -156,6 +156,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE(66,TestSurrogateCasing);
TESTCASE(67,TestFunction);
TESTCASE(68,TestInvalidBackRef);
TESTCASE(69,TestUserFunction);
default: name = ""; break;
}
@ -3337,7 +3338,7 @@ void TransliteratorTest::TestFunction() {
// Careful with spacing and ';' here: Phrase this exactly
// as toRules() is going to return it. If toRules() changes
// with regard to spacing or ';', then adjust this string.
UnicodeString rule = // TODO clean up spacing
UnicodeString rule =
"([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
UParseError pe;
@ -3388,6 +3389,123 @@ void TransliteratorTest::TestInvalidBackRef(void) {
}
}
// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
// BEGIN TestUserFunction support factory
// Prototype transliterators for the test factory, indexed by slot.
// _TUFReg() stores a transliterator here, _TUFFactory() clones from it,
// and _TUFUnreg() deletes it.
Transliterator* _TUFF[4];
// ID under which each slot was registered; _TUFUnreg() uses it to
// unregister the slot.
UnicodeString _TUFID[4];
/**
 * Factory callback registered by _TUFReg().  Returns a fresh clone of
 * the prototype stored in the _TUFF slot selected by the integer held
 * in 'context'.  The 'ID' argument is not consulted.
 */
static Transliterator* _TUFFactory(const UnicodeString& ID,
                                   Transliterator::Token context) {
    Transliterator* prototype = _TUFF[context.integer];
    return prototype->clone();
}
static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
_TUFF[n] = t;
_TUFID[n] = ID;
Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
}
/**
 * Undo _TUFReg() for slot 'n': unregister the ID recorded for the slot
 * and delete the stored prototype.  Slots that are NULL (never filled,
 * or already released) are skipped, so calling this more than once for
 * the same slot is harmless.
 */
static void _TUFUnreg(int32_t n) {
    if (_TUFF[n] != NULL) {
        Transliterator::unregister(_TUFID[n]);
        delete _TUFF[n];
        // Clear the slot so that neither the factory nor a repeated
        // call can touch the freed object.
        _TUFF[n] = NULL;
    }
}
// END TestUserFunction support factory
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/**
 * Test that user-registered transliterators can be used under function
 * syntax.
 *
 * Transliterators handed to _TUFReg() are stored in the _TUFF table and
 * released by _TUFUnreg() at the FAIL label; every other transliterator
 * created here is deleted locally, including on the failure paths.
 */
void TransliteratorTest::TestUserFunction() {
    Transliterator* t;
    UParseError pe;
    UErrorCode ec = U_ZERO_ERROR;

    // Setup our factory
    int32_t i;
    for (i=0; i<4; ++i) {
        _TUFF[i] = NULL;
    }

    // There's no need to register inverses if we don't use them
    t = Transliterator::createFromRules("gif",
                                        "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
        // Nothing has been registered yet, so a plain return suffices;
        // release t in case it was created despite the failure code.
        delete t;
        return;
    }
    _TUFReg("Any-gif", t, 0);

    t = Transliterator::createFromRules("RemoveCurly",
                                        "[\\{\\}] > ;",
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
        delete t; // not yet owned by the factory table
        goto FAIL;
    }
    _TUFReg("Any-RemoveCurly", t, 1);

    logln("Trying &hex");
    t = Transliterator::createFromRules("hex2",
                                        "(.) > &hex($1);",
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln("FAIL: createFromRules");
        delete t; // not yet owned by the factory table
        goto FAIL;
    }
    logln("Registering");
    _TUFReg("Any-hex2", t, 2);
    t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
        delete t;
        goto FAIL;
    }
    expect(*t, "abc", "\\u0061\\u0062\\u0063");
    delete t;

    logln("Trying &gif");
    t = Transliterator::createFromRules("gif2",
                                        "(.) > &Gif(&Hex2($1));",
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
        delete t; // not yet owned by the factory table
        goto FAIL;
    }
    logln("Registering");
    _TUFReg("Any-gif2", t, 3);
    t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
        delete t;
        goto FAIL;
    }
    expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
           "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
    delete t;

    // Test that filters are allowed after &
    t = Transliterator::createFromRules("test",
                                        "(.) > &Hex($1) ' ' &[\\{\\}]Remove(&Name($1)) ' ';",
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
        delete t;
        goto FAIL;
    }
    expect(*t, "abc",
           "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
    delete t;

 FAIL:
    // Common cleanup: unregister and delete everything that was handed
    // to _TUFReg(); slots that were never filled are still NULL.
    for (i=0; i<4; ++i) {
        _TUFUnreg(i);
    }
}
//======================================================================
// Support methods
//======================================================================

View File

@ -322,6 +322,8 @@ private:
void TestInvalidBackRef(void);
void TestUserFunction(void);
//======================================================================
// Support methods
//======================================================================