ICU-1825 allow filters in &func calls; fix canonicalization of &func
X-SVN-Rev: 8478
This commit is contained in:
parent
13f66fd2a7
commit
8ed4fea84a
@ -501,14 +501,16 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
|
||||
case FUNCTION:
|
||||
{
|
||||
int32_t iref = pos;
|
||||
UnicodeString id = TransliteratorIDParser::parseBasicID(rule, iref);
|
||||
TransliteratorIDParser::SingleID* single =
|
||||
TransliteratorIDParser::parseFilterID(rule, iref);
|
||||
// The next character MUST be a segment open
|
||||
if (id.length() == 0 ||
|
||||
if (single == NULL ||
|
||||
!ICU_Utility::parseChar(rule, iref, SEGMENT_OPEN)) {
|
||||
return syntaxError(U_INVALID_FUNCTION, rule, start);
|
||||
}
|
||||
|
||||
Transliterator *t = TransliteratorParser::createBasicInstance(id, &id);
|
||||
Transliterator *t = single->createInstance();
|
||||
delete single;
|
||||
if (t == NULL) {
|
||||
return syntaxError(U_INVALID_FUNCTION, rule, start);
|
||||
}
|
||||
|
@ -85,37 +85,6 @@ Transliterator* TransliteratorIDParser::SingleID::createInstance() {
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a basic ID from the given string. A basic ID contains
|
||||
* only a single source, target, and variant. It does not contain
|
||||
* a filter or an explicit inverse.
|
||||
* @param id the id to be parsed
|
||||
* @param pos INPUT-OUTPUT parameter. On input, the position of
|
||||
* the first character to parse. On output, the position after
|
||||
* the last character parsed. If the parse fails pos will be
|
||||
* unchanged.
|
||||
* @return the parsed ID in canonical format, or NULL on parse
|
||||
* failure. If the parsed ID did not contain a source, the return
|
||||
* ID will not.
|
||||
*/
|
||||
UnicodeString TransliteratorIDParser::parseBasicID(const UnicodeString& id, int32_t& pos) {
|
||||
Specs* specs = parseFilterID(id, pos, FALSE);
|
||||
if (specs != NULL) {
|
||||
UnicodeString buf;
|
||||
if (specs->sawSource) {
|
||||
buf.append(specs->source);
|
||||
buf.append(TARGET_SEP);
|
||||
}
|
||||
buf.append(specs->target);
|
||||
if (specs->variant.length() != 0) {
|
||||
buf.append(VARIANT_SEP);
|
||||
buf.append(specs->variant);
|
||||
}
|
||||
delete specs;
|
||||
return buf;
|
||||
}
|
||||
return EMPTY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a single ID, that is, an ID of the general form
|
||||
@ -207,6 +176,33 @@ TransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos,
|
||||
return single;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a filter ID, that is, an ID of the general form
|
||||
* "[f1] s1-t1/v1", with the filters optional, and the variants optional.
|
||||
* @param id the id to be parsed
|
||||
* @param pos INPUT-OUTPUT parameter. On input, the position of
|
||||
* the first character to parse. On output, the position after
|
||||
* the last character parsed.
|
||||
* @return a SingleID object or null if the parse fails
|
||||
*/
|
||||
TransliteratorIDParser::SingleID*
|
||||
TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) {
|
||||
|
||||
int32_t start = pos;
|
||||
|
||||
Specs* specs = parseFilterID(id, pos, TRUE);
|
||||
if (specs == NULL) {
|
||||
pos = start;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Assemble return results
|
||||
SingleID* single = specsToID(specs, FORWARD);
|
||||
single->filter = specs->filter;
|
||||
delete specs;
|
||||
return single;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a global filter of the form "[f]" or "([f])", depending
|
||||
* on 'withParens'.
|
||||
|
@ -98,19 +98,15 @@ class TransliteratorIDParser {
|
||||
};
|
||||
|
||||
/**
|
||||
* Parse a basic ID from the given string. A basic ID contains
|
||||
* only a single source, target, and variant. It does not contain
|
||||
* a filter or an explicit inverse.
|
||||
* Parse a filter ID, that is, an ID of the general form
|
||||
* "[f1] s1-t1/v1", with the filters optional, and the variants optional.
|
||||
* @param id the id to be parsed
|
||||
* @param pos INPUT-OUTPUT parameter. On input, the position of
|
||||
* the first character to parse. On output, the position after
|
||||
* the last character parsed. If the parse fails pos[0] will be
|
||||
* unchanged.
|
||||
* @return the parsed ID in canonical format, or null on parse
|
||||
* failure. If the parsed ID did not contain a source, the return
|
||||
* ID will not.
|
||||
* the last character parsed.
|
||||
* @return a SingleID object or null if the parse fails
|
||||
*/
|
||||
static UnicodeString parseBasicID(const UnicodeString& id, int32_t& pos);
|
||||
static SingleID* parseFilterID(const UnicodeString& id, int32_t& pos);
|
||||
|
||||
/**
|
||||
* Parse a single ID, that is, an ID of the general form
|
||||
|
@ -156,6 +156,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
|
||||
TESTCASE(66,TestSurrogateCasing);
|
||||
TESTCASE(67,TestFunction);
|
||||
TESTCASE(68,TestInvalidBackRef);
|
||||
TESTCASE(69,TestUserFunction);
|
||||
|
||||
default: name = ""; break;
|
||||
}
|
||||
@ -3337,7 +3338,7 @@ void TransliteratorTest::TestFunction() {
|
||||
// Careful with spacing and ';' here: Phrase this exactly
|
||||
// as toRules() is going to return it. If toRules() changes
|
||||
// with regard to spacing or ';', then adjust this string.
|
||||
UnicodeString rule = // TODO clean up spacing
|
||||
UnicodeString rule =
|
||||
"([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
|
||||
|
||||
UParseError pe;
|
||||
@ -3388,6 +3389,123 @@ void TransliteratorTest::TestInvalidBackRef(void) {
|
||||
}
|
||||
}
|
||||
|
||||
// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
|
||||
// BEGIN TestUserFunction support factory
|
||||
|
||||
// Prototype transliterators, one per registration slot.  _TUFReg stores into
// this table, _TUFFactory clones from it, and _TUFUnreg deletes the entries.
Transliterator* _TUFF[4];
|
||||
// IDs recorded by _TUFReg for each slot; _TUFUnreg passes them to
// Transliterator::unregister.
UnicodeString _TUFID[4];
|
||||
|
||||
/**
 * Factory callback passed to Transliterator::registerFactory by _TUFReg.
 * Produces a clone of the prototype stored in _TUFF at the slot index
 * carried in the token.
 * @param ID the ID being instantiated; not used, the slot comes from context
 * @param context token whose integer field selects the _TUFF slot
 * @return a new clone of the prototype, or NULL if the slot index is
 * outside the table or the slot holds no prototype
 */
static Transliterator* _TUFFactory(const UnicodeString& ID,
                                   Transliterator::Token context) {
    (void)ID; // required by the factory signature only

    // _TUFF has 4 slots; refuse anything outside it or not yet registered
    // instead of dereferencing a bad pointer.
    if (context.integer < 0 || context.integer >= 4 ||
        _TUFF[context.integer] == NULL) {
        return NULL;
    }
    return _TUFF[context.integer]->clone();
}
|
||||
|
||||
static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
|
||||
_TUFF[n] = t;
|
||||
_TUFID[n] = ID;
|
||||
Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
|
||||
}
|
||||
|
||||
/**
 * Undo _TUFReg for slot n: unregister the recorded ID and delete the
 * stored prototype.  Does nothing if the slot is empty, so it is safe to
 * call for slots that were never registered and to call more than once.
 * @param n the slot index in _TUFF / _TUFID
 */
static void _TUFUnreg(int32_t n) {
    if (_TUFF[n] == NULL) {
        return;
    }
    Transliterator::unregister(_TUFID[n]);
    delete _TUFF[n];
    // Clear the slot so a later call (or a stray factory invocation)
    // cannot touch the freed object.
    _TUFF[n] = NULL;
}
|
||||
|
||||
// END TestUserFunction support factory
|
||||
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
/**
|
||||
* Test that user-registered transliterators can be used under function
|
||||
* syntax.
|
||||
*/
|
||||
void TransliteratorTest::TestUserFunction() {
|
||||
|
||||
Transliterator* t;
|
||||
UParseError pe;
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
|
||||
// Setup our factory
|
||||
int32_t i;
|
||||
for (i=0; i<4; ++i) {
|
||||
_TUFF[i] = NULL;
|
||||
}
|
||||
|
||||
// There's no need to register inverses if we don't use them
|
||||
t = Transliterator::createFromRules("gif",
|
||||
"'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
|
||||
return;
|
||||
}
|
||||
_TUFReg("Any-gif", t, 0);
|
||||
|
||||
t = Transliterator::createFromRules("RemoveCurly",
|
||||
"[\\{\\}] > ;",
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
|
||||
goto FAIL;
|
||||
}
|
||||
_TUFReg("Any-RemoveCurly", t, 1);
|
||||
|
||||
logln("Trying &hex");
|
||||
t = Transliterator::createFromRules("hex2",
|
||||
"(.) > &hex($1);",
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln("FAIL: createFromRules");
|
||||
goto FAIL;
|
||||
}
|
||||
logln("Registering");
|
||||
_TUFReg("Any-hex2", t, 2);
|
||||
t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
|
||||
goto FAIL;
|
||||
}
|
||||
expect(*t, "abc", "\\u0061\\u0062\\u0063");
|
||||
delete t;
|
||||
|
||||
logln("Trying &gif");
|
||||
t = Transliterator::createFromRules("gif2",
|
||||
"(.) > &Gif(&Hex2($1));",
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
|
||||
goto FAIL;
|
||||
}
|
||||
logln("Registering");
|
||||
_TUFReg("Any-gif2", t, 3);
|
||||
t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
|
||||
goto FAIL;
|
||||
}
|
||||
expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
|
||||
"<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
|
||||
delete t;
|
||||
|
||||
// Test that filters are allowed after &
|
||||
t = Transliterator::createFromRules("test",
|
||||
"(.) > &Hex($1) ' ' &[\\{\\}]Remove(&Name($1)) ' ';",
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
|
||||
goto FAIL;
|
||||
}
|
||||
expect(*t, "abc",
|
||||
"\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
|
||||
delete t;
|
||||
|
||||
FAIL:
|
||||
for (i=0; i<4; ++i) {
|
||||
_TUFUnreg(i);
|
||||
}
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
@ -322,6 +322,8 @@ private:
|
||||
|
||||
void TestInvalidBackRef(void);
|
||||
|
||||
void TestUserFunction(void);
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
Loading…
Reference in New Issue
Block a user