From 8ed4fea84a479bb80e4799b6b5a1007b96099ae1 Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Wed, 17 Apr 2002 16:38:18 +0000 Subject: [PATCH] ICU-1825 allow filters in &func calls; fix canonicalization of &func X-SVN-Rev: 8478 --- icu4c/source/i18n/rbt_pars.cpp | 8 +- icu4c/source/i18n/tridpars.cpp | 58 ++++++------ icu4c/source/i18n/tridpars.h | 14 +-- icu4c/source/test/intltest/transtst.cpp | 120 +++++++++++++++++++++++- icu4c/source/test/intltest/transtst.h | 2 + 5 files changed, 158 insertions(+), 44 deletions(-) diff --git a/icu4c/source/i18n/rbt_pars.cpp b/icu4c/source/i18n/rbt_pars.cpp index 3c0ae10418..d0d71ae854 100644 --- a/icu4c/source/i18n/rbt_pars.cpp +++ b/icu4c/source/i18n/rbt_pars.cpp @@ -501,14 +501,16 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l case FUNCTION: { int32_t iref = pos; - UnicodeString id = TransliteratorIDParser::parseBasicID(rule, iref); + TransliteratorIDParser::SingleID* single = + TransliteratorIDParser::parseFilterID(rule, iref); // The next character MUST be a segment open - if (id.length() == 0 || + if (single == NULL || !ICU_Utility::parseChar(rule, iref, SEGMENT_OPEN)) { return syntaxError(U_INVALID_FUNCTION, rule, start); } - Transliterator *t = TransliteratorParser::createBasicInstance(id, &id); + Transliterator *t = single->createInstance(); + delete single; if (t == NULL) { return syntaxError(U_INVALID_FUNCTION, rule, start); } diff --git a/icu4c/source/i18n/tridpars.cpp b/icu4c/source/i18n/tridpars.cpp index daa42882ed..171b3d881e 100644 --- a/icu4c/source/i18n/tridpars.cpp +++ b/icu4c/source/i18n/tridpars.cpp @@ -85,37 +85,6 @@ Transliterator* TransliteratorIDParser::SingleID::createInstance() { return t; } -/** - * Parse a basic ID from the given string. A basic ID contains - * only a single source, target, and variant. It does not contain - * a filter or an explicit inverse. - * @param id the id to be parsed - * @param pos INPUT-OUTPUT parameter. On input, the position of - * the first character to parse. On output, the position after - * the last character parsed. If the parse fails pos will be - * unchanged. - * @return the parsed ID in canonical format, or NULL on parse - * failure. If the parsed ID did not contain a source, the return - * ID will not. - */ -UnicodeString TransliteratorIDParser::parseBasicID(const UnicodeString& id, int32_t& pos) { - Specs* specs = parseFilterID(id, pos, FALSE); - if (specs != NULL) { - UnicodeString buf; - if (specs->sawSource) { - buf.append(specs->source); - buf.append(TARGET_SEP); - } - buf.append(specs->target); - if (specs->variant.length() != 0) { - buf.append(VARIANT_SEP); - buf.append(specs->variant); - } - delete specs; - return buf; - } - return EMPTY; -} /** * Parse a single ID, that is, an ID of the general form @@ -207,6 +176,33 @@ TransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos, return single; } +/** + * Parse a filter ID, that is, an ID of the general form + * "[f1] s1-t1/v1", with the filters optional, and the variants optional. + * @param id the id to be parsed + * @param pos INPUT-OUTPUT parameter. On input, the position of + * the first character to parse. On output, the position after + * the last character parsed. + * @return a SingleID object or null if the parse fails + */ +TransliteratorIDParser::SingleID* +TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) { + + int32_t start = pos; + + Specs* specs = parseFilterID(id, pos, TRUE); + if (specs == NULL) { + pos = start; + return NULL; + } + + // Assemble return results + SingleID* single = specsToID(specs, FORWARD); + single->filter = specs->filter; + delete specs; + return single; +} + /** * Parse a global filter of the form "[f]" or "([f])", depending * on 'withParens'. diff --git a/icu4c/source/i18n/tridpars.h b/icu4c/source/i18n/tridpars.h index 8349215bbb..f5fcafaa04 100644 --- a/icu4c/source/i18n/tridpars.h +++ b/icu4c/source/i18n/tridpars.h @@ -98,19 +98,15 @@ class TransliteratorIDParser { }; /** - * Parse a basic ID from the given string. A basic ID contains - * only a single source, target, and variant. It does not contain - * a filter or an explicit inverse. + * Parse a filter ID, that is, an ID of the general form + * "[f1] s1-t1/v1", with the filters optional, and the variants optional. * @param id the id to be parsed * @param pos INPUT-OUTPUT parameter. On input, the position of * the first character to parse. On output, the position after - * the last character parsed. If the parse fails pos[0] will be - * unchanged. - * @return the parsed ID in canonical format, or null on parse - * failure. If the parsed ID did not contain a source, the return - * ID will not. + * the last character parsed. + * @return a SingleID object or null if the parse fails */ - static UnicodeString parseBasicID(const UnicodeString& id, int32_t& pos); + static SingleID* parseFilterID(const UnicodeString& id, int32_t& pos); /** * Parse a single ID, that is, an ID of the general form diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index 5a8033eb9b..5d7c432974 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -156,6 +156,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(66,TestSurrogateCasing); TESTCASE(67,TestFunction); TESTCASE(68,TestInvalidBackRef); + TESTCASE(69,TestUserFunction); default: name = ""; break; } @@ -3337,7 +3338,7 @@ void TransliteratorTest::TestFunction() { // Careful with spacing and ';' here: Phrase this exactly // as toRules() is going to return it. If toRules() changes // with regard to spacing or ';', then adjust this string. - UnicodeString rule = // TODO clean up spacing + UnicodeString rule = "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; UParseError pe; @@ -3388,6 +3389,123 @@ void TransliteratorTest::TestInvalidBackRef(void) { } } +// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +// BEGIN TestUserFunction support factory + +Transliterator* _TUFF[4]; +UnicodeString _TUFID[4]; + +static Transliterator* _TUFFactory(const UnicodeString& ID, + Transliterator::Token context) { + return _TUFF[context.integer]->clone(); +} + +static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) { + _TUFF[n] = t; + _TUFID[n] = ID; + Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n)); +} + +static void _TUFUnreg(int32_t n) { + if (_TUFF[n] != NULL) { + Transliterator::unregister(_TUFID[n]); + delete _TUFF[n]; + } +} + +// END TestUserFunction support factory +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +/** + * Test that user-registered transliterators can be used under function + * syntax. + */ +void TransliteratorTest::TestUserFunction() { + + Transliterator* t; + UParseError pe; + UErrorCode ec = U_ZERO_ERROR; + + // Setup our factory + int32_t i; + for (i=0; i<4; ++i) { + _TUFF[i] = NULL; + } + + // There's no need to register inverses if we don't use them + t = Transliterator::createFromRules("gif", + "'\\'u(..)(..) > '';", + UTRANS_FORWARD, pe, ec); + if (t == NULL || U_FAILURE(ec)) { + errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec)); + return; + } + _TUFReg("Any-gif", t, 0); + + t = Transliterator::createFromRules("RemoveCurly", + "[\\{\\}] > ;", + UTRANS_FORWARD, pe, ec); + if (t == NULL || U_FAILURE(ec)) { + errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec)); + goto FAIL; + } + _TUFReg("Any-RemoveCurly", t, 1); + + logln("Trying &hex"); + t = Transliterator::createFromRules("hex2", + "(.) > &hex($1);", + UTRANS_FORWARD, pe, ec); + if (t == NULL || U_FAILURE(ec)) { + errln("FAIL: createFromRules"); + goto FAIL; + } + logln("Registering"); + _TUFReg("Any-hex2", t, 2); + t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec); + if (t == NULL || U_FAILURE(ec)) { + errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec)); + goto FAIL; + } + expect(*t, "abc", "\\u0061\\u0062\\u0063"); + delete t; + + logln("Trying &gif"); + t = Transliterator::createFromRules("gif2", + "(.) > &Gif(&Hex2($1));", + UTRANS_FORWARD, pe, ec); + if (t == NULL || U_FAILURE(ec)) { + errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec)); + goto FAIL; + } + logln("Registering"); + _TUFReg("Any-gif2", t, 3); + t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec); + if (t == NULL || U_FAILURE(ec)) { + errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec)); + goto FAIL; + } + expect(*t, "ab", "" + ""); + delete t; + + // Test that filters are allowed after & + t = Transliterator::createFromRules("test", + "(.) > &Hex($1) ' ' &[\\{\\}]Remove(&Name($1)) ' ';", + UTRANS_FORWARD, pe, ec); + if (t == NULL || U_FAILURE(ec)) { + errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec)); + goto FAIL; + } + expect(*t, "abc", + "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); + delete t; + + FAIL: + for (i=0; i<4; ++i) { + _TUFUnreg(i); + } +} + //====================================================================== // Support methods //====================================================================== diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h index 60e0871e6c..1713febfc4 100644 --- a/icu4c/source/test/intltest/transtst.h +++ b/icu4c/source/test/intltest/transtst.h @@ -322,6 +322,8 @@ private: void TestInvalidBackRef(void); + void TestUserFunction(void); + //====================================================================== // Support methods //======================================================================