ICU-1825 allow filters in &func calls; fix canonicalization of &func

X-SVN-Rev: 8478
2002-04-17 16:38:18 +00:00 · 2002-04-17 16:38:18 +00:00 · 8ed4fea84a
commit 8ed4fea84a
parent 13f66fd2a7
5 changed files with 158 additions and 44 deletions
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@ -501,14 +501,16 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
        case FUNCTION:
            {
                int32_t iref = pos;
-                UnicodeString id = TransliteratorIDParser::parseBasicID(rule, iref);
+                TransliteratorIDParser::SingleID* single =
+                    TransliteratorIDParser::parseFilterID(rule, iref);
                // The next character MUST be a segment open
-                if (id.length() == 0 ||
+                if (single == NULL ||
                    !ICU_Utility::parseChar(rule, iref, SEGMENT_OPEN)) {
                    return syntaxError(U_INVALID_FUNCTION, rule, start);
                }
                
-                Transliterator *t = TransliteratorParser::createBasicInstance(id, &id);
+                Transliterator *t = single->createInstance();
+                delete single;
                if (t == NULL) {
                    return syntaxError(U_INVALID_FUNCTION, rule, start);
                }
--- a/icu4c/source/i18n/tridpars.cpp
+++ b/icu4c/source/i18n/tridpars.cpp
@ -85,37 +85,6 @@ Transliterator* TransliteratorIDParser::SingleID::createInstance() {
    return t;
 }

-/**
- * Parse a basic ID from the given string.  A basic ID contains
- * only a single source, target, and variant.  It does not contain
- * a filter or an explicit inverse.
- * @param id the id to be parsed
- * @param pos INPUT-OUTPUT parameter.  On input, the position of
- * the first character to parse.  On output, the position after
- * the last character parsed.  If the parse fails pos will be
- * unchanged.
- * @return the parsed ID in canonical format, or NULL on parse
- * failure.  If the parsed ID did not contain a source, the return
- * ID will not.
- */
-UnicodeString TransliteratorIDParser::parseBasicID(const UnicodeString& id, int32_t& pos) {
-    Specs* specs = parseFilterID(id, pos, FALSE);
-    if (specs != NULL) {
-        UnicodeString buf;
-        if (specs->sawSource) {
-            buf.append(specs->source);
-            buf.append(TARGET_SEP);
-        }
-        buf.append(specs->target);
-        if (specs->variant.length() != 0) {
-            buf.append(VARIANT_SEP);
-            buf.append(specs->variant);
-        }
-        delete specs;
-        return buf;
-    }
-    return EMPTY;
-}

 /**
 * Parse a single ID, that is, an ID of the general form
@ -207,6 +176,33 @@ TransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos,
    return single;
 }

+/**
+ * Parse a filter ID, that is, an ID of the general form
+ * "[f1] s1-t1/v1", with the filters optional, and the variants optional.
+ * @param id the id to be parsed
+ * @param pos INPUT-OUTPUT parameter.  On input, the position of
+ * the first character to parse.  On output, the position after
+ * the last character parsed.  If NULL is returned, pos is restored
+ * to its input value.
+ * @return a SingleID object carrying the parsed filter, or NULL if
+ * the parse fails or no SingleID could be created.  The caller owns
+ * the returned object.
+ */
+TransliteratorIDParser::SingleID*
+TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) {
+
+    int32_t start = pos;
+
+    Specs* specs = parseFilterID(id, pos, TRUE);
+    if (specs == NULL) {
+        pos = start;
+        return NULL;
+    }
+
+    // Assemble return results.  Guard against specsToID() returning
+    // NULL so that a failure there is reported to the caller instead
+    // of being dereferenced.
+    SingleID* single = specsToID(specs, FORWARD);
+    if (single != NULL) {
+        single->filter = specs->filter;
+    } else {
+        pos = start;
+    }
+    delete specs;
+    return single;
+}
+
 /**
 * Parse a global filter of the form "[f]" or "([f])", depending
 * on 'withParens'.
--- a/icu4c/source/i18n/tridpars.h
+++ b/icu4c/source/i18n/tridpars.h
@ -98,19 +98,15 @@ class TransliteratorIDParser {
    };

    /**
-     * Parse a basic ID from the given string.  A basic ID contains
-     * only a single source, target, and variant.  It does not contain
-     * a filter or an explicit inverse.
+     * Parse a filter ID, that is, an ID of the general form
+     * "[f1] s1-t1/v1", with the filters optional, and the variants optional.
     * @param id the id to be parsed
     * @param pos INPUT-OUTPUT parameter.  On input, the position of
     * the first character to parse.  On output, the position after
-     * the last character parsed.  If the parse fails pos[0] will be
-     * unchanged.
-     * @return the parsed ID in canonical format, or null on parse
-     * failure.  If the parsed ID did not contain a source, the return
-     * ID will not.
+     * the last character parsed.  If the ID text cannot be parsed,
+     * pos is restored to its input value.
+     * @return a SingleID object, which the caller is responsible for
+     * deleting, or null if the parse fails
     */
-    static UnicodeString parseBasicID(const UnicodeString& id, int32_t& pos);
+    static SingleID* parseFilterID(const UnicodeString& id, int32_t& pos);

    /**
     * Parse a single ID, that is, an ID of the general form
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@ -156,6 +156,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
        TESTCASE(66,TestSurrogateCasing);
        TESTCASE(67,TestFunction);
        TESTCASE(68,TestInvalidBackRef);
+        TESTCASE(69,TestUserFunction);

        default: name = ""; break;
    }
@ -3337,7 +3338,7 @@ void TransliteratorTest::TestFunction() {
    // Careful with spacing and ';' here:  Phrase this exactly
    // as toRules() is going to return it.  If toRules() changes
    // with regard to spacing or ';', then adjust this string.
-    UnicodeString rule = // TODO clean up spacing
+    UnicodeString rule =
        "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
    
    UParseError pe;
@ -3388,6 +3389,123 @@ void TransliteratorTest::TestInvalidBackRef(void) {
    }
 }

+// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+// BEGIN TestUserFunction support factory
+
+// Prototype transliterators handed out (as clones) by _TUFFactory, and
+// the IDs they were registered under.  Both tables are indexed by the
+// integer token passed to Transliterator::registerFactory().  They are
+// used only by the helpers and test in this file, hence 'static'.
+static Transliterator* _TUFF[4];
+static UnicodeString _TUFID[4];
+
+/**
+ * Factory callback registered by _TUFReg().  Returns a clone of the
+ * prototype stored in the _TUFF slot selected by context.integer.
+ * The ID argument is not needed here and is therefore left unnamed,
+ * which also avoids an unused-parameter warning.
+ */
+static Transliterator* _TUFFactory(const UnicodeString& /*ID*/,
+                                   Transliterator::Token context) {
+    return _TUFF[context.integer]->clone();
+}
+
+/**
+ * Store prototype t and its ID in slot n, then register _TUFFactory
+ * under that ID with n as the factory's integer token.  The slot keeps
+ * the pointer until _TUFUnreg(n) deletes it.
+ */
+static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
+    // Fill in the slot before registering so the factory can find it.
+    _TUFID[n] = ID;
+    _TUFF[n] = t;
+    Transliterator::Token slot = Transliterator::integerToken(n);
+    Transliterator::registerFactory(ID, _TUFFactory, slot);
+}
+
+/**
+ * Undo _TUFReg() for slot n: unregister the stored ID and delete the
+ * prototype.  Does nothing if the slot is empty.  The slot is cleared
+ * afterwards so that a repeated call cannot delete the same prototype
+ * twice and _TUFFactory never sees a dangling pointer.
+ */
+static void _TUFUnreg(int32_t n) {
+    if (_TUFF[n] != NULL) {
+        Transliterator::unregister(_TUFID[n]);
+        delete _TUFF[n];
+        _TUFF[n] = NULL;
+    }
+}
+
+// END TestUserFunction support factory
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+/**
+ * Test that user-registered transliterators can be used under function
+ * syntax.
+ *
+ * Prototype transliterators are built from rules and registered through
+ * _TUFReg(); further rules then invoke them with "&name(...)".  Every
+ * exit, successful or not, passes through the FAIL label, where all
+ * registered prototypes are unregistered and deleted.
+ */
+void TransliteratorTest::TestUserFunction() {
+
+    Transliterator* t;
+    UParseError pe;
+    UErrorCode ec = U_ZERO_ERROR;
+
+    // Setup our factory
+    int32_t i;
+    for (i=0; i<4; ++i) {
+        _TUFF[i] = NULL;
+    }
+
+    // There's no need to register inverses if we don't use them
+    t = Transliterator::createFromRules("gif",
+                                        "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
+        delete t; // not registered, so nothing else would free it
+        goto FAIL;
+    }
+    _TUFReg("Any-gif", t, 0); // slot 0 now owns t
+
+    t = Transliterator::createFromRules("RemoveCurly",
+                                        "[\\{\\}] > ;",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
+        delete t;
+        goto FAIL;
+    }
+    _TUFReg("Any-RemoveCurly", t, 1); // slot 1 now owns t
+
+    logln("Trying &hex");
+    t = Transliterator::createFromRules("hex2",
+                                        "(.) > &hex($1);",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules hex2 " + u_errorName(ec));
+        delete t;
+        goto FAIL;
+    }
+    logln("Registering");
+    _TUFReg("Any-hex2", t, 2); // slot 2 now owns t
+    // This instance comes from the registry and is owned by this function.
+    t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
+        delete t;
+        goto FAIL;
+    }
+    expect(*t, "abc", "\\u0061\\u0062\\u0063");
+    delete t;
+
+    logln("Trying &gif");
+    t = Transliterator::createFromRules("gif2",
+                                        "(.) > &Gif(&Hex2($1));",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
+        delete t;
+        goto FAIL;
+    }
+    logln("Registering");
+    _TUFReg("Any-gif2", t, 3); // slot 3 now owns t
+    t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
+        delete t;
+        goto FAIL;
+    }
+    expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
+           "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
+    delete t;
+
+    // Test that filters are allowed after &
+    t = Transliterator::createFromRules("test",
+                                        "(.) > &Hex($1) ' ' &[\\{\\}]Remove(&Name($1)) ' ';",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
+        delete t;
+        goto FAIL;
+    }
+    expect(*t, "abc",
+           "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
+    delete t;
+
+    // Common cleanup; also reached by falling through on success.
+ FAIL:
+    for (i=0; i<4; ++i) {
+        _TUFUnreg(i);
+    }
+}
+
 //======================================================================
 // Support methods
 //======================================================================
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@ -322,6 +322,8 @@ private:

    void TestInvalidBackRef(void);

+    /**
+     * Test that user-registered transliterators can be used under
+     * function syntax.
+     */
+    void TestUserFunction(void);
+
    //======================================================================
    // Support methods
    //======================================================================