From 8ed4fea84a479bb80e4799b6b5a1007b96099ae1 Mon Sep 17 00:00:00 2001
From: Alan Liu <alansliu@gmail.com>
Date: Wed, 17 Apr 2002 16:38:18 +0000
Subject: [PATCH] ICU-1825 allow filters in  &func calls; fix canonicalization
 of &func

X-SVN-Rev: 8478
---
 icu4c/source/i18n/rbt_pars.cpp          |   8 +-
 icu4c/source/i18n/tridpars.cpp          |  58 ++++++------
 icu4c/source/i18n/tridpars.h            |  14 +--
 icu4c/source/test/intltest/transtst.cpp | 120 +++++++++++++++++++++++-
 icu4c/source/test/intltest/transtst.h   |   2 +
 5 files changed, 158 insertions(+), 44 deletions(-)

diff --git a/icu4c/source/i18n/rbt_pars.cpp b/icu4c/source/i18n/rbt_pars.cpp
index 3c0ae10418..d0d71ae854 100644
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@@ -501,14 +501,16 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
         case FUNCTION:
             {
                 int32_t iref = pos;
-                UnicodeString id = TransliteratorIDParser::parseBasicID(rule, iref);
+                TransliteratorIDParser::SingleID* single =
+                    TransliteratorIDParser::parseFilterID(rule, iref);
                 // The next character MUST be a segment open
-                if (id.length() == 0 ||
+                if (single == NULL ||
                     !ICU_Utility::parseChar(rule, iref, SEGMENT_OPEN)) {
                     return syntaxError(U_INVALID_FUNCTION, rule, start);
                 }
                 
-                Transliterator *t = TransliteratorParser::createBasicInstance(id, &id);
+                Transliterator *t = single->createInstance();
+                delete single;
                 if (t == NULL) {
                     return syntaxError(U_INVALID_FUNCTION, rule, start);
                 }
diff --git a/icu4c/source/i18n/tridpars.cpp b/icu4c/source/i18n/tridpars.cpp
index daa42882ed..171b3d881e 100644
--- a/icu4c/source/i18n/tridpars.cpp
+++ b/icu4c/source/i18n/tridpars.cpp
@@ -85,37 +85,6 @@ Transliterator* TransliteratorIDParser::SingleID::createInstance() {
     return t;
 }
 
-/**
- * Parse a basic ID from the given string.  A basic ID contains
- * only a single source, target, and variant.  It does not contain
- * a filter or an explicit inverse.
- * @param id the id to be parsed
- * @param pos INPUT-OUTPUT parameter.  On input, the position of
- * the first character to parse.  On output, the position after
- * the last character parsed.  If the parse fails pos will be
- * unchanged.
- * @return the parsed ID in canonical format, or NULL on parse
- * failure.  If the parsed ID did not contain a source, the return
- * ID will not.
- */
-UnicodeString TransliteratorIDParser::parseBasicID(const UnicodeString& id, int32_t& pos) {
-    Specs* specs = parseFilterID(id, pos, FALSE);
-    if (specs != NULL) {
-        UnicodeString buf;
-        if (specs->sawSource) {
-            buf.append(specs->source);
-            buf.append(TARGET_SEP);
-        }
-        buf.append(specs->target);
-        if (specs->variant.length() != 0) {
-            buf.append(VARIANT_SEP);
-            buf.append(specs->variant);
-        }
-        delete specs;
-        return buf;
-    }
-    return EMPTY;
-}
 
 /**
  * Parse a single ID, that is, an ID of the general form
@@ -207,6 +176,33 @@ TransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos,
     return single;
 }
 
+/**
+ * Parse a filter ID, that is, an ID of the general form
+ * "[f1] s1-t1/v1", with the filters optional, and the variants optional.
+ * @param id the id to be parsed
+ * @param pos INPUT-OUTPUT parameter.  On input, the position of
+ * the first character to parse.  On output, the position after
+ * the last character parsed.  If the parse fails, pos is unchanged.
+ * @return a SingleID object (caller deletes it) or null if the parse fails
+ */
+TransliteratorIDParser::SingleID*
+TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) {
+    int32_t start = pos;
+    Specs* specs = parseFilterID(id, pos, TRUE);
+    if (specs == NULL) {
+        pos = start;
+        return NULL;
+    }
+
+    // Assemble return results; do not dereference a NULL SingleID
+    SingleID* single = specsToID(specs, FORWARD);
+    if (single != NULL) {
+        single->filter = specs->filter;
+    }
+    delete specs;
+    return single;
+}
+
 /**
  * Parse a global filter of the form "[f]" or "([f])", depending
  * on 'withParens'.
diff --git a/icu4c/source/i18n/tridpars.h b/icu4c/source/i18n/tridpars.h
index 8349215bbb..f5fcafaa04 100644
--- a/icu4c/source/i18n/tridpars.h
+++ b/icu4c/source/i18n/tridpars.h
@@ -98,19 +98,15 @@ class TransliteratorIDParser {
     };
 
     /**
-     * Parse a basic ID from the given string.  A basic ID contains
-     * only a single source, target, and variant.  It does not contain
-     * a filter or an explicit inverse.
+     * Parse a filter ID, that is, an ID of the general form
+     * "[f1] s1-t1/v1", with the filters optional, and the variants optional.
      * @param id the id to be parsed
      * @param pos INPUT-OUTPUT parameter.  On input, the position of
      * the first character to parse.  On output, the position after
-     * the last character parsed.  If the parse fails pos[0] will be
-     * unchanged.
-     * @return the parsed ID in canonical format, or null on parse
-     * failure.  If the parsed ID did not contain a source, the return
-     * ID will not.
+     * the last character parsed.  If the parse fails, pos is unchanged.
+     * @return a SingleID object or null if the parse fails
      */
-    static UnicodeString parseBasicID(const UnicodeString& id, int32_t& pos);
+    static SingleID* parseFilterID(const UnicodeString& id, int32_t& pos);
 
     /**
      * Parse a single ID, that is, an ID of the general form
diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp
index 5a8033eb9b..5d7c432974 100644
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@@ -156,6 +156,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
         TESTCASE(66,TestSurrogateCasing);
         TESTCASE(67,TestFunction);
         TESTCASE(68,TestInvalidBackRef);
+        TESTCASE(69,TestUserFunction);
 
         default: name = ""; break;
     }
@@ -3337,7 +3338,7 @@ void TransliteratorTest::TestFunction() {
     // Careful with spacing and ';' here:  Phrase this exactly
     // as toRules() is going to return it.  If toRules() changes
     // with regard to spacing or ';', then adjust this string.
-    UnicodeString rule = // TODO clean up spacing
+    UnicodeString rule =
         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
     
     UParseError pe;
@@ -3388,6 +3389,123 @@ void TransliteratorTest::TestInvalidBackRef(void) {
     }
 }
 
+// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+// BEGIN TestUserFunction support factory
+
+static Transliterator* _TUFF[4]; // prototypes handed to _TUFReg, by slot
+static UnicodeString _TUFID[4];  // ID each slot was registered under
+// Factory callback: clone the prototype stored in slot context.integer.
+static Transliterator* _TUFFactory(const UnicodeString& /*ID*/,
+                                   Transliterator::Token context) {
+    return _TUFF[context.integer]->clone();
+}
+// Record t and ID in slot n, then register the factory for ID with token n.
+static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
+    _TUFF[n] = t;
+    _TUFID[n] = ID;
+    Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
+}
+// Unregister and delete slot n if it is occupied; empty slots are skipped.
+static void _TUFUnreg(int32_t n) {
+    if (_TUFF[n] == NULL) return;
+    Transliterator::unregister(_TUFID[n]);
+    delete _TUFF[n];
+    _TUFF[n] = NULL; // clear the slot so a repeated call cannot double-delete
+}
+
+// END TestUserFunction support factory
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+/**
+ * Test that user-registered transliterators can be used under function
+ * syntax (&ID(...)), and that a filter may precede the ID after '&'.
+ */
+void TransliteratorTest::TestUserFunction() {
+
+    Transliterator* t;
+    UParseError pe;
+    UErrorCode ec = U_ZERO_ERROR;
+
+    // Setup our factory
+    int32_t i;
+    for (i=0; i<4; ++i) {
+        _TUFF[i] = NULL;
+    }
+
+    // There's no need to register inverses if we don't use them
+    t = Transliterator::createFromRules("gif",
+                                        "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
+        return; // nothing has been registered yet, so no cleanup is needed
+    }
+    _TUFReg("Any-gif", t, 0); // slot 0 keeps t; _TUFUnreg deletes it at FAIL
+
+    t = Transliterator::createFromRules("RemoveCurly",
+                                        "[\\{\\}] > ;",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
+        goto FAIL;
+    }
+    _TUFReg("Any-RemoveCurly", t, 1);
+
+    logln("Trying &hex");
+    t = Transliterator::createFromRules("hex2",
+                                        "(.) > &hex($1);",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules hex2 " + u_errorName(ec));
+        goto FAIL;
+    }
+    logln("Registering");
+    _TUFReg("Any-hex2", t, 2);
+    t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
+        goto FAIL;
+    }
+    expect(*t, "abc", "\\u0061\\u0062\\u0063");
+    delete t; // the instance from createInstance is ours, not the slot's
+
+    logln("Trying &gif");
+    t = Transliterator::createFromRules("gif2",
+                                        "(.) > &Gif(&Hex2($1));",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
+        goto FAIL;
+    }
+    logln("Registering");
+    _TUFReg("Any-gif2", t, 3);
+    t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
+        goto FAIL;
+    }
+    expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
+           "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
+    delete t;
+
+    // Test that filters are allowed after &
+    t = Transliterator::createFromRules("test",
+                                        "(.) > &Hex($1) ' ' &[\\{\\}]Remove(&Name($1)) ' ';",
+                                        UTRANS_FORWARD, pe, ec);
+    if (t == NULL || U_FAILURE(ec)) {
+        errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
+        goto FAIL;
+    }
+    expect(*t, "abc",
+           "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
+    delete t;
+
+ FAIL:
+    for (i=0; i<4; ++i) { // reached on success too: release every used slot
+        _TUFUnreg(i);
+    }
+}
+
 //======================================================================
 // Support methods
 //======================================================================
diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h
index 60e0871e6c..1713febfc4 100644
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@@ -322,6 +322,8 @@ private:
 
     void TestInvalidBackRef(void);
 
+    void TestUserFunction(void);
+
     //======================================================================
     // Support methods
     //======================================================================