ICU-1825 allow filter in &func; fix canonicalization of &func

X-SVN-Rev: 8479
2002-04-17 16:48:28 +00:00 · 2002-04-17 16:48:28 +00:00 · ab07165e4c
commit ab07165e4c
parent 8ed4fea84a
3 changed files with 92 additions and 30 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
- * $Date: 2002/04/01 22:15:23 $
- * $Revision: 1.103 $
+ * $Date: 2002/04/17 16:48:28 $
+ * $Revision: 1.104 $
 *
 *****************************************************************************************
 */
@ -15,6 +15,7 @@ import com.ibm.icu.lang.*;
 import com.ibm.icu.text.*;
 import com.ibm.icu.dev.test.*;
 import com.ibm.icu.impl.Utility;
+import com.ibm.icu.util.CaseInsensitiveString;
 import java.text.*;
 import java.util.*;

@ -2531,6 +2532,71 @@ public class TransliteratorTest extends TestFmwk {
                  "x x x qtp qstp qrstp");
    }

+    /**
+     * Test that user-registered transliterators can be used under function
+     * syntax.
+     *
+     * Rule-based transliterators are registered through
+     * TestUserFunctionFactory and then invoked from other rules with the
+     * &func() syntax: a single call, nested calls, and a call with a filter
+     * placed after the ampersand.  The registrations are removed at the end.
+     */
+    public void TestUserFunction() {
+        Transliterator t;
+
+        // There's no need to register inverses if we don't use them
+        TestUserFunctionFactory.add("Any-gif",
+            Transliterator.createFromRules("gif",
+                "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
+                Transliterator.FORWARD));
+        //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
+
+        TestUserFunctionFactory.add("Any-RemoveCurly",
+            Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ;", Transliterator.FORWARD));
+        //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
+
+        logln("Trying &hex");
+        t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
+        logln("Registering");
+        TestUserFunctionFactory.add("Any-hex2", t); // make the rule-based transliterator reachable under the ID "Any-hex2"
+        t = Transliterator.getInstance("Any-hex2"); // look the ID up again rather than reusing the local instance
+        expect(t, "abc", "\\u0061\\u0062\\u0063");
+
+        logln("Trying &gif");
+        t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD); // nested calls; "Gif"/"Hex2" differ in case from the registered "Any-gif"/"Any-hex2"
+        logln("Registering");
+        TestUserFunctionFactory.add("Any-gif2", t);
+        t = Transliterator.getInstance("Any-gif2");
+        expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
+               "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
+
+        // Test that filters are allowed after &
+        t = Transliterator.createFromRules("test",
+                "(.) > &Hex($1) ' ' &[\\{\\}]Remove(&Name($1)) ' ';", Transliterator.FORWARD);
+        expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
+
+        // Unregister our test stuff
+        TestUserFunctionFactory.unregister(); // NOTE(review): not reached if an earlier statement throws; consider try/finally
+    }
+
+    static class TestUserFunctionFactory implements Transliterator.Factory { // test-only factory that serves instances from a static ID-keyed map
+        static TestUserFunctionFactory singleton = new TestUserFunctionFactory(); // the one factory instance registered for every added ID
+        static HashMap m = new HashMap(); // CaseInsensitiveString(ID) -> Transliterator
+
+        static void add(String ID, Transliterator t) { // store t under ID, then register the singleton factory for that ID
+            m.put(new CaseInsensitiveString(ID), t);
+            Transliterator.registerFactory(ID, singleton);
+        }
+
+        public Transliterator getInstance(String ID) { // returns the stored object itself, or null if ID was never added
+            return (Transliterator) m.get(new CaseInsensitiveString(ID));
+        }
+        
+        static void unregister() { // unregister every ID added so far and empty the map
+            Iterator ids = m.keySet().iterator();
+            while (ids.hasNext()) {
+                CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
+                Transliterator.unregister(id.getString());
+                ids.remove(); // removes pair from m
+            }
+        }
+    }
+
    //======================================================================
    // Support methods
    //======================================================================
--- a/icu4j/src/com/ibm/icu/text/TransliteratorIDParser.java
+++ b/icu4j/src/com/ibm/icu/text/TransliteratorIDParser.java
@ -131,34 +131,27 @@ class TransliteratorIDParser {
    }

    /**
-     * Parse a basic ID from the given string.  A basic ID contains
-     * only a single source, target, and variant.  It does not contain
-     * a filter or an explicit inverse.
+     * Parse a filter ID, that is, an ID of the general form
+     * "[f1] s1-t1/v1", with the filter optional, and the variant optional.
     * @param id the id to be parsed
     * @param pos INPUT-OUTPUT parameter.  On input, the position of
     * the first character to parse.  On output, the position after
-     * the last character parsed.  If the parse fails pos[0] will be
-     * unchanged.
-     * @return the parsed ID in canonical format, or null on parse
-     * failure.  If the parsed ID did not contain a source, the return
-     * ID will not.
+     * the last character parsed.  If the parse fails, pos[0] is restored
+     * to its value on input.
+     * @return a SingleID object or null if the parse fails.  The filter
+     * taken from the parsed specs is stored in the object's filter field.
     */
-    public static String parseBasicID(String id, int[] pos) {
-        Specs specs = parseFilterID(id, pos, false);
-        if (specs != null) {
-            StringBuffer buf = new StringBuffer();
-            if (specs.sawSource) {
-                buf.append(specs.source);
-                buf.append(TARGET_SEP);
-            }
-            buf.append(specs.target);
-            if (specs.variant != null) {
-                buf.append(VARIANT_SEP);
-                buf.append(specs.variant);
-            }
-            return buf.toString();
+    public static SingleID parseFilterID(String id, int[] pos) {
+
+        int start = pos[0]; // remembered so the position can be rolled back on failure
+        Specs specs = parseFilterID(id, pos, true); // removed parseBasicID passed false here; presumably true permits a filter - TODO confirm
+        if (specs == null) {
+            pos[0] = start; // undo any advance made by the failed parse
+            return null;
        }
-        return null;
+
+        // Assemble return results
+        SingleID single = specsToID(specs, FORWARD);
+        single.filter = specs.filter; // carry the parsed filter over to the result
+        return single;
    }

    /**
--- a/icu4j/src/com/ibm/icu/text/TransliteratorParser.java
+++ b/icu4j/src/com/ibm/icu/text/TransliteratorParser.java
@ -4,8 +4,8 @@
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
-* $Date: 2002/03/06 00:37:52 $
-* $Revision: 1.20 $
+* $Date: 2002/04/17 16:46:11 $
+* $Revision: 1.21 $
 **********************************************************************
 */
 package com.ibm.icu.text;
@ -573,14 +573,17 @@ class TransliteratorParser {
                case FUNCTION:
                    {
                        iref[0] = pos;
-                        String id = TransliteratorIDParser.parseBasicID(rule, iref);
+                        TransliteratorIDParser.SingleID single = TransliteratorIDParser.parseFilterID(rule, iref);
                        // The next character MUST be a segment open
-                        if (id == null ||
+                        if (single == null ||
                            !Utility.parseChar(rule, iref, SEGMENT_OPEN)) {
                            syntaxError("Invalid function", rule, start);
                        }

-                        Transliterator t = Transliterator.getBasicInstance(id, id);
+                        Transliterator t = single.getInstance();
+                        if (t == null) {
+                            syntaxError("Invalid function ID", rule, start);
+                        }

                        // bufSegStart is the offset in buf to the first
                        // character of the segment we are parsing.