ICU-73 finish quantifier and supplemental char support

X-SVN-Rev: 6003
2001-10-03 00:18:23 +00:00 · 2001-10-03 00:18:23 +00:00 · a56c858f03
commit a56c858f03
parent 40694d1edc
12 changed files with 1154 additions and 660 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
- * $Date: 2001/09/28 05:47:30 $
- * $Revision: 1.47 $
+ * $Date: 2001/10/03 00:18:23 $
+ * $Revision: 1.48 $
 *
 *****************************************************************************************
 */
@ -77,14 +77,6 @@ public class TransliteratorTest extends TestFmwk {
        logln("Elapsed time: " + ms + " ms");
    }

-    public void TestDisplayName() {
-        String ID;
-        for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
-            ID = (String) e.nextElement();
-            logln(ID + " -> " + Transliterator.getDisplayName(ID));
-        }
-    }
-
    public void TestSimpleRules() {
        /* Example: rules 1. ab>x|y
         *                2. yc>z
@ -131,29 +123,6 @@ public class TransliteratorTest extends TestFmwk {
        expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
    }

-    /**
-     * Test undefined variable.
-     */
-    public void TestUndefinedVariable() {
-        String rule = "$initial } a <> \u1161;";
-        try {
-            Transliterator t = new RuleBasedTransliterator("<ID>", rule);
-            t = null;
-        } catch (IllegalArgumentException e) {
-            logln("OK: Got exception for " + rule + ", as expected: " +
-                  e.getMessage());
-            return;
-        }
-        errln("Fail: bogus rule " + rule + " compiled without error");
-    }
-
-    /**
-     * Test empty context.
-     */
-    public void TestEmptyContext() {
-        expect(" { a } > b;", "xay a ", "xby b ");
-    }
-
    /**
     * Test inline set syntax and set variable syntax.
     */
@ -524,6 +493,9 @@ public class TransliteratorTest extends TestFmwk {
        expect(hex3, "012", "&#x30;&#x31;&#x32;");
    }

+    // NOTE(review): empty placeholder.  The _TODO suffix suggests the real
+    // test (J329) has yet to be written -- confirm before relying on it.
+    public void TestJ329_TODO() {
+    }
+
    /**
     * Test segments and segment references.
     */
@ -681,6 +653,9 @@ public class TransliteratorTest extends TestFmwk {

    }

+    // NOTE(review): empty placeholder.  The _TODO suffix suggests the real
+    // test (J476) has yet to be written -- confirm before relying on it.
+    public void TestCopyJ476_TODO() {
+    }
+
    /**
     * Test inter-Indic transliterators.  These are composed.
     */
@ -700,47 +675,6 @@ public class TransliteratorTest extends TestFmwk {
        expect(dg, dev, guj);
    }

-    /**
-     * Test IDs of inverses of compound transliterators. (J20)
-     */
-    public void TestCompoundInverseID() {
-        String ID = "Latin-Jamo;NFC(NFD)";
-        Transliterator t = Transliterator.getInstance(ID);
-        Transliterator u = t.getInverse();
-        String exp = "NFD(NFC);Jamo-Latin";
-        String got = u.getID();
-        if (!got.equals(exp)) {
-            errln("FAIL: Inverse of " + ID + " is " + got +
-                  ", expected " + exp);
-        }
-    }
-
-    /**
-     * Inverse of "Null" should be "Null". (J21)
-     */
-    public void TestNullInverse() {
-        Transliterator t = Transliterator.getInstance("Null");
-        Transliterator u = t.getInverse();
-        if (!u.getID().equals("Null")) {
-            errln("FAIL: Inverse of Null should be Null");
-        }
-    }
-
-    /**
-     * Check ID of inverse of alias. (J22)
-     */
-    public void TestAliasInverseID() {
-        String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
-        Transliterator t = Transliterator.getInstance(ID);
-        Transliterator u = t.getInverse();
-        String exp = "Hangul-Latin";
-        String got = u.getID();
-        if (!got.equals(exp)) {
-            errln("FAIL: Inverse of " + ID + " is " + got +
-                  ", expected " + exp);
-        }
-    }
-
    /**
     * Test filter syntax in IDs. (J23)
     */
@ -785,118 +719,6 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

-    /**
-     * Test the "Remove" transliterator.
-     */
-    public void TestRemove() {
-        Transliterator t = Transliterator.getInstance("Remove[aeiou]");
-        expect(t, "The quick brown fox.",
-               "Th qck brwn fx.");
-    }
-
-    public void TestToRules() {
-        String RBT = "rbt";
-        String SET = "set";
-        String[] DATA = {
-            RBT,
-            "$a=\\u4E61; [$a] > A;",
-            "[\\u4E61] > A;",
-
-            RBT,
-            "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
-            "[[:Zs:][:Zl:]]{a} > A;",
-
-            SET,
-            "[[:Zs:][:Zl:]]",
-            "[[:Zs:][:Zl:]]",
-
-            SET,
-            "[:Ps:]",
-            "[:Ps:]",
-
-            SET,
-            "[:L:]",
-            "[:L:]",
-
-            SET,
-            "[[:L:]-[A]]",
-            "[[:L:]-[A]]",
-
-            SET,
-            "[~[:Lu:][:Ll:]]",
-            "[~[:Lu:][:Ll:]]",
-
-            SET,
-            "[~[a-z]]",
-            "[~[a-z]]",
-
-            RBT,
-            "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
-            "[^[:Zs:]]{a} > A;",
-
-            RBT,
-            "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
-            "[[a-z]-[:Zs:]]{a} > A;",
-
-            RBT,
-            "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
-            "[[:Zs:]&[a-z]]{a} > A;",
-
-            RBT,
-            "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
-            "[x[:Zs:]]{a} > A;",
-        };
-
-        for (int d=0; d < DATA.length; d+=3) {
-            if (DATA[d] == RBT) {
-                // Transliterator test
-                Transliterator t = Transliterator.createFromRules("ID",
-                                       DATA[d+1], Transliterator.FORWARD);
-                if (t == null) {
-                    errln("FAIL: createFromRules failed");
-                    return;
-                }
-                String rules, escapedRules;
-                rules = t.toRules(false);
-                escapedRules = t.toRules(true);
-                String expRules = Utility.unescape(DATA[d+2]);
-                String expEscapedRules = DATA[d+2];
-                if (rules.equals(expRules)) {
-                    logln("Ok: " + DATA[d+1] +
-                          " => " + Utility.escape(rules));
-                } else {
-                    errln("FAIL: " + DATA[d+1] +
-                          " => " + Utility.escape(rules + ", exp " + expRules));
-                }
-                if (escapedRules.equals(expEscapedRules)) {
-                    logln("Ok: " + DATA[d+1] +
-                          " => " + escapedRules);
-                } else {
-                    errln("FAIL: " + DATA[d+1] +
-                          " => " + escapedRules + ", exp " + expEscapedRules);
-                }
-
-            } else {
-                // UnicodeSet test
-                String pat = DATA[d+1];
-                String expToPat = DATA[d+2];
-                UnicodeSet set = new UnicodeSet(pat);
-
-                // Adjust spacing etc. as necessary.
-                String toPat;
-                toPat = set.toPattern(true);
-                if (expToPat.equals(toPat)) {
-                    logln("Ok: " + pat +
-                          " => " + toPat);
-                } else {
-                    errln("FAIL: " + pat +
-                          " => " + Utility.escape(toPat) +
-                          ", exp " + Utility.escape(pat));
-                }
-            }
-        }
-    }
-
    /**
     * Test the case mapping transliterators.
     */
@ -966,6 +788,9 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    // NOTE(review): empty placeholder.  The _TODO suffix suggests the real
+    // test has yet to be written -- confirm before relying on it.
+    public void TestCreateInstance_TODO() {
+    }
+
    /**
     * Test the normalization transliterator.
     */
@ -1139,6 +964,249 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    // NOTE(review): empty placeholder.  The _TODO suffix suggests the real
+    // test has yet to be written -- confirm before relying on it.
+    public void TestCompoundFilter_TODO() {
+    }
+
+    /**
+     * Verify the transliterator obtained for the ID "Remove[aeiou]":
+     * the letters a, e, i, o and u are expected to disappear from the
+     * input while every other character is left in place.
+     */
+    public void TestRemove() {
+        final String input = "The quick brown fox.";
+        final String expected = "Th qck brwn fx.";
+        expect(Transliterator.getInstance("Remove[aeiou]"), input, expected);
+    }
+
+    /**
+     * Test that toRules() on a transliterator built from rules, and
+     * toPattern() on a UnicodeSet, regenerate the expected source text.
+     *
+     * DATA is a flat list of triples:
+     *   [d]   type tag, RBT or SET
+     *   [d+1] input rules (RBT) or set pattern (SET)
+     *   [d+2] expected output, in escaped form
+     */
+    public void TestToRules() {
+        String RBT = "rbt";
+        String SET = "set";
+        String[] DATA = {
+            RBT,
+            "$a=\\u4E61; [$a] > A;",
+            "[\\u4E61] > A;",
+
+            RBT,
+            "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
+            "[[:Zs:][:Zl:]]{a} > A;",
+
+            SET,
+            "[[:Zs:][:Zl:]]",
+            "[[:Zs:][:Zl:]]",
+
+            SET,
+            "[:Ps:]",
+            "[:Ps:]",
+
+            SET,
+            "[:L:]",
+            "[:L:]",
+
+            SET,
+            "[[:L:]-[A]]",
+            "[[:L:]-[A]]",
+
+            SET,
+            "[~[:Lu:][:Ll:]]",
+            "[~[:Lu:][:Ll:]]",
+
+            SET,
+            "[~[a-z]]",
+            "[~[a-z]]",
+
+            RBT,
+            "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
+            "[^[:Zs:]]{a} > A;",
+
+            RBT,
+            "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
+            "[[a-z]-[:Zs:]]{a} > A;",
+
+            RBT,
+            "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
+            "[[:Zs:]&[a-z]]{a} > A;",
+
+            RBT,
+            "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
+            "[x[:Zs:]]{a} > A;",
+        };
+
+        for (int d=0; d < DATA.length; d+=3) {
+            // Compare the tag by value; reference equality only worked
+            // because every entry happens to reuse the same String object.
+            if (RBT.equals(DATA[d])) {
+                // Transliterator test
+                Transliterator t = Transliterator.createFromRules("ID",
+                                       DATA[d+1], Transliterator.FORWARD);
+                if (t == null) {
+                    errln("FAIL: createFromRules failed");
+                    return;
+                }
+                String rules, escapedRules;
+                rules = t.toRules(false);
+                escapedRules = t.toRules(true);
+                // The table stores the escaped form; unescape it to get
+                // the expectation for the unescaped toRules(false) output.
+                String expRules = Utility.unescape(DATA[d+2]);
+                String expEscapedRules = DATA[d+2];
+                if (rules.equals(expRules)) {
+                    logln("Ok: " + DATA[d+1] +
+                          " => " + Utility.escape(rules));
+                } else {
+                    errln("FAIL: " + DATA[d+1] +
+                          " => " + Utility.escape(rules + ", exp " + expRules));
+                }
+                if (escapedRules.equals(expEscapedRules)) {
+                    logln("Ok: " + DATA[d+1] +
+                          " => " + escapedRules);
+                } else {
+                    errln("FAIL: " + DATA[d+1] +
+                          " => " + escapedRules + ", exp " + expEscapedRules);
+                }
+
+            } else {
+                // UnicodeSet test
+                String pat = DATA[d+1];
+                String expToPat = DATA[d+2];
+                UnicodeSet set = new UnicodeSet(pat);
+
+                // Adjust spacing etc. as necessary.
+                String toPat;
+                toPat = set.toPattern(true);
+                if (expToPat.equals(toPat)) {
+                    logln("Ok: " + pat +
+                          " => " + toPat);
+                } else {
+                    // Report the expected pattern, not the input pattern,
+                    // so the message stays correct if the two ever differ.
+                    errln("FAIL: " + pat +
+                          " => " + Utility.escape(toPat) +
+                          ", exp " + Utility.escape(expToPat));
+                }
+            }
+        }
+    }
+
+    /**
+     * Test rules whose match depends on surrounding context.
+     *
+     * The first case runs with an explicit position (contextStart=0,
+     * contextLimit=2, start=0, limit=1) on "de" and expects "ye", i.e.
+     * the rule "{d}e > y" is the one that fires rather than "de > x".
+     * The second case uses the default position and expects only the
+     * 'c' that follows "ab" to be rewritten.
+     */
+    public void TestContext() {
+        Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
+
+        expect("de > x; {d}e > y;",
+               "de",
+               "ye",
+               pos);
+
+        expect("ab{c} > z;",
+               "xadabdabcy",
+               "xadabdabzy");
+    }
+
+    /**
+     * Convert a string containing backslash escapes into the string it
+     * denotes by passing it through Utility.unescape().
+     * @param s text that may contain escapes such as \\uXXXX or \\UXXXXXXXX
+     * @return the result of Utility.unescape(s)
+     */
+    static final String CharsToUnicodeString(String s) {
+        return Utility.unescape(s);
+    }
+
+    /**
+     * Test rules that involve supplementary code points.  The rules, the
+     * input and the expected output are written with \\U escapes and
+     * converted by CharsToUnicodeString().  The three cases use such
+     * characters in a variable and a set, in sets combined with segment
+     * references ($1, $2), and in a rule that repositions the cursor
+     * with '|' and '@'.
+     */
+    public void TestSupplemental() {
+
+        expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
+                                    "a > $a; $s > i;"),
+               CharsToUnicodeString("ab\\U0001030Fx"),
+               CharsToUnicodeString("\\U00010300bix"));
+
+        expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
+                                    "$b=[A-Z\\U00010400-\\U0001044D];" +
+                                    "($a)($b) > $2 $1;"),
+               CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
+               CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
+
+        // Step-by-step trace of the third case ('|' marks the cursor):
+        // k|ax\\U00010300xm
+
+        // k|a\\U00010400\\U00010300xm
+        // ky|\\U00010400\\U00010300xm
+        // ky\\U00010400|\\U00010300xm
+
+        // ky\\U00010400|\\U00010300\\U00010400m
+        // ky\\U00010400y|\\U00010400m
+        expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
+                                    "$a {x} > | @ \\U00010400;" +
+                                    "{$a} [^\\u0000-\\uFFFF] > y;"),
+               CharsToUnicodeString("kax\\U00010300xm"),
+               CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
+    }
+
+    /**
+     * Test the rule quantifiers '+', '*' and '?'.  The cases cover
+     * quantified ante and post context combined with the cursor marker
+     * '@', explicit positions that bound how far the cursor may move,
+     * and quantifiers applied to segments, sets, quoted strings and
+     * variable references.
+     */
+    public void TestQuantifier() {
+
+        // Make sure @ in a quantified anteContext works
+        expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
+               "AAAAAb",
+               "aaa(aac)");
+
+        // Make sure @ in a quantified postContext works
+        expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
+               "baaaaa",
+               "caa(aaa)");
+
+        // Make sure @ in a quantified postContext with seg ref works
+        expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
+               "baaaaa",
+               "baa(aaa)");
+
+        // Make sure @ past ante context doesn't enter ante context
+        Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
+        expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
+               "xxxab",
+               "xxx(ac)",
+               pos);
+
+        // Make sure @ past post context doesn't pass limit
+        Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
+        expect("{b} a+ > c @@ |; x > y; a > A;",
+               "baxx",
+               "caxx",
+               pos2);
+
+        // Make sure @ past post context doesn't enter post context
+        expect("{b} a+ > c @@ |; x > y; a > A;",
+               "baxx",
+               "cayy");
+
+        expect("(ab)? c > d;",
+               "c abc ababc",
+               "d d abd");
+
+        expect("(ab)+ {x} > '(' $1 ')';",
+               "x abx ababxy",
+               "x ab(ab) abab(abab)y");
+
+        expect("b+ > x;",
+               "ac abc abbc abbbc",
+               "ac axc axc axc");
+
+        expect("[abc]+ > x;",
+               "qac abrc abbcs abtbbc",
+               "qx xrx xs xtx");
+
+        expect("q{(ab)+} > x;",
+               "qa qab qaba qababc qaba",
+               "qa qx qxa qxc qxa");
+
+        expect("q(ab)* > x;",
+               "qa qab qaba qababc",
+               "xa x xa xc");
+
+        // Oddity -- "(foo)* > $1" causes $1 to match the run of "foo"s
+        // In perl, it only matches the first occurrence, so the output
+        // is "()a (ab) (ab)a (ab)c".
+        expect("q(ab)* > '(' $1 ')';",
+               "qa qab qaba qababc",
+               "()a (ab) (ab)a (abab)c");
+
+        // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
+        // quoted string
+        expect("'ab'+ > x;",
+               "bb ab ababb",
+               "bb x xb");
+
+        // $foo+ and $foo* -- the quantifier should apply to the entire
+        // variable reference
+        expect("$var = ab; $var+ > x;",
+               "bb ab ababb",
+               "bb x xb");
+    }
+
+    // NOTE(review): empty placeholder.  The _TODO suffix suggests the real
+    // test has yet to be written -- confirm before relying on it.
+    public void TestSTV_TODO() {
+    }
+
    /**
     * Test inverse of Greek-Latin; Title()
     */
@ -1159,9 +1227,92 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    //======================================================================
+    // icu4j only
+    //======================================================================
+
+    /**
+     * The transliterator returned by getInverse() on "Null" must itself
+     * report the ID "Null". (J21)
+     */
+    public void TestNullInverse() {
+        String inverseID =
+            Transliterator.getInstance("Null").getInverse().getID();
+        if (inverseID.equals("Null")) {
+            return;
+        }
+        errln("FAIL: Inverse of Null should be Null");
+    }
+
+    /**
+     * The inverse of a transliterator obtained through an alias ID must
+     * report the reversed ID. (J22)
+     */
+    public void TestAliasInverseID() {
+        // Any alias ID that has an inverse would serve here
+        final String ID = "Latin-Hangul";
+        final String exp = "Hangul-Latin";
+
+        String got = Transliterator.getInstance(ID).getInverse().getID();
+        if (got.equals(exp)) {
+            return;
+        }
+        errln("FAIL: Inverse of " + ID + " is " + got +
+              ", expected " + exp);
+    }
+
+    /**
+     * The inverse of a compound transliterator must report an ID whose
+     * elements are reversed in order and individually inverted. (J20)
+     */
+    public void TestCompoundInverseID() {
+        final String ID = "Latin-Jamo;NFC(NFD)";
+        final String exp = "NFD(NFC);Jamo-Latin";
+
+        String got = Transliterator.getInstance(ID).getInverse().getID();
+        if (got.equals(exp)) {
+            return;
+        }
+        errln("FAIL: Inverse of " + ID + " is " + got +
+              ", expected " + exp);
+    }
+
+    /**
+     * A rule that references the undefined variable $initial must be
+     * rejected with an IllegalArgumentException when the transliterator
+     * is constructed.
+     */
+    public void TestUndefinedVariable() {
+        String rule = "$initial } a <> \u1161;";
+        boolean rejected = false;
+        try {
+            // Only construction matters; the instance itself is not used.
+            new RuleBasedTransliterator("<ID>", rule);
+        } catch (IllegalArgumentException e) {
+            rejected = true;
+            logln("OK: Got exception for " + rule + ", as expected: " +
+                  e.getMessage());
+        }
+        if (!rejected) {
+            errln("Fail: bogus rule " + rule + " compiled without error");
+        }
+    }
+
+    /**
+     * Test empty context.  The rule " { a } > b;" has nothing outside
+     * the braces; every 'a' in the input is expected to become 'b'.
+     */
+    public void TestEmptyContext() {
+        expect(" { a } > b;", "xay a ", "xby b ");
+    }
+
+    /**
+     * Log the display name of every ID returned by
+     * Transliterator.getAvailableIDs().  Nothing is asserted; the test
+     * only exercises getDisplayName() for each ID.
+     */
+    public void TestDisplayName() {
+        Enumeration ids = Transliterator.getAvailableIDs();
+        while (ids.hasMoreElements()) {
+            String ID = (String) ids.nextElement();
+            logln(ID + " -> " + Transliterator.getDisplayName(ID));
+        }
+    }
+
    //======================================================================
    // Support methods
    //======================================================================
+    /**
+     * Build a RuleBasedTransliterator with the ID "&lt;ID&gt;" from the
+     * given rules and check it through the Transliterator-based
+     * overload of expect().
+     * @param rules rule source used to build the transliterator
+     * @param source the input text
+     * @param expectedResult the text expected after transliteration
+     * @param pos the position passed on unchanged to the overload
+     */
+    void expect(String rules, String source, String expectedResult,
+                Transliterator.Position pos) {
+        expect(new RuleBasedTransliterator("<ID>", rules),
+               source, expectedResult, pos);
+    }

    void expect(String rules, String source, String expectedResult) {
        expect(new RuleBasedTransliterator("<ID>", rules), source, expectedResult);
@ -1176,33 +1327,53 @@ public class TransliteratorTest extends TestFmwk {
    }

    void expect(Transliterator t, String source, String expectedResult) {
-        String result = t.transliterate(source);
-        expectAux(t.getID() + ":String", source, result, expectedResult);
+        expect(t, source, expectedResult, (Transliterator.Position) null);
+    }
+
+    void expect(Transliterator t, String source, String expectedResult,
+                Transliterator.Position pos) {
+        if (pos == null) {
+            String result = t.transliterate(source);
+            expectAux(t.getID() + ":String", source, result, expectedResult);
+        }
+
+        Transliterator.Position index = null;
+        if (pos == null) {
+            index = new Transliterator.Position();
+        } else {
+            index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
+                                                pos.start, pos.limit);
+        }

        ReplaceableString rsource = new ReplaceableString(source);
-        t.transliterate(rsource);
-        result = rsource.toString();
+        if (pos == null) {
+            t.transliterate(rsource);
+        } else {
+            // Do it all at once -- below we do it incrementally
+            t.finishTransliteration(rsource, pos);
+        }
+        String result = rsource.toString();
        expectAux(t.getID() + ":Replaceable", source, result, expectedResult);

        // Test keyboard (incremental) transliteration -- this result
        // must be the same after we finalize (see below).
-        rsource.replace(0, rsource.length(), "");
-        Transliterator.Position index = new Transliterator.Position();
        StringBuffer log = new StringBuffer();
-
-        for (int i=0; i<source.length(); ++i) {
-            if (i != 0) {
-                log.append(" + ");
+        rsource.replace(0, rsource.length(), "");
+        if (pos != null) {
+            rsource.replace(0, 0, source);
+            formatInput(log, rsource, index);
+            log.append(" -> ");
+            t.transliterate(rsource, index);
+            formatInput(log, rsource, index);
+        } else {
+            for (int i=0; i<source.length(); ++i) {
+                if (i != 0) {
+                    log.append(" + ");
+                }
+                log.append(source.charAt(i)).append(" -> ");
+                t.transliterate(rsource, index, source.charAt(i));
+                formatInput(log, rsource, index);
            }
-            log.append(source.charAt(i)).append(" -> ");
-            t.transliterate(rsource, index,
-                            String.valueOf(source.charAt(i)));
-            // Append the string buffer with a vertical bar '|' where
-            // the committed index is.
-            String s = rsource.toString();
-            log.append(s.substring(0, index.start)).
-                append('|').
-                append(s.substring(index.start));
        }

        // As a final step in keyboard transliteration, we must call
@ -1217,6 +1388,41 @@ public class TransliteratorTest extends TestFmwk {
                  expectedResult);
    }

+    /**
+     * Append a picture of the text and its position indices to a buffer.
+     * When the indices are consistent the output has the form
+     * aaa{bbb|ccc|ddd}eee, where the {} mark the context start and
+     * limit and the || mark the start and limit.  Otherwise a
+     * diagnostic naming the four indices and the text is appended.
+     * @param appendTo result is appended to this param.
+     * @param input the string being transliterated
+     * @param pos the index struct
+     * @return appendTo
+     */
+    StringBuffer formatInput(StringBuffer appendTo,
+                             final ReplaceableString input,
+                             final Transliterator.Position pos) {
+        final int cs = pos.contextStart;
+        final int s = pos.start;
+        final int l = pos.limit;
+        final int cl = pos.contextLimit;
+        final int len = input.length();
+
+        // The indices must satisfy 0 <= cs <= s <= l <= cl <= len
+        boolean consistent =
+            0 <= cs && cs <= s && s <= l && l <= cl && cl <= len;
+        if (!consistent) {
+            appendTo.append("INVALID Transliterator.Position {cs=" +
+                            cs + ", s=" + s + ", l=" +
+                            l + ", cl=" + cl + "} on " +
+                            input);
+            return appendTo;
+        }
+
+        String beforeContext = input.substring(0, cs);
+        String anteContext = input.substring(cs, s);
+        String active = input.substring(s, l);
+        String postContext = input.substring(l, cl);
+        String afterContext = input.substring(cl, len);
+
+        appendTo.append(beforeContext);
+        appendTo.append('{').append(anteContext);
+        appendTo.append('|').append(active).append('|');
+        appendTo.append(postContext).append('}');
+        appendTo.append(afterContext);
+        return appendTo;
+    }
+
    void expectAux(String tag, String source,
                   String result, String expectedResult) {
        expectAux(tag, source + " -> " + result,
--- a/icu4j/src/com/ibm/icu/text/ReplaceableString.java
+++ b/icu4j/src/com/ibm/icu/text/ReplaceableString.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/ReplaceableString.java,v $ 
- * $Date: 2000/04/25 17:17:37 $ 
- * $Revision: 1.3 $
+ * $Date: 2001/10/03 00:14:22 $ 
+ * $Revision: 1.4 $
 *
 *****************************************************************************************
 */
@ -24,7 +24,7 @@ package com.ibm.text;
 *
 * @see Replaceable
 * @author Alan Liu
- * @version $RCSfile: ReplaceableString.java,v $ $Revision: 1.3 $ $Date: 2000/04/25 17:17:37 $
+ * @version $RCSfile: ReplaceableString.java,v $ $Revision: 1.4 $ $Date: 2001/10/03 00:14:22 $
 */
 public class ReplaceableString implements Replaceable {
    private StringBuffer buf;
@ -67,6 +67,13 @@ public class ReplaceableString implements Replaceable {
        return buf.toString();
    }

+    /**
+     * Return a substring of the text held by this object, from
+     * <code>start</code> (inclusive) up to <code>limit</code> (exclusive).
+     * Delegates to <code>StringBuffer.substring(int, int)</code> on the
+     * underlying buffer.
+     * @param start index of the first character to include
+     * @param limit index just past the last character to include
+     * @return the requested substring as a new <code>String</code>
+     */
+    public String substring(int start, int limit) {
+        return buf.substring(start, limit);
+    }
+
    /**
     * Return the number of characters contained in this object.
     * <code>Replaceable</code> API.
--- a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
@ -4,9 +4,9 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
- * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $ 
- * $Date: 2001/09/26 18:00:06 $ 
- * $Revision: 1.46 $
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
+ * $Date: 2001/10/03 00:14:22 $
+ * $Revision: 1.47 $
 *
 *****************************************************************************************
 */
@ -27,18 +27,18 @@ import com.ibm.text.resources.ResourceReader;
 * Whitespace, as defined by <code>Character.isWhitespace()</code>,
 * is ignored. If the first non-blank character on a line is '#',
 * the entire line is ignored as a comment. </p>
- * 
+ *
 * <p>Each set of rules consists of two groups, one forward, and one
 * reverse. This is a convention that is not enforced; rules for one
 * direction may be omitted, with the result that translations in
 * that direction will not modify the source text. In addition,
 * bidirectional forward-reverse rules may be specified for
 * symmetrical transformations.</p>
- * 
+ *
 * <p><b>Rule syntax</b> </p>
- * 
+ *
 * <p>Rule statements take one of the following forms: </p>
- * 
+ *
 * <dl>
 *     <dt><code>$alefmadda=\u0622;</code></dt>
 *     <dd><strong>Variable definition.</strong> The name on the
@ -66,7 +66,7 @@ import com.ibm.text.resources.ResourceReader;
 *         the string on the left when performing reverse
 *         transliteration.</dd>
 * </dl>
- * 
+ *
 * <dl>
 *     <dt><code>ai&lt;&gt;$alefmadda;</code></dt>
 *     <dd><strong>Bidirectional translation rule.</strong> This
@ -75,7 +75,7 @@ import com.ibm.text.resources.ResourceReader;
 *         transliteration, and vice versa when performing reverse
 *         transliteration.</dd>
 * </dl>
- * 
+ *
 * <p>Translation rules consist of a <em>match pattern</em> and an <em>output
 * string</em>. The match pattern consists of literal characters,
 * optionally preceded by context, and optionally followed by
@ -92,7 +92,7 @@ import com.ibm.text.resources.ResourceReader;
 * (or &quot;<code>123}456</code>&quot;) in which the literal
 * pattern &quot;<code>123</code>&quot; must be followed by &quot;<code>456</code>&quot;.
 * </p>
- * 
+ *
 * <p>The output string of a forward or reverse rule consists of
 * characters to replace the literal pattern characters. If the
 * output string contains the character '<code>|</code>', this is
@ -102,59 +102,59 @@ import com.ibm.text.resources.ResourceReader;
 * placed within the replacement text; however, it can actually be
 * placed into the precending or following context by using the
 * special character '<code>@</code>'. Examples:</p>
- * 
+ *
 * <blockquote>
 *     <p><code>a {foo} z &gt; | @ bar; # foo -&gt; bar, move cursor
 *     before a<br>
 *     {foo} xyz &gt; bar @@|; #&nbsp;foo -&gt; bar, cursor between
 *     y and z</code></p>
 * </blockquote>
- * 
+ *
 * <p><b>UnicodeSet</b></p>
- * 
+ *
 * <p><code>UnicodeSet</code> patterns may appear anywhere that
 * makes sense. They may appear in variable definitions.
 * Contrariwise, <code>UnicodeSet</code> patterns may themselves
 * contain variable references, such as &quot;<code>$a=[a-z];$not_a=[^$a]</code>&quot;,
 * or &quot;<code>$range=a-z;$ll=[$range]</code>&quot;.</p>
- * 
+ *
 * <p><code>UnicodeSet</code> patterns may also be embedded directly
 * into rule strings. Thus, the following two rules are equivalent:</p>
- * 
+ *
 * <blockquote>
 *     <p><code>$vowel=[aeiou]; $vowel&gt;'*'; # One way to do this<br>
 *     [aeiou]&gt;'*';
 *     &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
 *     Another way</code></p>
 * </blockquote>
- * 
+ *
 * <p>See {@link UnicodeSet} for more documentation and examples.</p>
- * 
+ *
 * <p><b>Segments</b></p>
- * 
+ *
 * <p>Segments of the input string can be matched and copied to the
 * output string. This makes certain sets of rules simpler and more
 * general, and makes reordering possible. For example:</p>
- * 
+ *
 * <blockquote>
 *     <p><code>([a-z]) &gt; $1 $1;
 *     &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
 *     double lowercase letters<br>
 *     ([:Lu:]) ([:Ll:]) &gt; $2 $1; # reverse order of Lu-Ll pairs</code></p>
 * </blockquote>
- * 
+ *
 * <p>The segment of the input string to be copied is delimited by
 * &quot;<code>(</code>&quot; and &quot;<code>)</code>&quot;. Up to
 * nine segments may be defined. Segments may not overlap. In the
 * output string, &quot;<code>$1</code>&quot; through &quot;<code>$9</code>&quot;
 * represent the input string segments, in left-to-right order of
 * definition.</p>
- * 
+ *
 * <p><b>Anchors</b></p>
- * 
+ *
 * <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
 * special characters '<code>^</code>' and '<code>$</code>'. For example:</p>
- * 
+ *
 * <blockquote>
 *   <p><code>^ a&nbsp;&nbsp; &gt; 'BEG_A'; &nbsp;&nbsp;# match 'a' at start of text<br>
 *   &nbsp; a&nbsp;&nbsp; &gt; 'A';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances
@ -163,24 +163,24 @@ import com.ibm.text.resources.ResourceReader;
 *   &nbsp; z&nbsp;&nbsp; &gt; 'Z';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances
 *   of 'z'</code></p>
 * </blockquote>
- * 
+ *
 * <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
 * This is done by including a virtual anchor character '<code>$</code>' at the end of the
 * set pattern. Although this is usually the match character for the end anchor, the set will
 * match either the beginning or the end of the text, depending on its placement. For
 * example:</p>
- * 
+ *
 * <blockquote>
 *   <p><code>$x = [a-z$]; &nbsp;&nbsp;# match 'a' through 'z' OR anchor<br>
 *   $x 1&nbsp;&nbsp;&nbsp; &gt; 2;&nbsp;&nbsp; # match '1' after a-z or at the start<br>
 *   &nbsp;&nbsp; 3 $x &gt; 4; &nbsp;&nbsp;# match '3' before a-z or at the end</code></p>
 * </blockquote>
- * 
+ *
 * <p><b>Example</b> </p>
- * 
+ *
 * <p>The following example rules illustrate many of the features of
 * the rule language. </p>
- * 
+ *
 * <table border="0" cellpadding="4">
 *     <tr>
 *         <td valign="top">Rule 1.</td>
@ -195,10 +195,10 @@ import com.ibm.text.resources.ResourceReader;
 *         <td valign="top" nowrap><code>yz&gt;q</code></td>
 *     </tr>
 * </table>
- * 
+ *
 * <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot;
 * yields the following results: </p>
- * 
+ *
 * <table border="0" cellpadding="4">
 *     <tr>
 *         <td valign="top" nowrap><code>|adefabcdefz</code></td>
@ -251,23 +251,23 @@ import com.ibm.text.resources.ResourceReader;
 *         transliteration is complete.</td>
 *     </tr>
 * </table>
- * 
+ *
 * <p>The order of rules is significant. If multiple rules may match
 * at some point, the first matching rule is applied. </p>
- * 
+ *
 * <p>Forward and reverse rules may have an empty output string.
 * Otherwise, an empty left or right hand side of any statement is a
 * syntax error. </p>
- * 
+ *
 * <p>Single quotes are used to quote any character other than a
 * digit or letter. To specify a single quote itself, inside or
 * outside of quotes, use two single quotes in a row. For example,
 * the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the
 * string &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;.
 * </p>
- * 
+ *
 * <p><b>Notes</b> </p>
- * 
+ *
 * <p>While a RuleBasedTransliterator is being built, it checks that
 * the rules are added in proper order. For example, if the rule
 * &quot;a&gt;x&quot; is followed by the rule &quot;ab&gt;y&quot;,
@ -275,11 +275,11 @@ import com.ibm.text.resources.ResourceReader;
 * the second rule can never be triggered, since the first rule
 * always matches anything it matches. In other words, the first
 * rule <em>masks</em> the second rule. </p>
- * 
+ *
 * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
- * 
+ *
 * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.46 $ $Date: 2001/09/26 18:00:06 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.47 $ $Date: 2001/10/03 00:14:22 $
 */
 public class RuleBasedTransliterator extends Transliterator {

@ -433,7 +433,7 @@ public class RuleBasedTransliterator extends Transliterator {
         * stored in the rule text to represent the set of characters.
         * variables[i] represents character (variablesBase + i).
         */
-        UnicodeSet[] variables;
+        UnicodeMatcher[] variables;

        /**
         * The character that represents variables[0].  Characters
@ -498,6 +498,9 @@ public class RuleBasedTransliterator extends Transliterator {

 /**
 * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.47  2001/10/03 00:14:22  alan
+ * jitterbug 73: finish quantifier and supplemental char support
+ *
 * Revision 1.46  2001/09/26 18:00:06  alan
 * jitterbug 67: sync parser with icu4c, allow unlimited, nested segments
 *
--- a/icu4j/src/com/ibm/icu/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
@ -4,9 +4,9 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
- * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $ 
- * $Date: 2001/09/26 18:00:06 $ 
- * $Revision: 1.28 $
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
+ * $Date: 2001/10/03 00:14:23 $
+ * $Revision: 1.29 $
 *
 *****************************************************************************************
 */
@ -28,7 +28,7 @@ import com.ibm.util.Utility;
 * may contain variables.  Variables represent a set of Unicode
 * characters, such as the letters <i>a</i> through <i>z</i>.
 * Variables are detected by looking up each character in a supplied
- * variable list to see if it has been so defined. 
+ * variable list to see if it has been so defined.
 *
 * <p>A rule may contain segments in its input string and segment references in
 * its output string.  A segment is a substring of the input pattern, indicated
@ -44,7 +44,7 @@ import com.ibm.util.Utility;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.28 $ $Date: 2001/09/26 18:00:06 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.29 $ $Date: 2001/10/03 00:14:23 $
 */
 class TransliterationRule {

@ -310,7 +310,7 @@ class TransliterationRule {
         * r1:      aakkkpppp
         * r2:     aaakkkkkpppp
         *            ^
-         * 
+         *
         * The strings must be aligned at the first character of the
         * key.  The length of r1 to the left of the alignment point
         * must be <= the length of r2 to the left; ditto for the
@ -346,10 +346,10 @@ class TransliterationRule {
        int left2 = r2.anteContextLength;
        int right = pattern.length() - left;
        int right2 = r2.pattern.length() - left2;
-        
+
        // TODO Clean this up -- some logic might be combinable with the
        // next statement.
-        
+
        // Test for anchor masking
        if (left == left2 && right == right2 &&
            keyLength <= r2.keyLength &&
@ -371,7 +371,7 @@ class TransliterationRule {
            pos - UTF16.getCharCount(UTF16.charAt(str, pos-1)) :
            pos - 1;
    }
-    
+
    static final int posAfter(Replaceable str, int pos) {
        return (pos >= 0 && pos < str.length()) ?
            pos + UTF16.getCharCount(UTF16.charAt(str, pos)) :
@ -387,10 +387,10 @@ class TransliterationRule {
     * context and key characters match, but the text is not long
     * enough to match all of them.  A full match means all context
     * and key characters match.
-     * 
+     *
     * If a full match is obtained, perform a replacement, update pos,
     * and return U_MATCH.  Otherwise both text and pos are unchanged.
-     * 
+     *
     * @param text the text
     * @param pos the position indices
     * @param incremental if TRUE, test for partial matches that may
@ -559,13 +559,13 @@ class TransliterationRule {
        if (segments == null) {
            text.replace(pos.start, keyLimit, output);
            lenDelta = output.length() - (keyLimit - pos.start);
-            if (cursorPos >= 0 && cursorPos < keyLength) {
-                // Within the key, the cursor refers to 16-bit code units
+            if (cursorPos >= 0 && cursorPos <= output.length()) {
+                // Within the output string, the cursor refers to 16-bit code units
                newStart = pos.start + cursorPos;
            } else {
                newStart = pos.start;
                int n = cursorPos;
-                // Outside the key, cursorPos counts code points
+                // Outside the output string, cursorPos counts code points
                while (n > 0) {
                    newStart += UTF16.getCharCount(UTF16.charAt(text, newStart));
                    --n;
@ -638,7 +638,7 @@ class TransliterationRule {
                }
            }
        }
-    
+
        oText += lenDelta;
        pos.limit += lenDelta;
        pos.contextLimit += lenDelta;
@ -665,11 +665,11 @@ class TransliterationRule {
     * cleared out by, at the end, calling this method with a literal
     * character.
     */
-    protected void appendToRule(StringBuffer rule,
-                                int c,
-                                boolean isLiteral,
-                                boolean escapeUnprintable,
-                                StringBuffer quoteBuf) {
+    static void appendToRule(StringBuffer rule,
+                             int c,
+                             boolean isLiteral,
+                             boolean escapeUnprintable,
+                             StringBuffer quoteBuf) {
        // If we are escaping unprintables, then escape them outside
        // quotes.  <backslash>u and <backslash>U are not recognized within quotes.  The same
        // logic applies to literals, but literals are never escaped.
@ -745,11 +745,11 @@ class TransliterationRule {
        //System.out.println("rule=" + rule.toString() + " qb=" + quoteBuf.toString());
    }

-    protected final void appendToRule(StringBuffer rule,
-                                      String text,
-                                      boolean isLiteral,
-                                      boolean escapeUnprintable,
-                                      StringBuffer quoteBuf) {
+    static final void appendToRule(StringBuffer rule,
+                                   String text,
+                                   boolean isLiteral,
+                                   boolean escapeUnprintable,
+                                   StringBuffer quoteBuf) {
        for (int i=0; i<text.length(); ++i) {
            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
        }
@ -764,7 +764,7 @@ class TransliterationRule {
     */
    public String toRule(boolean escapeUnprintable) {
        int i;
-        
+
        StringBuffer rule = new StringBuffer();

        // iseg indexes into segments[] directly (not offset from FSPI)
@ -863,7 +863,7 @@ class TransliterationRule {
                    if (show) {
                        rule.append((char)(48+d));
                    }
-                }            
+                }
                rule.append(' ');
            }
        }
@ -905,6 +905,9 @@ class TransliterationRule {

 /**
 * $Log: TransliterationRule.java,v $
+ * Revision 1.29  2001/10/03 00:14:23  alan
+ * jitterbug 73: finish quantifier and supplemental char support
+ *
 * Revision 1.28  2001/09/26 18:00:06  alan
 * jitterbug 67: sync parser with icu4c, allow unlimited, nested segments
 *
--- a/icu4j/src/com/ibm/icu/text/Transliterator.java
+++ b/icu4j/src/com/ibm/icu/text/Transliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
- * $Date: 2001/09/28 20:37:09 $
- * $Revision: 1.43 $
+ * $Date: 2001/10/03 00:14:23 $
+ * $Revision: 1.44 $
 *
 *****************************************************************************************
 */
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: Transliterator.java,v $ $Revision: 1.43 $ $Date: 2001/09/28 20:37:09 $
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.44 $ $Date: 2001/10/03 00:14:23 $
 */
 public abstract class Transliterator {
    /**
@ -553,6 +553,15 @@ public abstract class Transliterator {
            index.contextLimit += insertion.length();
        }

+        char last = (text.length() > 0) ?
+            text.charAt(text.length() - 1) : 0;
+        if (UTF16.isLeadSurrogate(last)) {
+            // Oops, the caller passed us a single lead surrogate at the
+            // end of the insertion.  Don't transliterate until more text
+            // comes in.
+            return;
+        }
+
        filteredTransliterate(text, index, true);

 // This doesn't work once we add quantifier support.  Need to rewrite
--- a/icu4j/src/com/ibm/icu/text/TransliteratorParser.java
+++ b/icu4j/src/com/ibm/icu/text/TransliteratorParser.java
@ -142,6 +142,10 @@ class TransliteratorParser {
    private static final char CURSOR_OFFSET       = '@';
    private static final char ANCHOR_START        = '^';

+    private static final char KLEENE_STAR         = '*'; // quantifier: zero or more
+    private static final char ONE_OR_MORE         = '+'; // quantifier: one or more
+    private static final char ZERO_OR_ONE         = '?'; // quantifier: zero or one
+
    // By definition, the ANCHOR_END special character is a
    // trailing SymbolTable.SYMBOL_REF character.
    // private static final char ANCHOR_END       = '$';
@ -382,7 +386,7 @@ class TransliteratorParser {
        idBlock = idBlockResult.toString();

        // Convert the set vector to an array
-        data.variables = new UnicodeSet[variablesVector.size()];
+        data.variables = new UnicodeMatcher[variablesVector.size()];
        variablesVector.copyInto(data.variables);
        variablesVector = null;

@ -658,7 +662,7 @@ class TransliteratorParser {
            int varStart = -1; // Most recent $variableReference
            int varLimit = -1;
            int[] iref = new int[1];
-            
+
        main:
            while (pos < limit && !done) {
                char c = rule.charAt(pos++);
@ -853,56 +857,71 @@ class TransliteratorParser {
                        }
                    }
                    break;
-
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-
+                case KLEENE_STAR:
+                case ONE_OR_MORE:
+                case ZERO_OR_ONE:
+                    // Quantifiers '*', '+', '?'.  The quantified element is whatever ends at
+                    // the end of buf: a segment, quoted string, variable reference, or char.
+                    //  a+      matches  aaa
+                    //  'foo'+  matches  foofoofoo
+                    //  $v+     matches  xyxyxy if $v == xy
+                    //  (seg)+  matches  segsegseg
+                    {
+                        int qstart, qlimit;
+                        boolean[] isOpenParen = new boolean[1];
+                        boolean isSegment = false;
+                        if (segments != null &&
+                            segments.getLastParenOffset(isOpenParen) == buf.length()) {
+                            // The quantifier immediately follows a segment
+                            if (isOpenParen[0]) {
+                                syntaxError("Misplaced quantifier", rule, start);
+                            }
+                            int[] startparam = new int[1];
+                            int[] limitparam = new int[1];
+                            if (!segments.extractLastParenSubstring(startparam, limitparam)) {
+                                syntaxError("Mismatched segment delimiters", rule, start);
+                            }
+                            qstart = startparam[0];
+                            qlimit = limitparam[0];
+                            isSegment = true;
+                        } else {
+                            // The quantifier follows a quote, variable reference, or character
+                            if (buf.length() == quoteLimit) {
+                                // The quantifier follows a 'quoted string'
+                                qstart = quoteStart;
+                                qlimit = quoteLimit;
+                            } else if (buf.length() == varLimit) {
+                                // The quantifier follows a $variableReference
+                                qstart = varStart;
+                                qlimit = varLimit;
+                            } else {
+                                // The quantifier follows a single character.  If buf is empty
+                                // nothing precedes it; fail here rather than use qstart == -1.
+                                if (buf.length() == 0) {
+                                    syntaxError("Misplaced quantifier", rule, start);
+                                }
+                                qstart = buf.length() - 1;
+                                qlimit = qstart + 1;
+                            }
+                        }
+                        UnicodeMatcher m = new StringMatcher(buf.toString(), qstart, qlimit,
+                                                             isSegment, parser.data);
+                        int min = 0;
+                        int max = Quantifier.MAX;
+                        switch (c) {
+                        case ONE_OR_MORE:
+                            min = 1;
+                            break;
+                        case ZERO_OR_ONE:
+                            max = 1; // min is already 0
+                            break;
+                            // case KLEENE_STAR: nothing to do -- min, max already set
+                        }
+                        m = new Quantifier(m, min, max);
+                        buf.setLength(qstart); // replace the quantified text with a stand-in
+                        buf.append(parser.generateStandInFor(m));
+                    }
+                    break;
                // case SET_CLOSE:
                default:
                    // Disallow unquoted characters other than [0-9A-Za-z]
@ -947,7 +966,7 @@ class TransliteratorParser {
    //----------------------------------------------------------------------
    // END RuleHalf
    //----------------------------------------------------------------------
-    
+
    /**
     * MAIN PARSER.  Parse the next rule in the given rule string, starting
     * at pos.  Return the index after the last character parsed.  Do not
--- a/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
- * $Date: 2001/09/28 05:47:30 $
- * $Revision: 1.47 $
+ * $Date: 2001/10/03 00:18:23 $
+ * $Revision: 1.48 $
 *
 *****************************************************************************************
 */
@ -77,14 +77,6 @@ public class TransliteratorTest extends TestFmwk {
        logln("Elapsed time: " + ms + " ms");
    }

-    public void TestDisplayName() {
-        String ID;
-        for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
-            ID = (String) e.nextElement();
-            logln(ID + " -> " + Transliterator.getDisplayName(ID));
-        }
-    }
-
    public void TestSimpleRules() {
        /* Example: rules 1. ab>x|y
         *                2. yc>z
@ -131,29 +123,6 @@ public class TransliteratorTest extends TestFmwk {
        expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
    }

-    /**
-     * Test undefined variable.
-     */
-    public void TestUndefinedVariable() {
-        String rule = "$initial } a <> \u1161;";
-        try {
-            Transliterator t = new RuleBasedTransliterator("<ID>", rule);
-            t = null;
-        } catch (IllegalArgumentException e) {
-            logln("OK: Got exception for " + rule + ", as expected: " +
-                  e.getMessage());
-            return;
-        }
-        errln("Fail: bogus rule " + rule + " compiled without error");
-    }
-
-    /**
-     * Test empty context.
-     */
-    public void TestEmptyContext() {
-        expect(" { a } > b;", "xay a ", "xby b ");
-    }
-
    /**
     * Test inline set syntax and set variable syntax.
     */
@ -524,6 +493,9 @@ public class TransliteratorTest extends TestFmwk {
        expect(hex3, "012", "&#x30;&#x31;&#x32;");
    }

+    public void TestJ329_TODO() {
+    }
+
    /**
     * Test segments and segment references.
     */
@ -681,6 +653,9 @@ public class TransliteratorTest extends TestFmwk {

    }

+    public void TestCopyJ476_TODO() {
+    }
+
    /**
     * Test inter-Indic transliterators.  These are composed.
     */
@ -700,47 +675,6 @@ public class TransliteratorTest extends TestFmwk {
        expect(dg, dev, guj);
    }

-    /**
-     * Test IDs of inverses of compound transliterators. (J20)
-     */
-    public void TestCompoundInverseID() {
-        String ID = "Latin-Jamo;NFC(NFD)";
-        Transliterator t = Transliterator.getInstance(ID);
-        Transliterator u = t.getInverse();
-        String exp = "NFD(NFC);Jamo-Latin";
-        String got = u.getID();
-        if (!got.equals(exp)) {
-            errln("FAIL: Inverse of " + ID + " is " + got +
-                  ", expected " + exp);
-        }
-    }
-
-    /**
-     * Inverse of "Null" should be "Null". (J21)
-     */
-    public void TestNullInverse() {
-        Transliterator t = Transliterator.getInstance("Null");
-        Transliterator u = t.getInverse();
-        if (!u.getID().equals("Null")) {
-            errln("FAIL: Inverse of Null should be Null");
-        }
-    }
-
-    /**
-     * Check ID of inverse of alias. (J22)
-     */
-    public void TestAliasInverseID() {
-        String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
-        Transliterator t = Transliterator.getInstance(ID);
-        Transliterator u = t.getInverse();
-        String exp = "Hangul-Latin";
-        String got = u.getID();
-        if (!got.equals(exp)) {
-            errln("FAIL: Inverse of " + ID + " is " + got +
-                  ", expected " + exp);
-        }
-    }
-
    /**
     * Test filter syntax in IDs. (J23)
     */
@ -785,118 +719,6 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

-    /**
-     * Test the "Remove" transliterator.
-     */
-    public void TestRemove() {
-        Transliterator t = Transliterator.getInstance("Remove[aeiou]");
-        expect(t, "The quick brown fox.",
-               "Th qck brwn fx.");
-    }
-
-    public void TestToRules() {
-        String RBT = "rbt";
-        String SET = "set";
-        String[] DATA = {
-            RBT,
-            "$a=\\u4E61; [$a] > A;",
-            "[\\u4E61] > A;",
-
-            RBT,
-            "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
-            "[[:Zs:][:Zl:]]{a} > A;",
-
-            SET,
-            "[[:Zs:][:Zl:]]",
-            "[[:Zs:][:Zl:]]",
-
-            SET,
-            "[:Ps:]",
-            "[:Ps:]",
-
-            SET,
-            "[:L:]",
-            "[:L:]",
-
-            SET,
-            "[[:L:]-[A]]",
-            "[[:L:]-[A]]",
-
-            SET,
-            "[~[:Lu:][:Ll:]]",
-            "[~[:Lu:][:Ll:]]",
-
-            SET,
-            "[~[a-z]]",
-            "[~[a-z]]",
-
-            RBT,
-            "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
-            "[^[:Zs:]]{a} > A;",
-
-            RBT,
-            "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
-            "[[a-z]-[:Zs:]]{a} > A;",
-
-            RBT,
-            "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
-            "[[:Zs:]&[a-z]]{a} > A;",
-
-            RBT,
-            "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
-            "[x[:Zs:]]{a} > A;",
-        };
-
-        for (int d=0; d < DATA.length; d+=3) {
-            if (DATA[d] == RBT) {
-                // Transliterator test
-                Transliterator t = Transliterator.createFromRules("ID",
-                                       DATA[d+1], Transliterator.FORWARD);
-                if (t == null) {
-                    errln("FAIL: createFromRules failed");
-                    return;
-                }
-                String rules, escapedRules;
-                rules = t.toRules(false);
-                escapedRules = t.toRules(true);
-                String expRules = Utility.unescape(DATA[d+2]);
-                String expEscapedRules = DATA[d+2];
-                if (rules.equals(expRules)) {
-                    logln("Ok: " + DATA[d+1] +
-                          " => " + Utility.escape(rules));
-                } else {
-                    errln("FAIL: " + DATA[d+1] +
-                          " => " + Utility.escape(rules + ", exp " + expRules));
-                }
-                if (escapedRules.equals(expEscapedRules)) {
-                    logln("Ok: " + DATA[d+1] +
-                          " => " + escapedRules);
-                } else {
-                    errln("FAIL: " + DATA[d+1] +
-                          " => " + escapedRules + ", exp " + expEscapedRules);
-                }
-
-            } else {
-                // UnicodeSet test
-                String pat = DATA[d+1];
-                String expToPat = DATA[d+2];
-                UnicodeSet set = new UnicodeSet(pat);
-
-                // Adjust spacing etc. as necessary.
-                String toPat;
-                toPat = set.toPattern(true);
-                if (expToPat.equals(toPat)) {
-                    logln("Ok: " + pat +
-                          " => " + toPat);
-                } else {
-                    errln("FAIL: " + pat +
-                          " => " + Utility.escape(toPat) +
-                          ", exp " + Utility.escape(pat));
-                }
-            }
-        }
-    }
-
    /**
     * Test the case mapping transliterators.
     */
@ -966,6 +788,9 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    public void TestCreateInstance_TODO() {
+    }
+
    /**
     * Test the normalization transliterator.
     */
@ -1139,6 +964,249 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    public void TestCompoundFilter_TODO() {
+    }
+
+    /**
+     * Test the "Remove" transliterator.
+     */
+    public void TestRemove() {
+        Transliterator t = Transliterator.getInstance("Remove[aeiou]");
+        expect(t, "The quick brown fox.",
+               "Th qck brwn fx.");
+    }
+
+    /**
+     * Test toRules()/toPattern() output; DATA holds (type tag, input, expected output) triples.
+     */
+    public void TestToRules() {
+        String RBT = "rbt";
+        String SET = "set";
+        String[] DATA = {
+            RBT,
+            "$a=\\u4E61; [$a] > A;",
+            "[\\u4E61] > A;",
+
+            RBT,
+            "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
+            "[[:Zs:][:Zl:]]{a} > A;",
+
+            SET,
+            "[[:Zs:][:Zl:]]",
+            "[[:Zs:][:Zl:]]",
+
+            SET,
+            "[:Ps:]",
+            "[:Ps:]",
+
+            SET,
+            "[:L:]",
+            "[:L:]",
+
+            SET,
+            "[[:L:]-[A]]",
+            "[[:L:]-[A]]",
+
+            SET,
+            "[~[:Lu:][:Ll:]]",
+            "[~[:Lu:][:Ll:]]",
+
+            SET,
+            "[~[a-z]]",
+            "[~[a-z]]",
+
+            RBT,
+            "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
+            "[^[:Zs:]]{a} > A;",
+
+            RBT,
+            "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
+            "[[a-z]-[:Zs:]]{a} > A;",
+
+            RBT,
+            "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
+            "[[:Zs:]&[a-z]]{a} > A;",
+
+            RBT,
+            "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
+            "[x[:Zs:]]{a} > A;",
+        };
+
+        for (int d=0; d < DATA.length; d+=3) {
+            if (RBT.equals(DATA[d])) {
+                // Transliterator test
+                Transliterator t = Transliterator.createFromRules("ID",
+                                       DATA[d+1], Transliterator.FORWARD);
+                if (t == null) {
+                    errln("FAIL: createFromRules failed");
+                    return;
+                }
+                String rules = t.toRules(false);
+                String escapedRules = t.toRules(true);
+                String expRules = Utility.unescape(DATA[d+2]);
+                String expEscapedRules = DATA[d+2];
+                if (rules.equals(expRules)) {
+                    logln("Ok: " + DATA[d+1] +
+                          " => " + Utility.escape(rules));
+                } else {
+                    errln("FAIL: " + DATA[d+1] +
+                          " => " + Utility.escape(rules + ", exp " + expRules));
+                }
+                if (escapedRules.equals(expEscapedRules)) {
+                    logln("Ok: " + DATA[d+1] +
+                          " => " + escapedRules);
+                } else {
+                    errln("FAIL: " + DATA[d+1] +
+                          " => " + escapedRules + ", exp " + expEscapedRules);
+                }
+
+            } else {
+                // UnicodeSet test
+                String pat = DATA[d+1];
+                String expToPat = DATA[d+2];
+                UnicodeSet set = new UnicodeSet(pat);
+
+                // Adjust spacing etc. as necessary.
+                String toPat = set.toPattern(true);
+                if (expToPat.equals(toPat)) {
+                    logln("Ok: " + pat + " => " + toPat);
+                } else {
+                    errln("FAIL: " + pat +
+                          " => " + Utility.escape(toPat) +
+                          ", exp " + Utility.escape(expToPat));
+                }
+            }
+        }
+    }
+
+    public void TestContext() {
+        Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
+        // NOTE(review): "cs cl s l" presumably = contextStart, contextLimit, start, limit -- confirm
+        expect("de > x; {d}e > y;",
+               "de",
+               "ye",
+               pos);
+        // Only the 'c' that is preceded by "ab" is expected to be rewritten
+        expect("ab{c} > z;",
+               "xadabdabcy",
+               "xadabdabzy");
+    }
+
+    static final String CharsToUnicodeString(String s) {
+        return Utility.unescape(s);
+    }
+
+    public void TestSupplemental() {
+
+        expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
+                                    "a > $a; $s > i;"),
+               CharsToUnicodeString("ab\\U0001030Fx"),
+               CharsToUnicodeString("\\U00010300bix"));
+
+        expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
+                                    "$b=[A-Z\\U00010400-\\U0001044D];" +
+                                    "($a)($b) > $2 $1;"),
+               CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
+               CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
+
+        // Expected cursor trace for the rules below ('|' marks the cursor):
+        //   k|ax\\U00010300xm
+        //   k|a\\U00010400\\U00010300xm            <- first x replaced; cursor back before 'a'
+        //   ky|\\U00010400\\U00010300xm
+        //   ky\\U00010400|\\U00010300xm
+        //   ky\\U00010400|\\U00010300\\U00010400m  <- second x replaced; cursor back again
+        //   ky\\U00010400y|\\U00010400m
+        // The second rule needs $a to be followed by a character outside the BMP range.
+        expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
+                                    "$a {x} > | @ \\U00010400;" +
+                                    "{$a} [^\\u0000-\\uFFFF] > y;"),
+               CharsToUnicodeString("kax\\U00010300xm"),
+               CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
+    }
+
+    public void TestQuantifier() {
+
+        // Make sure @ in a quantified anteContext works
+        expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
+               "AAAAAb",
+               "aaa(aac)");
+
+        // Make sure @ in a quantified postContext works
+        expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
+               "baaaaa",
+               "caa(aaa)");
+
+        // Make sure @ in a quantified postContext with seg ref works
+        expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
+               "baaaaa",
+               "baa(aaa)");
+
+        // Make sure @ past ante context doesn't enter ante context
+        Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
+        expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
+               "xxxab",
+               "xxx(ac)",
+               pos);
+
+        // Make sure @ past post context doesn't pass limit
+        Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
+        expect("{b} a+ > c @@ |; x > y; a > A;",
+               "baxx",
+               "caxx",
+               pos2);
+
+        // Make sure @ past post context doesn't enter post context
+        expect("{b} a+ > c @@ |; x > y; a > A;",
+               "baxx",
+               "cayy");
+
+        expect("(ab)? c > d;",
+               "c abc ababc",
+               "d d abd");
+
+        expect("(ab)+ {x} > '(' $1 ')';",
+               "x abx ababxy",
+               "x ab(ab) abab(abab)y");
+
+        expect("b+ > x;",
+               "ac abc abbc abbbc",
+               "ac axc axc axc");
+
+        expect("[abc]+ > x;",
+               "qac abrc abbcs abtbbc",
+               "qx xrx xs xtx");
+
+        expect("q{(ab)+} > x;",
+               "qa qab qaba qababc qaba",
+               "qa qx qxa qxc qxa");
+
+        expect("q(ab)* > x;",
+               "qa qab qaba qababc",
+               "xa x xa xc");
+
+        // Oddity -- "(foo)* > $1" causes $1 to match the run of "foo"s
+        // In perl, it only matches the first occurrence, so the output
+        // is "()a (ab) (ab)a (ab)c".
+        expect("q(ab)* > '(' $1 ')';",
+               "qa qab qaba qababc",
+               "()a (ab) (ab)a (abab)c");
+
+        // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
+        // quoted string
+        expect("'ab'+ > x;",
+               "bb ab ababb",
+               "bb x xb");
+
+        // $foo+ and $foo* -- the quantifier should apply to the entire
+        // variable reference
+        expect("$var = ab; $var+ > x;",
+               "bb ab ababb",
+               "bb x xb");
+    }
+
+    public void TestSTV_TODO() {
+    }
+
    /**
     * Test inverse of Greek-Latin; Title()
     */
@ -1159,9 +1227,92 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    //======================================================================
+    // icu4j only
+    //======================================================================
+
+    /**
+     * The "Null" transliterator must be its own inverse: the ID
+     * reported by its inverse has to be "Null" as well. (J21)
+     */
+    public void TestNullInverse() {
+        Transliterator inverse = Transliterator.getInstance("Null").getInverse();
+        String inverseID = inverse.getID();
+        if (inverseID.equals("Null")) {
+            return;
+        }
+        errln("FAIL: Inverse of Null should be Null");
+    }
+
+    /**
+     * Verify that the inverse of an alias transliterator reports the
+     * expected ID. (J22)
+     */
+    public void TestAliasInverseID() {
+        // Any alias ID that has an inverse would serve here
+        final String forwardID = "Latin-Hangul";
+        final String expectedID = "Hangul-Latin";
+        String actualID =
+            Transliterator.getInstance(forwardID).getInverse().getID();
+        if (actualID.equals(expectedID)) {
+            return;
+        }
+        errln("FAIL: Inverse of " + forwardID + " is " + actualID +
+              ", expected " + expectedID);
+    }
+
+    /**
+     * Verify that the inverse of a compound transliterator reports the
+     * expected compound ID. (J20)
+     */
+    public void TestCompoundInverseID() {
+        final String forwardID = "Latin-Jamo;NFC(NFD)";
+        final String expectedID = "NFD(NFC);Jamo-Latin";
+        String actualID =
+            Transliterator.getInstance(forwardID).getInverse().getID();
+        if (actualID.equals(expectedID)) {
+            return;
+        }
+        errln("FAIL: Inverse of " + forwardID + " is " + actualID +
+              ", expected " + expectedID);
+    }
+
+    /**
+     * A rule that references an undefined variable must be rejected:
+     * constructing a RuleBasedTransliterator from it is expected to
+     * throw an IllegalArgumentException.
+     */
+    public void TestUndefinedVariable() {
+        final String rule = "$initial } a <> \u1161;";
+        IllegalArgumentException thrown = null;
+        try {
+            // Only the construction matters; the instance is discarded
+            new RuleBasedTransliterator("<ID>", rule);
+        } catch (IllegalArgumentException e) {
+            thrown = e;
+            logln("OK: Got exception for " + rule + ", as expected: " +
+                  e.getMessage());
+        }
+        if (thrown == null) {
+            errln("Fail: bogus rule " + rule + " compiled without error");
+        }
+    }
+
+    /**
+     * Test a rule whose context delimiters are present but whose
+     * contexts are empty.
+     */
+    public void TestEmptyContext() {
+        String rules = " { a } > b;";
+        expect(rules, "xay a ", "xby b ");
+    }
+
+    /**
+     * Log the display name of every available transliterator ID.
+     * Nothing is asserted; the test only exercises getDisplayName().
+     */
+    public void TestDisplayName() {
+        Enumeration ids = Transliterator.getAvailableIDs();
+        while (ids.hasMoreElements()) {
+            String ID = (String) ids.nextElement();
+            logln(ID + " -> " + Transliterator.getDisplayName(ID));
+        }
+    }
+
    //======================================================================
    // Support methods
    //======================================================================
+    /**
+     * Build a RuleBasedTransliterator from the given rules and check it
+     * against the given source text and position.
+     * @param rules the rule text to compile
+     * @param source the text to transliterate
+     * @param expectedResult the text expected after transliteration
+     * @param pos the position passed through to the Transliterator overload
+     */
+    void expect(String rules,
+                String source,
+                String expectedResult,
+                Transliterator.Position pos) {
+        expect(new RuleBasedTransliterator("<ID>", rules),
+               source, expectedResult, pos);
+    }

    void expect(String rules, String source, String expectedResult) {
        expect(new RuleBasedTransliterator("<ID>", rules), source, expectedResult);
@ -1176,33 +1327,53 @@ public class TransliteratorTest extends TestFmwk {
    }

    void expect(Transliterator t, String source, String expectedResult) {
-        String result = t.transliterate(source);
-        expectAux(t.getID() + ":String", source, result, expectedResult);
+        expect(t, source, expectedResult, (Transliterator.Position) null);
+    }
+
+    void expect(Transliterator t, String source, String expectedResult,
+                Transliterator.Position pos) {
+        if (pos == null) {
+            String result = t.transliterate(source);
+            expectAux(t.getID() + ":String", source, result, expectedResult);
+        }
+
+        Transliterator.Position index = null;
+        if (pos == null) {
+            index = new Transliterator.Position();
+        } else {
+            index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
+                                                pos.start, pos.limit);
+        }

        ReplaceableString rsource = new ReplaceableString(source);
-        t.transliterate(rsource);
-        result = rsource.toString();
+        if (pos == null) {
+            t.transliterate(rsource);
+        } else {
+            // Do it all at once -- below we do it incrementally
+            t.finishTransliteration(rsource, pos);
+        }
+        String result = rsource.toString();
        expectAux(t.getID() + ":Replaceable", source, result, expectedResult);

        // Test keyboard (incremental) transliteration -- this result
        // must be the same after we finalize (see below).
-        rsource.replace(0, rsource.length(), "");
-        Transliterator.Position index = new Transliterator.Position();
        StringBuffer log = new StringBuffer();
-
-        for (int i=0; i<source.length(); ++i) {
-            if (i != 0) {
-                log.append(" + ");
+        rsource.replace(0, rsource.length(), "");
+        if (pos != null) {
+            rsource.replace(0, 0, source);
+            formatInput(log, rsource, index);
+            log.append(" -> ");
+            t.transliterate(rsource, index);
+            formatInput(log, rsource, index);
+        } else {
+            for (int i=0; i<source.length(); ++i) {
+                if (i != 0) {
+                    log.append(" + ");
+                }
+                log.append(source.charAt(i)).append(" -> ");
+                t.transliterate(rsource, index, source.charAt(i));
+                formatInput(log, rsource, index);
            }
-            log.append(source.charAt(i)).append(" -> ");
-            t.transliterate(rsource, index,
-                            String.valueOf(source.charAt(i)));
-            // Append the string buffer with a vertical bar '|' where
-            // the committed index is.
-            String s = rsource.toString();
-            log.append(s.substring(0, index.start)).
-                append('|').
-                append(s.substring(index.start));
        }

        // As a final step in keyboard transliteration, we must call
@ -1217,6 +1388,41 @@ public class TransliteratorTest extends TestFmwk {
                  expectedResult);
    }

+    /**
+     * Append a readable picture of the text and its position indices.
+     * For consistent indices the form is aaa{bbb|ccc|ddd}eee, where the
+     * braces mark the context start and limit and the bars mark the
+     * start and limit.  For inconsistent indices a diagnostic message
+     * listing the four indices and the text is appended instead.
+     * @param appendTo result is appended to this param.
+     * @param input the string being transliterated
+     * @param pos the index struct
+     * @return appendTo, to allow chaining
+     */
+    StringBuffer formatInput(StringBuffer appendTo,
+                             final ReplaceableString input,
+                             final Transliterator.Position pos) {
+        // The indices must be ordered
+        // 0 <= contextStart <= start <= limit <= contextLimit <= length
+        boolean malformed =
+            pos.contextStart < 0 ||
+            pos.start < pos.contextStart ||
+            pos.limit < pos.start ||
+            pos.contextLimit < pos.limit ||
+            input.length() < pos.contextLimit;
+
+        if (malformed) {
+            appendTo.append("INVALID Transliterator.Position {cs=")
+                .append(pos.contextStart)
+                .append(", s=").append(pos.start)
+                .append(", l=").append(pos.limit)
+                .append(", cl=").append(pos.contextLimit)
+                .append("} on " + input);
+            return appendTo;
+        }
+
+        appendTo.append(input.substring(0, pos.contextStart));
+        appendTo.append('{');
+        appendTo.append(input.substring(pos.contextStart, pos.start));
+        appendTo.append('|');
+        appendTo.append(input.substring(pos.start, pos.limit));
+        appendTo.append('|');
+        appendTo.append(input.substring(pos.limit, pos.contextLimit));
+        appendTo.append('}');
+        appendTo.append(input.substring(pos.contextLimit, input.length()));
+        return appendTo;
+    }
+
    void expectAux(String tag, String source,
                   String result, String expectedResult) {
        expectAux(tag, source + " -> " + result,
--- a/icu4j/src/com/ibm/text/ReplaceableString.java
+++ b/icu4j/src/com/ibm/text/ReplaceableString.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/ReplaceableString.java,v $ 
- * $Date: 2000/04/25 17:17:37 $ 
- * $Revision: 1.3 $
+ * $Date: 2001/10/03 00:14:22 $ 
+ * $Revision: 1.4 $
 *
 *****************************************************************************************
 */
@ -24,7 +24,7 @@ package com.ibm.text;
 *
 * @see Replaceable
 * @author Alan Liu
- * @version $RCSfile: ReplaceableString.java,v $ $Revision: 1.3 $ $Date: 2000/04/25 17:17:37 $
+ * @version $RCSfile: ReplaceableString.java,v $ $Revision: 1.4 $ $Date: 2001/10/03 00:14:22 $
 */
 public class ReplaceableString implements Replaceable {
    private StringBuffer buf;
@ -67,6 +67,13 @@ public class ReplaceableString implements Replaceable {
        return buf.toString();
    }

+    /**
+     * Return a copy of the characters of this object from index
+     * <code>start</code> up to <code>limit</code>, by delegating to
+     * <code>substring</code> on the underlying <code>StringBuffer</code>.
+     * @param start index of the first character to include
+     * @param limit index just past the last character to include --
+     * assumed exclusive, as in <code>StringBuffer.substring</code>
+     * @return the requested characters as a <code>String</code>
+     */
+    public String substring(int start, int limit) {
+        return buf.substring(start, limit);
+    }
+
    /**
     * Return the number of characters contained in this object.
     * <code>Replaceable</code> API.
--- a/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
@ -4,9 +4,9 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
- * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $ 
- * $Date: 2001/09/26 18:00:06 $ 
- * $Revision: 1.46 $
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
+ * $Date: 2001/10/03 00:14:22 $
+ * $Revision: 1.47 $
 *
 *****************************************************************************************
 */
@ -27,18 +27,18 @@ import com.ibm.text.resources.ResourceReader;
 * Whitespace, as defined by <code>Character.isWhitespace()</code>,
 * is ignored. If the first non-blank character on a line is '#',
 * the entire line is ignored as a comment. </p>
- * 
+ *
 * <p>Each set of rules consists of two groups, one forward, and one
 * reverse. This is a convention that is not enforced; rules for one
 * direction may be omitted, with the result that translations in
 * that direction will not modify the source text. In addition,
 * bidirectional forward-reverse rules may be specified for
 * symmetrical transformations.</p>
- * 
+ *
 * <p><b>Rule syntax</b> </p>
- * 
+ *
 * <p>Rule statements take one of the following forms: </p>
- * 
+ *
 * <dl>
 *     <dt><code>$alefmadda=\u0622;</code></dt>
 *     <dd><strong>Variable definition.</strong> The name on the
@ -66,7 +66,7 @@ import com.ibm.text.resources.ResourceReader;
 *         the string on the left when performing reverse
 *         transliteration.</dd>
 * </dl>
- * 
+ *
 * <dl>
 *     <dt><code>ai&lt;&gt;$alefmadda;</code></dt>
 *     <dd><strong>Bidirectional translation rule.</strong> This
@ -75,7 +75,7 @@ import com.ibm.text.resources.ResourceReader;
 *         transliteration, and vice versa when performing reverse
 *         transliteration.</dd>
 * </dl>
- * 
+ *
 * <p>Translation rules consist of a <em>match pattern</em> and an <em>output
 * string</em>. The match pattern consists of literal characters,
 * optionally preceded by context, and optionally followed by
@ -92,7 +92,7 @@ import com.ibm.text.resources.ResourceReader;
 * (or &quot;<code>123}456</code>&quot;) in which the literal
 * pattern &quot;<code>123</code>&quot; must be followed by &quot;<code>456</code>&quot;.
 * </p>
- * 
+ *
 * <p>The output string of a forward or reverse rule consists of
 * characters to replace the literal pattern characters. If the
 * output string contains the character '<code>|</code>', this is
@ -102,59 +102,59 @@ import com.ibm.text.resources.ResourceReader;
 * placed within the replacement text; however, it can actually be
 * placed into the preceding or following context by using the
 * special character '<code>@</code>'. Examples:</p>
- * 
+ *
 * <blockquote>
 *     <p><code>a {foo} z &gt; | @ bar; # foo -&gt; bar, move cursor
 *     before a<br>
 *     {foo} xyz &gt; bar @@|; #&nbsp;foo -&gt; bar, cursor between
 *     y and z</code></p>
 * </blockquote>
- * 
+ *
 * <p><b>UnicodeSet</b></p>
- * 
+ *
 * <p><code>UnicodeSet</code> patterns may appear anywhere that
 * makes sense. They may appear in variable definitions.
 * Contrariwise, <code>UnicodeSet</code> patterns may themselves
 * contain variable references, such as &quot;<code>$a=[a-z];$not_a=[^$a]</code>&quot;,
 * or &quot;<code>$range=a-z;$ll=[$range]</code>&quot;.</p>
- * 
+ *
 * <p><code>UnicodeSet</code> patterns may also be embedded directly
 * into rule strings. Thus, the following two rules are equivalent:</p>
- * 
+ *
 * <blockquote>
 *     <p><code>$vowel=[aeiou]; $vowel&gt;'*'; # One way to do this<br>
 *     [aeiou]&gt;'*';
 *     &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
 *     Another way</code></p>
 * </blockquote>
- * 
+ *
 * <p>See {@link UnicodeSet} for more documentation and examples.</p>
- * 
+ *
 * <p><b>Segments</b></p>
- * 
+ *
 * <p>Segments of the input string can be matched and copied to the
 * output string. This makes certain sets of rules simpler and more
 * general, and makes reordering possible. For example:</p>
- * 
+ *
 * <blockquote>
 *     <p><code>([a-z]) &gt; $1 $1;
 *     &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
 *     double lowercase letters<br>
 *     ([:Lu:]) ([:Ll:]) &gt; $2 $1; # reverse order of Lu-Ll pairs</code></p>
 * </blockquote>
- * 
+ *
 * <p>The segment of the input string to be copied is delimited by
 * &quot;<code>(</code>&quot; and &quot;<code>)</code>&quot;. Up to
 * nine segments may be defined. Segments may not overlap. In the
 * output string, &quot;<code>$1</code>&quot; through &quot;<code>$9</code>&quot;
 * represent the input string segments, in left-to-right order of
 * definition.</p>
- * 
+ *
 * <p><b>Anchors</b></p>
- * 
+ *
 * <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
 * special characters '<code>^</code>' and '<code>$</code>'. For example:</p>
- * 
+ *
 * <blockquote>
 *   <p><code>^ a&nbsp;&nbsp; &gt; 'BEG_A'; &nbsp;&nbsp;# match 'a' at start of text<br>
 *   &nbsp; a&nbsp;&nbsp; &gt; 'A';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances
@ -163,24 +163,24 @@ import com.ibm.text.resources.ResourceReader;
 *   &nbsp; z&nbsp;&nbsp; &gt; 'Z';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances
 *   of 'z'</code></p>
 * </blockquote>
- * 
+ *
 * <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
 * This is done by including a virtual anchor character '<code>$</code>' at the end of the
 * set pattern. Although this is usually the match character for the end anchor, the set will
 * match either the beginning or the end of the text, depending on its placement. For
 * example:</p>
- * 
+ *
 * <blockquote>
 *   <p><code>$x = [a-z$]; &nbsp;&nbsp;# match 'a' through 'z' OR anchor<br>
 *   $x 1&nbsp;&nbsp;&nbsp; &gt; 2;&nbsp;&nbsp; # match '1' after a-z or at the start<br>
 *   &nbsp;&nbsp; 3 $x &gt; 4; &nbsp;&nbsp;# match '3' before a-z or at the end</code></p>
 * </blockquote>
- * 
+ *
 * <p><b>Example</b> </p>
- * 
+ *
 * <p>The following example rules illustrate many of the features of
 * the rule language. </p>
- * 
+ *
 * <table border="0" cellpadding="4">
 *     <tr>
 *         <td valign="top">Rule 1.</td>
@ -195,10 +195,10 @@ import com.ibm.text.resources.ResourceReader;
 *         <td valign="top" nowrap><code>yz&gt;q</code></td>
 *     </tr>
 * </table>
- * 
+ *
 * <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot;
 * yields the following results: </p>
- * 
+ *
 * <table border="0" cellpadding="4">
 *     <tr>
 *         <td valign="top" nowrap><code>|adefabcdefz</code></td>
@ -251,23 +251,23 @@ import com.ibm.text.resources.ResourceReader;
 *         transliteration is complete.</td>
 *     </tr>
 * </table>
- * 
+ *
 * <p>The order of rules is significant. If multiple rules may match
 * at some point, the first matching rule is applied. </p>
- * 
+ *
 * <p>Forward and reverse rules may have an empty output string.
 * Otherwise, an empty left or right hand side of any statement is a
 * syntax error. </p>
- * 
+ *
 * <p>Single quotes are used to quote any character other than a
 * digit or letter. To specify a single quote itself, inside or
 * outside of quotes, use two single quotes in a row. For example,
 * the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the
 * string &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;.
 * </p>
- * 
+ *
 * <p><b>Notes</b> </p>
- * 
+ *
 * <p>While a RuleBasedTransliterator is being built, it checks that
 * the rules are added in proper order. For example, if the rule
 * &quot;a&gt;x&quot; is followed by the rule &quot;ab&gt;y&quot;,
@ -275,11 +275,11 @@ import com.ibm.text.resources.ResourceReader;
 * the second rule can never be triggered, since the first rule
 * always matches anything it matches. In other words, the first
 * rule <em>masks</em> the second rule. </p>
- * 
+ *
 * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
- * 
+ *
 * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.46 $ $Date: 2001/09/26 18:00:06 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.47 $ $Date: 2001/10/03 00:14:22 $
 */
 public class RuleBasedTransliterator extends Transliterator {

@ -433,7 +433,7 @@ public class RuleBasedTransliterator extends Transliterator {
         * stored in the rule text to represent the set of characters.
         * variables[i] represents character (variablesBase + i).
         */
-        UnicodeSet[] variables;
+        UnicodeMatcher[] variables;

        /**
         * The character that represents variables[0].  Characters
@ -498,6 +498,9 @@ public class RuleBasedTransliterator extends Transliterator {

 /**
 * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.47  2001/10/03 00:14:22  alan
+ * jitterbug 73: finish quantifier and supplemental char support
+ *
 * Revision 1.46  2001/09/26 18:00:06  alan
 * jitterbug 67: sync parser with icu4c, allow unlimited, nested segments
 *
--- a/icu4j/src/com/ibm/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/text/TransliterationRule.java
@ -4,9 +4,9 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
- * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $ 
- * $Date: 2001/09/26 18:00:06 $ 
- * $Revision: 1.28 $
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $
+ * $Date: 2001/10/03 00:14:23 $
+ * $Revision: 1.29 $
 *
 *****************************************************************************************
 */
@ -28,7 +28,7 @@ import com.ibm.util.Utility;
 * may contain variables.  Variables represent a set of Unicode
 * characters, such as the letters <i>a</i> through <i>z</i>.
 * Variables are detected by looking up each character in a supplied
- * variable list to see if it has been so defined. 
+ * variable list to see if it has been so defined.
 *
 * <p>A rule may contain segments in its input string and segment references in
 * its output string.  A segment is a substring of the input pattern, indicated
@ -44,7 +44,7 @@ import com.ibm.util.Utility;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.28 $ $Date: 2001/09/26 18:00:06 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.29 $ $Date: 2001/10/03 00:14:23 $
 */
 class TransliterationRule {

@ -310,7 +310,7 @@ class TransliterationRule {
         * r1:      aakkkpppp
         * r2:     aaakkkkkpppp
         *            ^
-         * 
+         *
         * The strings must be aligned at the first character of the
         * key.  The length of r1 to the left of the alignment point
         * must be <= the length of r2 to the left; ditto for the
@ -346,10 +346,10 @@ class TransliterationRule {
        int left2 = r2.anteContextLength;
        int right = pattern.length() - left;
        int right2 = r2.pattern.length() - left2;
-        
+
        // TODO Clean this up -- some logic might be combinable with the
        // next statement.
-        
+
        // Test for anchor masking
        if (left == left2 && right == right2 &&
            keyLength <= r2.keyLength &&
@ -371,7 +371,7 @@ class TransliterationRule {
            pos - UTF16.getCharCount(UTF16.charAt(str, pos-1)) :
            pos - 1;
    }
-    
+
    static final int posAfter(Replaceable str, int pos) {
        return (pos >= 0 && pos < str.length()) ?
            pos + UTF16.getCharCount(UTF16.charAt(str, pos)) :
@ -387,10 +387,10 @@ class TransliterationRule {
     * context and key characters match, but the text is not long
     * enough to match all of them.  A full match means all context
     * and key characters match.
-     * 
+     *
     * If a full match is obtained, perform a replacement, update pos,
     * and return U_MATCH.  Otherwise both text and pos are unchanged.
-     * 
+     *
     * @param text the text
     * @param pos the position indices
     * @param incremental if TRUE, test for partial matches that may
@ -559,13 +559,13 @@ class TransliterationRule {
        if (segments == null) {
            text.replace(pos.start, keyLimit, output);
            lenDelta = output.length() - (keyLimit - pos.start);
-            if (cursorPos >= 0 && cursorPos < keyLength) {
-                // Within the key, the cursor refers to 16-bit code units
+            if (cursorPos >= 0 && cursorPos <= output.length()) {
+                // Within the output string, the cursor refers to 16-bit code units
                newStart = pos.start + cursorPos;
            } else {
                newStart = pos.start;
                int n = cursorPos;
-                // Outside the key, cursorPos counts code points
+                // Outside the output string, cursorPos counts code points
                while (n > 0) {
                    newStart += UTF16.getCharCount(UTF16.charAt(text, newStart));
                    --n;
@ -638,7 +638,7 @@ class TransliterationRule {
                }
            }
        }
-    
+
        oText += lenDelta;
        pos.limit += lenDelta;
        pos.contextLimit += lenDelta;
@ -665,11 +665,11 @@ class TransliterationRule {
     * cleared out by, at the end, calling this method with a literal
     * character.
     */
-    protected void appendToRule(StringBuffer rule,
-                                int c,
-                                boolean isLiteral,
-                                boolean escapeUnprintable,
-                                StringBuffer quoteBuf) {
+    static void appendToRule(StringBuffer rule,
+                             int c,
+                             boolean isLiteral,
+                             boolean escapeUnprintable,
+                             StringBuffer quoteBuf) {
        // If we are escaping unprintables, then escape them outside
        // quotes.  <backslash>u and <backslash>U are not recognized within quotes.  The same
        // logic applies to literals, but literals are never escaped.
@ -745,11 +745,11 @@ class TransliterationRule {
        //System.out.println("rule=" + rule.toString() + " qb=" + quoteBuf.toString());
    }

-    protected final void appendToRule(StringBuffer rule,
-                                      String text,
-                                      boolean isLiteral,
-                                      boolean escapeUnprintable,
-                                      StringBuffer quoteBuf) {
+    static final void appendToRule(StringBuffer rule,
+                                   String text,
+                                   boolean isLiteral,
+                                   boolean escapeUnprintable,
+                                   StringBuffer quoteBuf) {
        for (int i=0; i<text.length(); ++i) {
            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
        }
@ -764,7 +764,7 @@ class TransliterationRule {
     */
    public String toRule(boolean escapeUnprintable) {
        int i;
-        
+
        StringBuffer rule = new StringBuffer();

        // iseg indexes into segments[] directly (not offset from FSPI)
@ -863,7 +863,7 @@ class TransliterationRule {
                    if (show) {
                        rule.append((char)(48+d));
                    }
-                }            
+                }
                rule.append(' ');
            }
        }
@ -905,6 +905,9 @@ class TransliterationRule {

 /**
 * $Log: TransliterationRule.java,v $
+ * Revision 1.29  2001/10/03 00:14:23  alan
+ * jitterbug 73: finish quantifier and supplemental char support
+ *
 * Revision 1.28  2001/09/26 18:00:06  alan
 * jitterbug 67: sync parser with icu4c, allow unlimited, nested segments
 *
--- a/icu4j/src/com/ibm/text/Transliterator.java
+++ b/icu4j/src/com/ibm/text/Transliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
- * $Date: 2001/09/28 20:37:09 $
- * $Revision: 1.43 $
+ * $Date: 2001/10/03 00:14:23 $
+ * $Revision: 1.44 $
 *
 *****************************************************************************************
 */
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: Transliterator.java,v $ $Revision: 1.43 $ $Date: 2001/09/28 20:37:09 $
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.44 $ $Date: 2001/10/03 00:14:23 $
 */
 public abstract class Transliterator {
    /**
@ -553,6 +553,15 @@ public abstract class Transliterator {
            index.contextLimit += insertion.length();
        }

+        char last = (text.length() > 0) ?
+            text.charAt(text.length() - 1) : 0;
+        if (UTF16.isLeadSurrogate(last)) {
+            // Oops, the caller passed us a single lead surrogate at the
+            // end of the insertion.  Don't transliterate until more text
+            // comes in.
+            return;
+        }
+
        filteredTransliterate(text, index, true);

 // This doesn't work once we add quantifier support.  Need to rewrite
--- a/icu4j/src/com/ibm/text/TransliteratorParser.java
+++ b/icu4j/src/com/ibm/text/TransliteratorParser.java
@ -142,6 +142,10 @@ class TransliteratorParser {
    private static final char CURSOR_OFFSET       = '@';
    private static final char ANCHOR_START        = '^';

+    private static final char KLEENE_STAR         = '*';
+    private static final char ONE_OR_MORE         = '+';
+    private static final char ZERO_OR_ONE         = '?';
+
    // By definition, the ANCHOR_END special character is a
    // trailing SymbolTable.SYMBOL_REF character.
    // private static final char ANCHOR_END       = '$';
@ -382,7 +386,7 @@ class TransliteratorParser {
        idBlock = idBlockResult.toString();

        // Convert the set vector to an array
-        data.variables = new UnicodeSet[variablesVector.size()];
+        data.variables = new UnicodeMatcher[variablesVector.size()];
        variablesVector.copyInto(data.variables);
        variablesVector = null;

@ -658,7 +662,7 @@ class TransliteratorParser {
            int varStart = -1; // Most recent $variableReference
            int varLimit = -1;
            int[] iref = new int[1];
-            
+
        main:
            while (pos < limit && !done) {
                char c = rule.charAt(pos++);
@ -853,56 +857,71 @@ class TransliteratorParser {
                        }
                    }
                    break;
-
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-                    // TODO Add quantifier parsing
-
+                case KLEENE_STAR:
+                case ONE_OR_MORE:
+                case ZERO_OR_ONE:
+                    // Quantifiers.  We handle single characters, quoted strings,
+                    // variable references, and segments.
+                    //  a+      matches  aaa
+                    //  'foo'+  matches  foofoofoo
+                    //  $v+     matches  xyxyxy if $v == xy
+                    //  (seg)+  matches  segsegseg
+                    {
+                        int qstart, qlimit;
+                        boolean[] isOpenParen = new boolean[1];
+                        boolean isSegment = false;
+                        if (segments != null &&
+                            segments.getLastParenOffset(isOpenParen) == buf.length()) {
+                            // The */+ immediately follows a segment
+                            if (isOpenParen[0]) {
+                                syntaxError("Misplaced quantifier", rule, start);
+                            }
+                            int[] startparam = new int[1];
+                            int[] limitparam = new int[1];
+                            if (!segments.extractLastParenSubstring(startparam, limitparam)) {
+                                syntaxError("Mismatched segment delimiters", rule, start);
+                            }
+                            qstart = startparam[0];
+                            qlimit = limitparam[0];
+                            isSegment = true;
+                        } else {
+                            // The */+ follows an isolated character or quote
+                            // or variable reference
+                            if (buf.length() == quoteLimit) {
+                                // The */+ follows a 'quoted string'
+                                qstart = quoteStart;
+                                qlimit = quoteLimit;
+                            } else if (buf.length() == varLimit) {
+                                // The */+ follows a $variableReference
+                                qstart = varStart;
+                                qlimit = varLimit;
+                            } else {
+                                // The */+ follows a single character
+                                qstart = buf.length() - 1;
+                                qlimit = qstart + 1;
+                            }
+                        }
+                        UnicodeMatcher m =
+                            new StringMatcher(buf.toString(), qstart, qlimit,
+                                              isSegment, parser.data);
+                        int min = 0;
+                        int max = Quantifier.MAX;
+                        switch (c) {
+                        case ONE_OR_MORE:
+                            min = 1;
+                            break;
+                        case ZERO_OR_ONE:
+                            min = 0;
+                            max = 1;
+                            break;
+                            // case KLEENE_STAR:
+                            //    do nothing -- min, max already set
+                        }
+                        m = new Quantifier(m, min, max);
+                        buf.setLength(qstart);
+                        buf.append(parser.generateStandInFor(m));
+                    }
+                    break;
                // case SET_CLOSE:
                default:
                    // Disallow unquoted characters other than [0-9A-Za-z]
@ -947,7 +966,7 @@ class TransliteratorParser {
    //----------------------------------------------------------------------
    // END RuleHalf
    //----------------------------------------------------------------------
-    
+
    /**
     * MAIN PARSER.  Parse the next rule in the given rule string, starting
     * at pos.  Return the index after the last character parsed.  Do not