From 572e9063c00af4004de0e528ffe6b2b53362962a Mon Sep 17 00:00:00 2001
From: Alan Liu <alansliu@gmail.com>
Date: Tue, 11 Jan 2000 02:25:03 +0000
Subject: [PATCH] Rewrite UnicodeSet and RBT parsers for better performance and
 new syntax

X-SVN-Rev: 519
---
 .../ibm/icu/text/RuleBasedTransliterator.java | 1078 ++++++++++-------
 .../com/ibm/icu/text/TransliterationRule.java |   78 +-
 icu4j/src/com/ibm/icu/text/UnicodeSet.java    |  522 ++++----
 .../com/ibm/text/RuleBasedTransliterator.java | 1078 ++++++++++-------
 .../src/com/ibm/text/TransliterationRule.java |   78 +-
 icu4j/src/com/ibm/text/UnicodeSet.java        |  522 ++++----
 6 files changed, 1960 insertions(+), 1396 deletions(-)
diff --git a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
index 572a959963..7337a05292 100755
--- a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
@@ -2,6 +2,7 @@ package com.ibm.text;
 
 import java.util.Hashtable;
 import java.util.Vector;
+import java.text.ParsePosition;
 
 /**
  * A transliterator that reads a set of rules in order to determine how to
@@ -181,9 +182,12 @@ import java.util.Vector;
  * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
  *
  * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.7 $ $Date: 2000/01/06 01:36:36 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.8 $ $Date: 2000/01/11 02:25:03 $
  *
  * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.8  2000/01/11 02:25:03  Alan
+ * Rewrite UnicodeSet and RBT parsers for better performance and new syntax
+ *
  * Revision 1.7  2000/01/06 01:36:36  Alan
  * Allow string arrays in rule resource bundles
  *
@@ -195,7 +199,6 @@ import java.util.Vector;
  *
  * Revision 1.4  1999/12/22 01:05:54  Alan
  * Improve masking checking; turn it off by default, for better performance
- *
  */
 public class RuleBasedTransliterator extends Transliterator {
     /**
@@ -214,8 +217,6 @@ public class RuleBasedTransliterator extends Transliterator {
 
     static final boolean DEBUG = false;
 
-    static final boolean CHECK_MASKING = true;
-
     private static final String COPYRIGHT =
         "\u00A9 IBM Corporation 1999. All rights reserved.";
 
@@ -561,33 +562,34 @@ public class RuleBasedTransliterator extends Transliterator {
         private static final char VARIABLE_DEF_OP   = '=';
         private static final char FORWARD_RULE_OP   = '>';
         private static final char REVERSE_RULE_OP   = '<';
-        private static final char FWDREV_RULE_OP    = '~'; // internal rep of FWDREF_OP_STRING
+        private static final char FWDREV_RULE_OP    = '~'; // internal rep of <> op
 
         private static final String OPERATORS = "=><";
 
-        // Forward-Reverse operator
-        // a<>b is equivalent to a<b;a>b
-        private static final String FWDREV_OP_STRING  = "<>"; // must have length 2
-
         // Other special characters
         private static final char QUOTE               = '\'';
+        private static final char ESCAPE              = '\\';
+        private static final char END_OF_RULE         = ';';
+        private static final char RULE_COMMENT_CHAR   = '#';
+
         private static final char VARIABLE_REF_OPEN   = '{';
         private static final char VARIABLE_REF_CLOSE  = '}';
-        private static final char CONTEXT_OPEN        = '[';
-        private static final char CONTEXT_CLOSE       = ']';
+        private static final char CONTEXT_OPEN        = '(';
+        private static final char CONTEXT_CLOSE       = ')';
+        private static final char SET_OPEN            = '[';
+        private static final char SET_CLOSE           = ']';
         private static final char CURSOR_POS          = '|';
-        private static final char RULE_COMMENT_CHAR   = '#';
 
         /**
          * Specials must be quoted in rules to be used as literals.
          * Specials may not occur in variable names.
          */
-        private static final String SPECIALS = "'{}[]|#" + OPERATORS;
+//!        private static final String SPECIALS = "{}[]|" + OPERATORS;
 
         /**
          * Specials that must be quoted in variable definitions.
          */
-        private static final String DEF_SPECIALS = "'{}";
+//!        private static final String DEF_SPECIALS = "{}";
 
         /**
          * @param rules list of rules, separated by semicolon characters
@@ -616,37 +618,12 @@ public class RuleBasedTransliterator extends Transliterator {
             determineVariableRange(ruleArray);
 
             StringBuffer errors = null;
-            for (int irule=0; irule<ruleArray.length; ++irule) {
-                rules = ruleArray[irule];
-                int n = rules.length();
-                int i = 0;
-                while (i<n) {
-                    int limit = rules.indexOf(';', i);
-
-                    // Recognize "\\;" as an escaped ";"
-                    while (limit>0 && rules.charAt(limit-1) == '\\') {
-                        limit = rules.indexOf(';', limit+1);
-                    }
-
-                    if (limit == -1) {
-                        limit = n;
-                    }
-                    // Skip over empty lines and line starting with #
-                    if (limit > i && rules.charAt(i) != RULE_COMMENT_CHAR) {
-                        try {
-                            applyRule(i, limit);
-                        } catch (IllegalArgumentException e) {
-                            if (errors == null) {
-                                errors = new StringBuffer(e.getMessage());
-                            } else {
-                                errors.append("\n").append(e.getMessage());
-                            }
-                        }
-                    }
-                    i = limit + 1;
-                }
+            try {
+                parseRuleArray(ruleArray);
+            } catch (IllegalArgumentException e) {
+                errors = new StringBuffer(e.getMessage());
             }
-
+            
             // Index the rules
             try {
                 data.ruleSet.freeze(data.setVariables);
@@ -663,411 +640,684 @@ public class RuleBasedTransliterator extends Transliterator {
             }
         }
 
-        /**
-         * Parse the given substring as a rule, and append it to the rules currently
-         * represented in this object.
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @exception IllegalArgumentException if there is a syntax error in the
-         * rules
-         */
-        private void applyRule(int start, int limit) {
-            /* General description of parsing: Initially, rules contain two types of
-             * quoted characters.  First, there are variable references, such as
-             * "{alpha}".  Second, there are quotes, such as "'<'" or "''".  One of
-             * the first steps in parsing a rule is to resolve such quoted matter.
-             * Quotes are removed early, leaving unquoted literal matter.  Variable
-             * references are resolved and replaced by single characters.  In some
-             * instances these characters represent themselves; in others, they
-             * stand for categories of characters.  Character categories are either
-             * predefined (e.g., "{Lu}"), or are defined by the user using a
-             * statement (e.g., "vowels:aeiouAEIOU").
-             *
-             * Another early step in parsing is to split each rule into component
-             * pieces.  These pieces are, for every rule, a left-hand side, a right-
-             * hand side, and an operator.  The left- and right-hand sides may not
-             * be empty, except for the output patterns of forward and reverse
-             * rules.  In addition to this partitioning, the match patterns of
-             * forward and reverse rules must be partitioned into antecontext,
-             * postcontext, and literal pattern, where the context portions may or
-             * may not be present.  Finally, output patterns must have the cursor
-             * indicator '|' detected and removed, with its position recorded.
-             *
-             * Quote removal, variable resolution, and sub-pattern splitting must
-             * all happen at once.  This is due chiefly to the quoting mechanism,
-             * which allows special characters to appear at arbitrary positions in
-             * the final unquoted text.  (For this reason, alteration of the rule
-             * language is somewhat clumsy; it entails reassessment and revision of
-             * the parsing methods as a whole.)
-             *
-             * After this processing of rules is complete, the final end products
-             * are unquoted pieces of text of various types, and an integer cursor
-             * position, if one is specified.  These processed raw materials are now
-             * easy to deal with; other classes such as UnicodeSet and
-             * TransliterationRule need know nothing of quoting or variables.
-             */
-            StringBuffer left = new StringBuffer();
-            StringBuffer right = new StringBuffer();
-            StringBuffer anteContext = new StringBuffer();
-            StringBuffer postContext = new StringBuffer();
-            int cursorPos[] = new int[1];
 
-            char operator = parseRule(start, limit, left, right,
-                                      anteContext, postContext, cursorPos);
+
+
+
+
+
+
+        /** Scans each rule string, skipping leading whitespace and '#' comment
+         *  lines, and hands the start of every rule found to parseRule(). */
+        private void parseRuleArray(String[] ruleArray) {
+            for (int i=0; i<ruleArray.length; ++i) {
+                String rule = ruleArray[i];
+                int pos = 0;
+                int limit = rule.length();
+                while (pos < limit) {
+                    char c = rule.charAt(pos++);
+                    if (Character.isWhitespace(c)) {
+                        // Ignore leading whitespace.  Note that this is not
+                        // Unicode spaces, but Java spaces -- a subset,
+                        // representing whitespace likely to be seen in code.
+                        continue;
+                    }
+                    // Skip lines starting with the comment character
+                    if (c == RULE_COMMENT_CHAR) {
+                        pos = rule.indexOf("\n", pos) + 1;
+                        if (pos == 0) {
+                            break; // No "\n" found; rest of rule is a comment
+                        }
+                        continue; // Either fall out or restart with next line
+                    }
+                    // We've found the start of a rule.  c is its first
+                    // character, and pos points past c.  Lexically parse the
+                    // rule into component pieces.
+                    pos = parseRule(rule, --pos, limit);
+                }
+            }
+        }
+
+        /**
+         * Lexically parse the next rule in <code>rule</code> and apply it: a
+         * variable definition is recorded in data.variableNames; a rule that
+         * applies to this parser's direction is added to data.ruleSet.  Handles
+         * quoting, escaping, and whitespace removal; parses the end-of-rule char.
+         * @param rule the rule text
+         * @param pos start index; must be a non-whitespace, non-comment char
+         * @param limit index at or after which characters are not parsed
+         * @return the index after the last character parsed
+         * @exception IllegalArgumentException if the rule is malformed
+         */
+        int parseRule(String rule, int pos, int limit) {
+            // Locate the left side, operator, and right side
+            int start = pos;
+            char operator = 0;
+
+            StringBuffer buf = new StringBuffer();
+            int cursor = -1; // position of cursor in buf
+            int ante = -1;   // position of ante context marker ')' in buf
+            int post = -1;   // position of post context marker '(' in buf
+            int postClose = -1; // position of post context close ')' in buf
+
+            // Assigned to buf and its adjuncts after the LHS has been
+            // parsed.  Thereafter, buf etc. refer to the RHS.
+            String left = null;
+            int leftCursor = -1, leftAnte = -1, leftPost = -1, leftPostClose = -1;
+
+        main:
+            while (pos < limit) {
+                char c = rule.charAt(pos++);
+                if (Character.isWhitespace(c)) {
+                    // Ignore whitespace.  Note that this is not Unicode
+                    // spaces, but Java spaces -- a subset, representing
+                    // whitespace likely to be seen in code.
+                    continue;
+                }
+                // Handle escapes
+                if (c == ESCAPE) {
+                    if (pos == limit) {
+                        syntaxError("Trailing backslash", rule, start);
+                    }
+                    buf.append(rule.charAt(pos++));
+                    continue;
+                }
+                // Handle quoted matter
+                if (c == QUOTE) {
+                    int iq = rule.indexOf(QUOTE, pos);
+                    if (iq == pos) {
+                        buf.append(c); // Parse [''] outside quotes as [']
+                        ++pos;
+                    } else {
+                        /* This loop picks up a segment of quoted text of the
+                         * form 'aaaa' each time through.  If this segment
+                         * hasn't really ended ('aaaa''bbbb') then it keeps
+                         * looping, each time adding on a new segment.  When it
+                         * reaches the final quote it breaks.
+                         */
+                        for (;;) {
+                            if (iq < 0) {
+                                syntaxError("Unterminated quote", rule, start);
+                            }
+                            buf.append(rule.substring(pos, iq));
+                            pos = iq+1;
+                            if (pos < limit && rule.charAt(pos) == QUOTE) {
+                                // Parse [''] inside quotes as [']
+                                iq = rule.indexOf(QUOTE, pos+1);
+                                // Continue looping
+                            } else {
+                                break;
+                            }
+                        }
+                    }
+                    continue;
+                }
+                if (OPERATORS.indexOf(c) >= 0) {
+                    if (operator != 0) {
+                        syntaxError("Unquoted " + c, rule, start);
+                    }
+                    // Found an operator char.  Check for forward-reverse operator.
+                    if (c == REVERSE_RULE_OP &&
+                        (pos < limit && rule.charAt(pos) == FORWARD_RULE_OP)) {
+                        ++pos;
+                        operator = FWDREV_RULE_OP;
+                    } else {
+                        operator = c;
+                    }
+                    left = buf.toString(); // lhs
+                    leftCursor = cursor;
+                    leftAnte = ante;
+                    leftPost = post;
+                    leftPostClose = postClose;
+
+                    buf.setLength(0);
+                    cursor = ante = post = postClose = -1;
+                    continue;
+                }
+                switch (c) {
+                case END_OF_RULE:
+                    break main;
+                case VARIABLE_REF_OPEN:
+                    {
+                        int j = rule.indexOf(VARIABLE_REF_CLOSE, pos);
+                        if (pos == j || j < 0) { // empty or unterminated
+                            syntaxError("Malformed variable reference", rule, start);
+                        }
+                        String name = rule.substring(pos, j);
+                        pos = j+1;
+                        buf.append(getVariableDef(name).charValue());
+                    }
+                    break;
+                case CONTEXT_OPEN:
+                    if (post >= 0) {
+                        syntaxError("Multiple post contexts", rule, start);
+                    }
+                    // Ignore CONTEXT_OPEN if buffer length is zero -- that means
+                    // this is the optional opening delimiter for the ante context.
+                    if (buf.length() > 0) {
+                        post = buf.length();
+                    }
+                    break;
+                case CONTEXT_CLOSE:
+                    if (postClose >= 0) {
+                        syntaxError("Unexpected " + c, rule, start);
+                    }
+                    if (post >= 0) {
+                        // This is probably the optional closing delimiter
+                        // for the post context; save the pos and check later.
+                        postClose = buf.length();
+                    } else if (ante >= 0) {
+                        syntaxError("Multiple ante contexts", rule, start);
+                    } else {
+                        ante = buf.length();
+                    }
+                    break;
+                case SET_OPEN:
+                    ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
+                    buf.append(registerSet(new UnicodeSet(rule, pp,
+                                   data.variableNames, data.setVariables)).charValue());
+                    pos = pp.getIndex();
+                    break;
+                case VARIABLE_REF_CLOSE:
+                case SET_CLOSE:
+                    syntaxError("Unquoted " + c, rule, start); // always throws, so no fall-through
+                case CURSOR_POS:
+                    if (cursor >= 0) {
+                        syntaxError("Multiple cursors", rule, start);
+                    }
+                    cursor = buf.length();
+                    break;
+                default:
+                    buf.append(c);
+                    break;
+                }
+            }
+            if (operator == 0) {
+                syntaxError("No operator", rule, start);
+            }
+
+            // Check context close parameters
+            if ((leftPostClose >= 0 && leftPostClose != left.length()) ||
+                (postClose >= 0 && postClose != buf.length())) {
+                syntaxError("Extra text after " + CONTEXT_CLOSE, rule, start);
+            }
+
+            // Context is only allowed on the input side; that is, the left side
+            // for forward rules.  Cursors are only allowed on the output side;
+            // that is, the right side for forward rules.  Bidirectional rules
+            // ignore elements that do not apply.
 
             switch (operator) {
             case VARIABLE_DEF_OP:
-                applyVariableDef(left.toString(), right.toString());
+                // LHS is the name.  RHS is a single character, either a literal
+                // or a set (already parsed).  If RHS is longer than one
+                // character, it is either a multi-character string, or multiple
+                // sets, or a mixture of chars and sets -- syntax error.
+                if (buf.length() != 1) {
+                    syntaxError("Malformed RHS", rule, start);
+                }
+                if (data.variableNames.get(left) != null) {
+                    syntaxError("Duplicate definition of {" +
+                                left + "}", rule, start);
+                }
+                data.variableNames.put(left, new Character(buf.charAt(0)));
                 break;
+
             case FORWARD_RULE_OP:
                 if (direction == FORWARD) {
+                    if (ante >= 0 || post >= 0 || leftCursor >= 0) {
+                        syntaxError("Malformed rule", rule, start);
+                    }
                     data.ruleSet.addRule(new TransliterationRule(
-                                             left.toString(), right.toString(),
-                                             anteContext.toString(), postContext.toString(),
-                                             cursorPos[0]));
+                                             left, leftAnte, leftPost,
+                                             buf.toString(), cursor));
                 } // otherwise ignore the rule; it's not the direction we want
                 break;
+
             case REVERSE_RULE_OP:
                 if (direction == REVERSE) {
+                    if (leftAnte >= 0 || leftPost >= 0 || cursor >= 0) {
+                        syntaxError("Malformed rule", rule, start);
+                    }
                     data.ruleSet.addRule(new TransliterationRule(
-                                             right.toString(), left.toString(),
-                                             anteContext.toString(), postContext.toString(),
-                                             cursorPos[0]));
+                                             buf.toString(), ante, post,
+                                             left, leftCursor));
                 } // otherwise ignore the rule; it's not the direction we want
                 break;
+
             case FWDREV_RULE_OP:
-                data.ruleSet.addRule(new TransliterationRule(
-                                         direction == FORWARD ? left.toString() : right.toString(),
-                                         direction == FORWARD ? right.toString() : left.toString(),
-                                         // Context & cursor disallowed
-                                         "", "", -1));
+                if (direction == FORWARD) {
+                    // The output side is the right; trim off any context
+                    String output = buf.toString().substring(ante < 0 ? 0 : ante,
+                                                             post < 0 ? buf.length() : post);
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             left, leftAnte, leftPost,
+                                             output, cursor));
+                } else {
+                    // The output side is the left; trim off any context
+                    String output = left.substring(leftAnte < 0 ? 0 : leftAnte,
+                                                   leftPost < 0 ? left.length() : leftPost);
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             buf.toString(), ante, post,
+                                             output, leftCursor));
+                }
                 break;
             }
+
+            return pos;
         }
 
-        /**
-         * Add a variable definition.
-         * @param name the name of the variable.  It must not already be defined.
-         * @param pattern the value of the variable.  It may be a single character
-         * or a pattern describing a character set.
-         * @exception IllegalArgumentException if there is a syntax error
-         */
-        private final void applyVariableDef(String name, String pattern) {
-            validateVariableName(name);
-            if (data.variableNames.get(name) != null) {
-                throw new IllegalArgumentException("Duplicate variable definition: "
-                                                   + name + '=' + pattern);
-            }
-//!         if (UnicodeSet.getCategoryID(name) >= 0) {
-//!             throw new IllegalArgumentException("Reserved variable name: "
-//!                                                + name);
-//!         }
-            if (pattern.length() < 1) {
-                throw new IllegalArgumentException("Variable definition missing: "
-                                                   + name);
-            }
-            if (pattern.length() == 1) {
-                // Got a single character variable definition
-                data.variableNames.put(name, new Character(pattern.charAt(0)));
-            } else {
-                // Got more than one character; parse it as a category
-                if (variableNext >= variableLimit) {
-                    throw new RuntimeException("Private use variables exhausted");
-                }
-                Character c = new Character(variableNext++);
-                data.variableNames.put(name, c);
-                data.setVariables.put(c, new UnicodeSet(pattern));
+
+
+        private static final void syntaxError(String msg, String rule, int start) { // always throws IllegalArgumentException
+            int end = quotedIndexOf(rule, start, rule.length(), ";"); // index of the ';' ending this rule, per quotedIndexOf
+            if (end < 0) {
+                end = rule.length(); // no ';' found: quote the rule text through end of string
             }
+            throw new IllegalArgumentException(msg + " in " +
+                                               rule.substring(start, end));
         }
 
-        /**
-         * Given a rule, parses it into three pieces: The left side, the right side,
-         * and the operator.  Returns the operator.  Quotes and variable references
-         * are resolved; the otuput text in all <code>StringBuffer</code> parameters
-         * is literal text.  This method delegates to other parsing methods to
-         * handle the match pattern, output pattern, and other sub-patterns in the
-         * rule.
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @param left left side of rule is appended to this buffer
-         * with the quotes removed and variables resolved
-         * @param right right side of rule is appended to this buffer
-         * with the quotes removed and variables resolved
-         * @param anteContext the preceding context of the match pattern,
-         * if there is one, is appended to this buffer
-         * @param postContext the following context of the match pattern,
-         * if there is one, is appended to this buffer
-         * @param cursorPos if there is a cursor in the output pattern, its
-         * offset is stored in <code>cursorPos[0]</code>
-         * @return The operator character, one of the characters in OPERATORS.
-         */
-        private char parseRule(int start, int limit,
-                               StringBuffer left, StringBuffer right,
-                               StringBuffer anteContext,
-                               StringBuffer postContext,
-                               int[] cursorPos) {
-            if (false) {
-                System.err.println("Parsing " + rules.substring(start, limit));
-            }
-            /* Parse the rule into three pieces -- left, operator, and right,
-             * parsing out quotes.  The result is that left and right will have
-             * unquoted text.  E.g., "gt<'>'" will have right = ">".  Unquoted
-             * operators throw an exception.  Two quotes inside or outside
-             * quotes indicates a quote literal.  E.g., "o''clock" -> "o'clock".
-             */
-            int i = quotedIndexOf(rules, start, limit, OPERATORS);
-            if (i < 0) {
-                throw new IllegalArgumentException(
-                              "Syntax error: "
-                              + rules.substring(start, limit));
-            }
-            char c = rules.charAt(i);
-            
-            // Look for "<>" double rules.
-            if ((i+1) < limit && rules.substring(i, i+2).equals(FWDREV_OP_STRING)) {
-                if (i == start) {
-                    throw new IllegalArgumentException(
-                                  "Empty left side: "
-                                  + rules.substring(start, limit));
-                }
-                if (i+2 == limit) {
-                    throw new IllegalArgumentException(
-                                  "Empty right side: "
-                                  + rules.substring(start, limit));
-                }
-                parseSubPattern(start, i, left, null, SPECIALS);
-                parseSubPattern(i+2, limit, right, null, SPECIALS);
-                return FWDREV_RULE_OP;
-            }
 
-            switch (c) {
-            case FORWARD_RULE_OP:
-                if (i == start) {
-                    throw new IllegalArgumentException(
-                                  "Empty left side: "
-                                  + rules.substring(start, limit));
-                }
-                parseMatchPattern(start, i, left, anteContext, postContext);
-                if (i != (limit-1)) {
-                    parseOutputPattern(i+1, limit, right, cursorPos);
-                }
-                break;
-            case REVERSE_RULE_OP:
-                if (i == (limit-1)) {
-                    throw new IllegalArgumentException(
-                                  "Empty right side: "
-                                  + rules.substring(start, limit));
-                }
-                if (i != start) {
-                    parseOutputPattern(start, i, left, cursorPos);
-                }
-                parseMatchPattern(i+1, limit, right, anteContext, postContext);
-                break;
-            case VARIABLE_DEF_OP:
-                if (i == start || i == (limit-1)) {
-                    throw new IllegalArgumentException(
-                                  "Empty left or right side: "
-                                  + rules.substring(start, limit));
-                }
-                parseSubPattern(start, i, left);
-                parseDefPattern(i+1, limit, right);
-                break;
-            default:
-                throw new RuntimeException();
+
+//|        /**
+//|         * Parse the given substring as a rule, and append it to the rules currently
+//|         * represented in this object.
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @exception IllegalArgumentException if there is a syntax error in the
+//|         * rules
+//|         */
+//|        private void applyRule(int start, int limit) {
+//|            /* General description of parsing: Initially, rules contain two types of
+//|             * quoted characters.  First, there are variable references, such as
+//|             * "{alpha}".  Second, there are quotes, such as "'<'" or "''".  One of
+//|             * the first steps in parsing a rule is to resolve such quoted matter.
+//|             * Quotes are removed early, leaving unquoted literal matter.  Variable
+//|             * references are resolved and replaced by single characters.  In some
+//|             * instances these characters represent themselves; in others, they
+//|             * stand for categories of characters.  Character categories are either
+//|             * predefined (e.g., "{Lu}"), or are defined by the user using a
+//|             * statement (e.g., "vowels:aeiouAEIOU").
+//|             *
+//|             * Another early step in parsing is to split each rule into component
+//|             * pieces.  These pieces are, for every rule, a left-hand side, a right-
+//|             * hand side, and an operator.  The left- and right-hand sides may not
+//|             * be empty, except for the output patterns of forward and reverse
+//|             * rules.  In addition to this partitioning, the match patterns of
+//|             * forward and reverse rules must be partitioned into antecontext,
+//|             * postcontext, and literal pattern, where the context portions may or
+//|             * may not be present.  Finally, output patterns must have the cursor
+//|             * indicator '|' detected and removed, with its position recorded.
+//|             *
+//|             * Quote removal, variable resolution, and sub-pattern splitting must
+//|             * all happen at once.  This is due chiefly to the quoting mechanism,
+//|             * which allows special characters to appear at arbitrary positions in
+//|             * the final unquoted text.  (For this reason, alteration of the rule
+//|             * language is somewhat clumsy; it entails reassessment and revision of
+//|             * the parsing methods as a whole.)
+//|             *
+//|             * After this processing of rules is complete, the final end products
+//|             * are unquoted pieces of text of various types, and an integer cursor
+//|             * position, if one is specified.  These processed raw materials are now
+//|             * easy to deal with; other classes such as UnicodeSet and
+//|             * TransliterationRule need know nothing of quoting or variables.
+//|             */
+//|            StringBuffer left = new StringBuffer();
+//|            StringBuffer right = new StringBuffer();
+//|            StringBuffer anteContext = new StringBuffer();
+//|            StringBuffer postContext = new StringBuffer();
+//|            int cursorPos[] = new int[1];
+//|
+//|            char operator = parseRule(start, limit, left, right,
+//|                                      anteContext, postContext, cursorPos);
+//|
+//|            switch (operator) {
+//|            case VARIABLE_DEF_OP:
+//|                applyVariableDef(left.toString(), right.toString());
+//|                break;
+//|            case FORWARD_RULE_OP:
+//|                if (direction == FORWARD) {
+//|                    data.ruleSet.addRule(new TransliterationRule(
+//|                                             left.toString(), right.toString(),
+//|                                             anteContext.toString(), postContext.toString(),
+//|                                             cursorPos[0]));
+//|                } // otherwise ignore the rule; it's not the direction we want
+//|                break;
+//|            case REVERSE_RULE_OP:
+//|                if (direction == REVERSE) {
+//|                    data.ruleSet.addRule(new TransliterationRule(
+//|                                             right.toString(), left.toString(),
+//|                                             anteContext.toString(), postContext.toString(),
+//|                                             cursorPos[0]));
+//|                } // otherwise ignore the rule; it's not the direction we want
+//|                break;
+//|            case FWDREV_RULE_OP:
+//|                data.ruleSet.addRule(new TransliterationRule(
+//|                                         direction == FORWARD ? left.toString() : right.toString(),
+//|                                         direction == FORWARD ? right.toString() : left.toString(),
+//|                                         // Context & cursor disallowed
+//|                                         "", "", -1));
+//|                break;
+//|            }
+//|        }
+
+//|        /**
+//|         * Add a variable definition.
+//|         * @param name the name of the variable.  It must not already be defined.
+//|         * @param pattern the value of the variable.  It may be a single character
+//|         * or a pattern describing a character set.
+//|         * @exception IllegalArgumentException if there is a syntax error
+//|         */
+//|        private final void applyVariableDef(String name, String pattern) {
+//|            validateVariableName(name);
+//|            if (data.variableNames.get(name) != null) {
+//|                throw new IllegalArgumentException("Duplicate variable definition: "
+//|                                                   + name + '=' + pattern);
+//|            }
+//|            if (pattern.length() < 1) {
+//|                throw new IllegalArgumentException("Variable definition missing: "
+//|                                                   + name);
+//|            }
+//|            if (pattern.length() == 1) {
+//|                // Got a single character variable definition
+//|                data.variableNames.put(name, new Character(pattern.charAt(0)));
+//|            } else {
+//|                // Got more than one character; parse it as a category
+//|                UnicodeSet set = new UnicodeSet(pattern);
+//|                data.variableNames.put(name, registerSet(set));
+//|            }
+//|        }
+
+        /**
+         * Stores set in data.setVariables under the next stand-in character
+         * (variableNext) and returns that character. */
+        private final Character registerSet(UnicodeSet set) {
+            if (variableNext >= variableLimit) { // no stand-in characters left
+                throw new RuntimeException("Private use variables exhausted");
             }
+            Character c = new Character(variableNext++);
+            data.setVariables.put(c, set);
             return c;
         }
 
-        /**
-         * Parses the match pattern of a forward or reverse rule.  Given the raw
-         * match pattern, return the match text and the context on both sides, if
-         * any.  Resolves all quotes and variables.
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @param text the key to be matched will be appended to this buffer
-         * @param anteContext the preceding context, if any, will be appended
-         * to this buffer.
-         * @param postContext the following context, if any, will be appended
-         * to this buffer.
-         */
-        private void parseMatchPattern(int start, int limit,
-                                       StringBuffer text,
-                                       StringBuffer anteContext,
-                                       StringBuffer postContext) {
-            if (start >= limit) {
-                throw new IllegalArgumentException(
-                              "Empty expression in rule: "
-                              + rules.substring(start, limit));
-            }
-            if (anteContext != null) {
-                // Ignore optional opening and closing context characters
-                if (rules.charAt(start) == CONTEXT_OPEN) {
-                    ++start;
-                }
-                if (rules.charAt(limit-1) == CONTEXT_CLOSE) {
-                    --limit;
-                }
-                // The four possibilities are:
-                //             key
-                // anteContext]key
-                // anteContext]key[postContext
-                //             key[postContext
-                int ante = quotedIndexOf(rules, start, limit, String.valueOf(CONTEXT_CLOSE));
-                int post = quotedIndexOf(rules, start, limit, String.valueOf(CONTEXT_OPEN));
-                if (ante >= 0 && post >= 0 && ante > post) {
-                    throw new IllegalArgumentException(
-                                  "Syntax error in context specifier: "
-                                  + rules.substring(start, limit));
-                }
-                if (ante >= 0) {
-                    parseSubPattern(start, ante, anteContext);
-                    start = ante+1;
-                }
-                if (post >= 0) {
-                    parseSubPattern(post+1, limit, postContext);
-                    limit = post;
-                }
-            }
-            parseSubPattern(start, limit, text);
-        }
 
-        private final void parseSubPattern(int start, int limit,
-                                           StringBuffer text) {
-            parseSubPattern(start, limit, text, null, SPECIALS);
-        }
 
-        /**
-         * Parse a variable definition sub pattern.  This kind of sub
-         * pattern differs in the set of characters that are considered
-         * special.  In particular, the '[' and ']' characters are not
-         * special, since these are used in UnicodeSet patterns.
-         */
-        private final void parseDefPattern(int start, int limit,
-                                           StringBuffer text) {
-            parseSubPattern(start, limit, text, null, DEF_SPECIALS);
-        }
 
-        /**
-         * Parses the output pattern of a forward or reverse rule.  Given the
-         * output pattern, return the output text and the position of the cursor,
-         * if any.  Resolves all quotes and variables.
-         * @param rules the string to be parsed
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @param text the output text will be appended to this buffer
-         * @param cursorPos if this parameter is not null, then cursorPos[0]
-         * will be set to the cursor position, or -1 if there is none.  If this
-         * parameter is null, then cursors will be disallowed.
-         */
-        private final void parseOutputPattern(int start, int limit,
-                                              StringBuffer text,
-                                              int[] cursorPos) {
-            parseSubPattern(start, limit, text, cursorPos, SPECIALS);
-        }
-
-        /**
-         * Parses a sub-pattern of a rule.  Return the text and the position of the cursor,
-         * if any.  Resolves all quotes and variables.
-         * @param rules the string to be parsed
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @param text the output text will be appended to this buffer
-         * @param cursorPos if this parameter is not null, then cursorPos[0]
-         * will be set to the cursor position, or -1 if there is none.  If this
-         * parameter is null, then cursors will be disallowed.
-         * @param specials characters that must be quoted; typically either
-         * SPECIALS or DEF_SPECIALS.
-         */
-        private void parseSubPattern(int start, int limit,
-                                     StringBuffer text,
-                                     int[] cursorPos,
-                                     String specials) {
-            boolean inQuote = false;
-
-            if (start >= limit) {
-                throw new IllegalArgumentException("Empty expression in rule");
-            }
-            if (cursorPos != null) {
-                cursorPos[0] = -1;
-            }
-            for (int i=start; i<limit; ++i) {
-                char c = rules.charAt(i);
-                if (c == QUOTE) {
-                    // Check for double quote
-                    if ((i+1) < limit
-                        && rules.charAt(i+1) == QUOTE) {
-                        text.append(QUOTE);
-                        ++i; // Skip over both quotes
-                    } else {
-                        inQuote = !inQuote;
-                    }
-                } else if (inQuote) {
-                    text.append(c);
-                } else if (c == VARIABLE_REF_OPEN) {
-                    ++i;
-                    int j = rules.indexOf(VARIABLE_REF_CLOSE, i);
-                    if (i == j || j < 0) { // empty or unterminated
-                        throw new IllegalArgumentException("Illegal variable reference: "
-                                                           + rules.substring(start, limit));
-                    }
-                    String name = rules.substring(i, j);
-                    validateVariableName(name);
-                    text.append(getVariableDef(name).charValue());
-                    i = j;
-                } else if (c == CURSOR_POS && cursorPos != null) {
-                    if (cursorPos[0] >= 0) {
-                        throw new IllegalArgumentException("Multiple cursors: "
-                                                           + rules.substring(start, limit));
-                    }
-                    cursorPos[0] = text.length();
-                } else if (specials.indexOf(c) >= 0) {
-                    throw new IllegalArgumentException("Unquoted special character: "
-                                                       + rules.substring(start, limit));
-                } else {
-                    text.append(c);
-                }
-            }
-        }
-
-        private static void validateVariableName(String name) {
-            if (indexOf(name, SPECIALS) >= 0) {
-                throw new IllegalArgumentException(
-                              "Special character in variable name: "
-                              + name);
-            }
-        }
+//|        /**
+//|         * Given a rule, parses it into three pieces: The left side, the right side,
+//|         * and the operator.  Returns the operator.  Quotes and variable references
+//|         * are resolved; the output text in all <code>StringBuffer</code> parameters
+//|         * is literal text.  This method delegates to other parsing methods to
+//|         * handle the match pattern, output pattern, and other sub-patterns in the
+//|         * rule.
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @param left left side of rule is appended to this buffer
+//|         * with the quotes removed and variables resolved
+//|         * @param right right side of rule is appended to this buffer
+//|         * with the quotes removed and variables resolved
+//|         * @param anteContext the preceding context of the match pattern,
+//|         * if there is one, is appended to this buffer
+//|         * @param postContext the following context of the match pattern,
+//|         * if there is one, is appended to this buffer
+//|         * @param cursorPos if there is a cursor in the output pattern, its
+//|         * offset is stored in <code>cursorPos[0]</code>
+//|         * @return The operator character, one of the characters in OPERATORS.
+//|         */
+//|        private char parseRule(int start, int limit,
+//|                               StringBuffer left, StringBuffer right,
+//|                               StringBuffer anteContext,
+//|                               StringBuffer postContext,
+//|                               int[] cursorPos) {
+//|            if (false) {
+//|                System.err.println("Parsing " + rules.substring(start, limit));
+//|            }
+//|            /* Parse the rule into three pieces -- left, operator, and right,
+//|             * parsing out quotes.  The result is that left and right will have
+//|             * unquoted text.  E.g., "gt<'>'" will have right = ">".  Unquoted
+//|             * operators throw an exception.  Two quotes inside or outside
+//|             * quotes indicates a quote literal.  E.g., "o''clock" -> "o'clock".
+//|             */
+//|            int i = quotedIndexOf(rules, start, limit, OPERATORS);
+//|            if (i < 0) {
+//|                throw new IllegalArgumentException(
+//|                              "Syntax error: "
+//|                              + rules.substring(start, limit));
+//|            }
+//|            char c = rules.charAt(i);
+//|            
+//|            // Look for "<>" double rules.
+//|            if ((i+1) < limit && rules.substring(i, i+2).equals(FWDREV_OP_STRING)) {
+//|                if (i == start) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty left side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                if (i+2 == limit) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty right side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                parseSubPattern(start, i, left, null, SPECIALS);
+//|                parseSubPattern(i+2, limit, right, null, SPECIALS);
+//|                return FWDREV_RULE_OP;
+//|            }
+//|
+//|            switch (c) {
+//|            case FORWARD_RULE_OP:
+//|                if (i == start) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty left side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                parseMatchPattern(start, i, left, anteContext, postContext);
+//|                if (i != (limit-1)) {
+//|                    parseOutputPattern(i+1, limit, right, cursorPos);
+//|                }
+//|                break;
+//|            case REVERSE_RULE_OP:
+//|                if (i == (limit-1)) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty right side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                if (i != start) {
+//|                    parseOutputPattern(start, i, left, cursorPos);
+//|                }
+//|                parseMatchPattern(i+1, limit, right, anteContext, postContext);
+//|                break;
+//|            case VARIABLE_DEF_OP:
+//|                if (i == start || i == (limit-1)) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty left or right side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                parseSubPattern(start, i, left);
+//|                parseDefPattern(i+1, limit, right);
+//|                break;
+//|            default:
+//|                throw new RuntimeException();
+//|            }
+//|            return c;
+//|        }
+//|
+//|        /**
+//|         * Parses the match pattern of a forward or reverse rule.  Given the raw
+//|         * match pattern, return the match text and the context on both sides, if
+//|         * any.  Resolves all quotes and variables.
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @param text the key to be matched will be appended to this buffer
+//|         * @param anteContext the preceding context, if any, will be appended
+//|         * to this buffer.
+//|         * @param postContext the following context, if any, will be appended
+//|         * to this buffer.
+//|         */
+//|        private void parseMatchPattern(int start, int limit,
+//|                                       StringBuffer text,
+//|                                       StringBuffer anteContext,
+//|                                       StringBuffer postContext) {
+//|            if (start >= limit) {
+//|                throw new IllegalArgumentException(
+//|                              "Empty expression in rule: "
+//|                              + rules.substring(start, limit));
+//|            }
+//|            if (anteContext != null) {
+//|                // Ignore optional opening and closing context characters
+//|                if (rules.charAt(start) == CONTEXT_OPEN) {
+//|                    ++start;
+//|                }
+//|                if (rules.charAt(limit-1) == CONTEXT_CLOSE) {
+//|                    --limit;
+//|                }
+//|                // The four possibilities are:
+//|                //             key
+//|                // anteContext]key
+//|                // anteContext]key[postContext
+//|                //             key[postContext
+//|                int ante = quotedIndexOf(rules, start, limit, String.valueOf(CONTEXT_CLOSE));
+//|                int post = quotedIndexOf(rules, start, limit, String.valueOf(CONTEXT_OPEN));
+//|                if (ante >= 0 && post >= 0 && ante > post) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Syntax error in context specifier: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                if (ante >= 0) {
+//|                    parseSubPattern(start, ante, anteContext);
+//|                    start = ante+1;
+//|                }
+//|                if (post >= 0) {
+//|                    parseSubPattern(post+1, limit, postContext);
+//|                    limit = post;
+//|                }
+//|            }
+//|            parseSubPattern(start, limit, text);
+//|        }
+//|
+//|        private final void parseSubPattern(int start, int limit,
+//|                                           StringBuffer text) {
+//|            parseSubPattern(start, limit, text, null, SPECIALS);
+//|        }
+//|
+//|        /**
+//|         * Parse a variable definition sub pattern.  This kind of sub
+//|         * pattern differs in the set of characters that are considered
+//|         * special.  In particular, the '[' and ']' characters are not
+//|         * special, since these are used in UnicodeSet patterns.
+//|         */
+//|        private final void parseDefPattern(int start, int limit,
+//|                                           StringBuffer text) {
+//|            parseSubPattern(start, limit, text, null, DEF_SPECIALS);
+//|        }
+//|
+//|        /**
+//|         * Parses the output pattern of a forward or reverse rule.  Given the
+//|         * output pattern, return the output text and the position of the cursor,
+//|         * if any.  Resolves all quotes and variables.
+//|         * @param rules the string to be parsed
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @param text the output text will be appended to this buffer
+//|         * @param cursorPos if this parameter is not null, then cursorPos[0]
+//|         * will be set to the cursor position, or -1 if there is none.  If this
+//|         * parameter is null, then cursors will be disallowed.
+//|         */
+//|        private final void parseOutputPattern(int start, int limit,
+//|                                              StringBuffer text,
+//|                                              int[] cursorPos) {
+//|            parseSubPattern(start, limit, text, cursorPos, SPECIALS);
+//|        }
+//|
+//|        /**
+//|         * Parses a sub-pattern of a rule.  Return the text and the position of the cursor,
+//|         * if any.  Resolves all quotes and variables.
+//|         * @param rules the string to be parsed
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @param text the output text will be appended to this buffer
+//|         * @param cursorPos if this parameter is not null, then cursorPos[0]
+//|         * will be set to the cursor position, or -1 if there is none.  If this
+//|         * parameter is null, then cursors will be disallowed.
+//|         * @param specials characters that must be quoted; typically either
+//|         * SPECIALS or DEF_SPECIALS.
+//|         */
+//|        private void parseSubPattern(int start, int limit,
+//|                                     StringBuffer text,
+//|                                     int[] cursorPos,
+//|                                     String specials) {
+//|            boolean inQuote = false;
+//|
+//|            if (start >= limit) {
+//|                throw new IllegalArgumentException("Empty expression in rule");
+//|            }
+//|            if (cursorPos != null) {
+//|                cursorPos[0] = -1;
+//|            }
+//|            for (int i=start; i<limit; ++i) {
+//|                char c = rules.charAt(i);
+//|                if (c == QUOTE) {
+//|                    // Check for double quote
+//|                    if ((i+1) < limit
+//|                        && rules.charAt(i+1) == QUOTE) {
+//|                        text.append(QUOTE);
+//|                        ++i; // Skip over both quotes
+//|                    } else {
+//|                        inQuote = !inQuote;
+//|                    }
+//|                } else if (inQuote) {
+//|                    text.append(c);
+//|                } else if (c == VARIABLE_REF_OPEN) {
+//|                    ++i;
+//|                    int j = rules.indexOf(VARIABLE_REF_CLOSE, i);
+//|                    if (i == j || j < 0) { // empty or unterminated
+//|                        throw new IllegalArgumentException("Illegal variable reference: "
+//|                                                           + rules.substring(start, limit));
+//|                    }
+//|                    String name = rules.substring(i, j);
+//|                    validateVariableName(name);
+//|                    text.append(getVariableDef(name).charValue());
+//|                    i = j;
+//|                } else if (c == CURSOR_POS && cursorPos != null) {
+//|                    if (cursorPos[0] >= 0) {
+//|                        throw new IllegalArgumentException("Multiple cursors: "
+//|                                                           + rules.substring(start, limit));
+//|                    }
+//|                    cursorPos[0] = text.length();
+//|                } else if (specials.indexOf(c) >= 0) {
+//|                    throw new IllegalArgumentException("Unquoted special character: "
+//|                                                       + rules.substring(start, limit));
+//|                } else {
+//|                    text.append(c);
+//|                }
+//|            }
+//|        }
+//|
+//|        private static void validateVariableName(String name) {
+//|            if (indexOf(name, SPECIALS) >= 0) {
+//|                throw new IllegalArgumentException(
+//|                              "Special character in variable name: "
+//|                              + name);
+//|            }
+//|        }
 
         /**
          * Returns the single character value of the given variable name.  Defined
          * names are recognized.
-         *
-         * NO LONGER SUPPORTED:
-         * If a Unicode category name is given, a standard character variable
-         * in the range firstCategoryVariable to lastCategoryVariable is returned,
-         * with value firstCategoryVariable + n, where n is the category
-         * number.
          * @exception IllegalArgumentException if the name is unknown.
          */
         private Character getVariableDef(String name) {
             Character ch = (Character) data.variableNames.get(name);
-//!         if (ch == null) {
-//!             int id = UnicodeSet.getCategoryID(name);
-//!             if (id >= 0) {
-//!                 ch = new Character((char) (firstCategoryVariable + id));
-//!                 data.variableNames.put(name, ch);
-//!                 data.setVariables.put(ch, new UnicodeSet(id));
-//!             }
-//!         }
             if (ch == null) {
                 throw new IllegalArgumentException("Undefined variable: "
                                                    + name);
@@ -1084,6 +1334,10 @@ public class RuleBasedTransliterator extends Transliterator {
          * this method may employ some other algorithm for improved speed.
          */
         private final void determineVariableRange(String[] ruleArray) {
+            // As an initial implementation, we just run through all the
+            // characters, ignoring any quoting.  This works since the quote
+            // mechanisms are outside the private use area.
+
             Range r = new Range('\uE000', 0x1900); // Private use area
             r = r.largestUnusedSubrange(ruleArray);
             
@@ -1121,7 +1375,9 @@ public class RuleBasedTransliterator extends Transliterator {
                                          String setOfChars) {
             for (int i=start; i<limit; ++i) {
                 char c = text.charAt(i);
-                if (c == QUOTE) {
+                if (c == ESCAPE) {
+                    ++i;
+                } else if (c == QUOTE) {
                     while (++i < limit
                            && text.charAt(i) != QUOTE) {}
                 } else if (setOfChars.indexOf(c) >= 0) {
diff --git a/icu4j/src/com/ibm/icu/text/TransliterationRule.java b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
index a06801f3fd..55104c8610 100755
--- a/icu4j/src/com/ibm/icu/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
@@ -21,9 +21,12 @@ import java.util.Dictionary;
  * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
  *
  * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.5 $ $Date: 2000/01/04 21:43:57 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.6 $ $Date: 2000/01/11 02:25:03 $
  *
  * $Log: TransliterationRule.java,v $
+ * Revision 1.6  2000/01/11 02:25:03  Alan
+ * Rewrite UnicodeSet and RBT parsers for better performance and new syntax
+ *
  * Revision 1.5  2000/01/04 21:43:57  Alan
  * Add rule indexing, and move masking check to TransliterationRuleSet.
  *
@@ -134,6 +137,46 @@ class TransliterationRule {
         }
     }
 
+
+
+
+
+
+
+    /**
+     * Constructs a rule from one string holding ante context, key, post context.
+     * @param input input string, including key and optional ante and post context
+     * @param anteContextPos offset into input to end of ante context, or
+     * -1 if none
+     * @param postContextPos offset into input to start of post context,
+     * or -1 if none
+     * @param output output string
+     * @param cursorPos offset into output at which cursor is located,
+     * or -1 if none.
+     * @exception IllegalArgumentException if an offset is out of range
+     */
+    public TransliterationRule(String input, int anteContextPos,
+                               int postContextPos, String output, int cursorPos) {
+        int ante = (anteContextPos < 0) ? 0 : anteContextPos;
+        int post = (postContextPos < 0) ? input.length() : postContextPos;
+        // Check first: post < ante would otherwise give a negative keyLength.
+        if (ante > post || post > input.length() || cursorPos > output.length()) {
+            throw new IllegalArgumentException();
+        }
+        anteContextLength = ante;
+        keyLength = post - ante;
+        pattern = input;
+        this.output = output;
+        this.cursorPos = (cursorPos < 0) ? output.length() : cursorPos;
+    }
+
+
+
+
+
+
+
+
     /**
      * Return the length of the key.  Equivalent to <code>getKey().length()</code>.
      * @return the length of the match key.
@@ -171,9 +214,14 @@ class TransliterationRule {
      * Internal method.  Returns 8-bit index value for this rule.
      * This is the low byte of the first character of the key,
      * unless the first character of the key is a set.  If it's a
-     * set, the index value is -1.
+     * set, or otherwise can match multiple keys, the index value is -1.
      */
     final int getIndexValue(Dictionary variables) {
+        if (anteContextLength == pattern.length()) {
+            // Nothing follows the ante context {as in foo)>bar}, so there is no
+            // first key character to index on; such a rule can match any key.
+            return -1;
+        }
         char c = pattern.charAt(anteContextLength);
         return variables.get(new Character(c)) == null ? (c & 0xFF) : -1;
     }
@@ -185,9 +233,15 @@ class TransliterationRule {
      * It matches this rule if it matches the first character of the
      * key, or if the first character of the key is a set, and the set
      * contains any character with a low byte equal to the index
-     * value.
+     * value.  If the rule contains only ante context, as in foo)>bar,
+     * then it will match any key.
      */
     final boolean matchesIndexValue(int v, Dictionary variables) {
+        if (anteContextLength == pattern.length()) {
+            // A pattern with just ante context {such as foo)>bar} can
+            // match any key.
+            return true;
+        }
         char c = pattern.charAt(anteContextLength);
         UnicodeSet set = (UnicodeSet) variables.get(new Character(c));
         return set == null ? (c & 0xFF) == v : set.containsIndexValue(v);
@@ -238,15 +292,15 @@ class TransliterationRule {
      */
     public String toString() {
         return getClass().getName() + '{'
-            + escape(anteContextLength > 0 ? ("[" + pattern.substring(0, anteContextLength) +
-                                              ']') : "")
-            + pattern.substring(anteContextLength, anteContextLength + keyLength)
-            + (anteContextLength + keyLength < pattern.length() ?
-               ("[" + pattern.substring(anteContextLength + keyLength) + ']') : "")
-            + " -> "
-            + (cursorPos < output.length()
-               ? (output.substring(0, cursorPos) + '|' + output.substring(cursorPos))
-               : output)
+            + escape((anteContextLength > 0 ? ("(" + pattern.substring(0, anteContextLength) +
+                                              ") ") : "")
+                     + pattern.substring(anteContextLength, anteContextLength + keyLength)
+                     + (anteContextLength + keyLength < pattern.length() ?
+                        (" (" + pattern.substring(anteContextLength + keyLength) + ")") : "")
+                     + " > "
+                     + (cursorPos < output.length()
+                        ? (output.substring(0, cursorPos) + '|' + output.substring(cursorPos))
+                        : output))
             + '}';
     }
 
diff --git a/icu4j/src/com/ibm/icu/text/UnicodeSet.java b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
index c63c0de07c..975f2856fd 100755
--- a/icu4j/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@@ -1,6 +1,7 @@
 package com.ibm.text;
 
 import java.text.*;
+import java.util.Dictionary;
 
 /**
  * A mutable set of Unicode characters.  Objects of this class
@@ -225,7 +226,7 @@ import java.text.*;
  * *Unsupported by Java (and hence unsupported by UnicodeSet).
  *
  * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.2 $ $Date: 2000/01/04 21:43:58 $ */
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.3 $ $Date: 2000/01/11 02:25:03 $ */
 public class UnicodeSet {
     /**
      * The internal representation is a StringBuffer of even length.
@@ -251,6 +252,9 @@ public class UnicodeSet {
 
     private static final int UNSUPPORTED_CATEGORY = 17;
 
+    private static final char VARIABLE_REF_OPEN = '{';
+    private static final char VARIABLE_REF_CLOSE = '}';
+
     private static final int CATEGORY_COUNT = 29;
 
     /**
@@ -293,25 +297,21 @@ public class UnicodeSet {
      * a syntax error.
      */
     public UnicodeSet(String pattern) {
-        applyPattern(pattern, false);
+        applyPattern(pattern);
     }
 
-    /**
-     * Constructs a set from the given pattern, optionally ignoring
-     * white space.  See the class description for the syntax of the
-     * pattern language.
-     * @param pattern a string specifying what characters are in the set
-     * @param ignoreSpaces if <code>true</code>, all spaces in the
-     * pattern are ignored, except those preceded by '\u005C'.  Spaces are
-     * those characters for which <code>Character.isSpaceChar()</code>
-     * is <code>true</code>.
-     * @exception <code>IllegalArgumentException</code> if the pattern
-     * contains a syntax error.
-     */
-    public UnicodeSet(String pattern, boolean ignoreSpaces) {
-        applyPattern(pattern, ignoreSpaces);
+
+
+
+
+    public UnicodeSet(String pattern, ParsePosition pos,
+                      Dictionary varNameToChar, Dictionary varCharToSet) {
+        applyPattern(pattern, pos, varNameToChar, varCharToSet);
     }
 
+
+
+
     /**
      * Constructs a set from the given Unicode character category.
      * @param category an integer indicating the character category as
@@ -328,57 +328,15 @@ public class UnicodeSet {
     }
 
     /**
-     * Modifies this set to represent the set specified by the given
-     * pattern.  See the class description for the syntax of the
-     * pattern language.
+     * Modifies this set to represent the set specified by the given pattern.
+     * See the class description for the syntax of the pattern language.
      * @param pattern a string specifying what characters are in the set
      * @exception <code>IllegalArgumentException</code> if the pattern
      * contains a syntax error.
      */
-    public final void applyPattern(String pattern) {
-        applyPattern(pattern, false);
-    }
-
-    /**
-     * Modifies this set to represent the set specified by the given
-     * pattern, optionally ignoring white space.  See the class
-     * description for the syntax of the pattern language.
-     * @param pattern a string specifying what characters are in the set
-     * @param ignoreSpaces if <code>true</code>, all spaces in the
-     * pattern are ignored.  Spaces are those characters for which
-     * <code>Character.isSpaceChar()</code> is <code>true</code>.
-     * Characters preceded by '\\' are escaped, losing any special
-     * meaning they otherwise have.  Spaces may be included by
-     * escaping them.
-     * @exception <code>IllegalArgumentException</code> if the pattern
-     * contains a syntax error.
-     */
-    public void applyPattern(String pattern, boolean ignoreSpaces) {
+    public void applyPattern(String pattern) {
         ParsePosition pos = new ParsePosition(0);
-
-        // To ignore spaces, create a new pattern without spaces.  We
-        // have to process all '\' escapes.  If '\' is encountered,
-        // insert it and the following character (if any -- let parse
-        // deal with any syntax errors) in the pattern.  This allows
-        // escaped spaces.
-        if (ignoreSpaces) {
-            StringBuffer pat = new StringBuffer();
-            for (int i=0; i<pattern.length(); ++i) {
-                char c = pattern.charAt(i);
-                if (Character.isSpaceChar(c)) {
-                    continue;
-                }
-                if (c == '\\' && (i+1) < pattern.length()) {
-                    pat.append(c);
-                    c = pattern.charAt(++i);
-                    // Fall through and append the following char
-                }
-                pat.append(c);
-            }
-            pattern = pat.toString();
-        }
-
-        pairs = parse(pattern, pos);
+        pairs = parse(pattern, pos, null, null);
         if (pos.getIndex() != pattern.length()) {
             throw new IllegalArgumentException("Parse of \"" + pattern +
                                                "\" failed at " +
@@ -386,6 +344,19 @@ public class UnicodeSet {
         }
     }
 
+
+
+
+
+    private void applyPattern(String pattern, ParsePosition pos,
+                              Dictionary varNameToChar, Dictionary varCharToSet) {
+        pairs = parse(pattern, pos, varNameToChar, varCharToSet);
+    }
+
+
+
+
+
     /**
      * Returns a string representation of this set.  If the result of
      * calling this function is passed to a UnicodeSet constructor, it
@@ -643,77 +614,137 @@ public class UnicodeSet {
         return pairs.hashCode();
     }
 
+    /**
+     * Return a programmer-readable string representation of this object.
+     */
+    public String toString() {
+        return getClass().getName() + '{' + toPattern() + '}';
+    }
+
     //----------------------------------------------------------------
     // Implementation: Pattern parsing
     //----------------------------------------------------------------
 
     /**
-     * Parses the given pattern, starting at the given position.  The
-     * character at pattern.charAt(pos.getIndex()) must be '[', or the
-     * parse fails.  Parsing continues until the corresponding closing
-     * ']'.  If a syntax error is encountered between the opening and
-     * closing brace, the parse fails.  Upon return from a successful
-     * parse, the ParsePosition is updated to point to the character
-     * following the closing ']', and a StringBuffer containing a
-     * pairs list for the parsed pattern is returned.  This method calls
-     * itself recursively to parse embedded subpatterns.
+     * Parses the given pattern, starting at the given position.  The character
+     * at pattern.charAt(pos.getIndex()) must be '[', or the parse fails.
+     * Parsing continues until the corresponding closing ']'.  If a syntax error
+     * is encountered between the opening and closing bracket, the parse fails.
+     * Upon return from a successful parse, the ParsePosition is updated to
+     * point to the character following the closing ']', and a StringBuffer
+     * containing a pairs list for the parsed pattern is returned.  This method
+     * calls itself recursively to parse embedded subpatterns.
      *
-     * @param pattern the string containing the pattern to be parsed.
-     * The portion of the string from pos.getIndex(), which must be a
-     * '[', to the corresponding closing ']', is parsed.
-     * @param pos upon entry, the position at which to being parsing.
-     * The character at pattern.charAt(pos.getIndex()) must be a '['.
-     * Upon return from a successful parse, pos.getIndex() is either
-     * the character after the closing ']' of the parsed pattern, or
-     * pattern.length() if the closing ']' is the last character of
-     * the pattern string.
-     * @return a StringBuffer containing a pairs list for the parsed
-     * substring of <code>pattern</code>
+     * @param pattern the string containing the pattern to be parsed.  The
+     * portion of the string from pos.getIndex(), which must be a '[', to the
+     * corresponding closing ']', is parsed.
+     * @param pos upon entry, the position at which to begin parsing.  The
+     * character at pattern.charAt(pos.getIndex()) must be a '['.  Upon return
+     * from a successful parse, pos.getIndex() is either the character after the
+     * closing ']' of the parsed pattern, or pattern.length() if the closing ']'
+     * is the last character of the pattern string.
+     * @return a StringBuffer containing a pairs list for the parsed substring
+     * of <code>pattern</code>
      * @exception IllegalArgumentException if the parse fails.
      */
-    private static StringBuffer parse(String pattern, ParsePosition pos) {
+    private static StringBuffer parse(String pattern, ParsePosition pos,
+                                      Dictionary varNameToChar, Dictionary varCharToSet) {
 
-        boolean invert = false;
         StringBuffer pairsBuf = new StringBuffer();
+        boolean invert = false;
 
-        /**
-         * Nodes:  0 - idle, waiting for '['
-         *        10 - like 11, but immediately after "[" or "[^"
-         *        11 - awaiting x, "]", "[...]", or "[:...:]"
-         *        21 - after x
-         *        23 - after x-
-         * 
-         * The parsing state machine moves from node 0 through zero or more
-         * other nodes back to node 0, in a successful parse.
+        int lastChar = -1; // This is either a char (0..FFFF) or -1
+        char lastOp = 0;
+
+        /* This loop iterates over the characters in the pattern.  We start at
+         * the position specified by pos.  We exit the loop when either a
+         * matching closing ']' is seen, or we read all characters of the
+         * pattern.  In the latter case an error will be thrown.
          */
-        int node = 0;
-        char first = 0;
-        int i;
 
-        /**
-         * This loop iterates over the characters in the pattern.  We
-         * start at the position specified by pos.  We exit the loop
-         * when either a matching closing ']' is seen, or we read all
-         * characters of the pattern.
+        /* Pattern syntax:
+         *  pat := '[' '^'? elem* ']'
+         *  elem := a | a '-' a | set | set op set
+         *  set := pat | (a set variable)
+         *  op := '&' | '-'
+         *  a := (a character, possibly defined by a var)
          */
-        for (i=pos.getIndex(); i<pattern.length(); ++i) {
-            char c = pattern.charAt(i);
 
-            /**
-             * Handle escapes here.  If a character is escaped, then
-             * it assumes its literal value.  This is true for all
-             * characters, both special characters and characters with
-             * no special meaning.  We also interpret '\\uxxxx' Unicode
-             * escapes here.
+        // mode 0: No chars parsed yet; next must be '['
+        // mode 1: '[' seen; if next is '^' or ':' then special
+        // mode 2: '[' '^'? seen; parse pattern and close with ']'
+        // mode 3: '[:' seen; parse category and close with ':]'
+        int mode = 0;
+        int openPos = 0; // offset to opening '['
+        int i = pos.getIndex();
+        int limit = pattern.length();
+        for (; i<limit; ++i) {
+            /* If the next element is a single character, c will be set to it,
+             * and nestedPairs will be null.  In this case isLiteral indicates
+             * whether the character should assume special meaning if it has
+             * one.  If the next element is a nested set, either via a variable
+             * reference, or via an embedded "[..]"  or "[:..:]" pattern, then
+             * nestedPairs will be set to the pairs list for the nested set, and
+             * c's value should be ignored.
              */
+            char c = pattern.charAt(i);
+            String nestedPairs = null;
             boolean isLiteral = false;
+
+            // Ignore whitespace.  This is not Unicode whitespace, but Java
+            // whitespace, a subset of Unicode whitespace.
+            if (Character.isWhitespace(c)) {
+                continue;
+            }
+
+            // Parse the opening '[' and optional following '^'
+            switch (mode) {
+            case 0:
+                if (c == '[') {
+                    mode = 1; // Next look for '^'
+                    openPos = i;
+                    continue;
+                } else {
+                    throw new IllegalArgumentException("Missing opening '['");
+                }
+            case 1:
+                mode = 2;
+                switch (c) {
+                case '^':
+                    invert = true;
+                    continue; // Back to top to fetch next character
+                case ':':
+                    if (i == openPos+1) {
+                        // '[:' cannot have whitespace in it
+                        --i;
+                        c = '[';
+                        mode = 3;
+                        // Fall through and parse category normally
+                    }
+                    break; // Fall through
+                case '-':
+                    isLiteral = true; // Treat leading '-' as a literal
+                    break; // Fall through
+                }
+                // else fall through and parse this character normally
+            }
+
+            // After opening matter is parsed ("[", "[^", or "[:"), the mode
+            // will be 2 if we want a closing ']', or 3 if we should parse a
+            // category and close with ":]".
+
+            /* Handle escapes.  If a character is escaped, then it assumes its
+             * literal value.  This is true for all characters, both special
+             * characters and characters with no special meaning.  We also
+             * interpret '\\uxxxx' Unicode escapes here (as literals).
+             */
             if (c == '\\') {
                 ++i;
-                if (i < pattern.length()) {
+                if (i < limit) {
                     c = pattern.charAt(i);
                     isLiteral = true;
                     if (c == 'u') {
-                        if ((i+4) >= pattern.length()) {
+                        if ((i+4) >= limit) {
                             throw new IllegalArgumentException("Invalid \\u escape");
                         }
                         c = '\u0000';
@@ -731,201 +762,143 @@ public class UnicodeSet {
                 }
             }
 
-            /**
-             * Within this loop, we handle each of the four
-             * conditions: '[', ']', '-', other.  The first three
-             * characters must not be escaped.
+            /* Parse variable references.  These are treated as literals.  If a
+             * variable refers to a UnicodeSet, nestedPairs is assigned here.
+             * Variable names are only parsed if varNameToChar is not null.
+             * Set variables are only looked up if varCharToSet is not null.
              */
+            else if (varNameToChar != null && !isLiteral && c == VARIABLE_REF_OPEN) {
+                ++i;
+                int j = pattern.indexOf(VARIABLE_REF_CLOSE, i);
+                if (i == j || j < 0) { // empty or unterminated
+                    throw new IllegalArgumentException("Illegal variable reference");
+                }
+                String name = pattern.substring(i, j);
+                ++j;
+                Character ch = (Character) varNameToChar.get(name);
+                if (ch == null) {
+                    throw new IllegalArgumentException("Undefined variable: "
+                                                       + name);
+                }
+                c = ch.charValue();
+                isLiteral = true;
 
-            /**
-             * An opening bracket indicates either the first bracket
-             * of the entire subpattern we are parsing, in which case
-             * we are in node 0 and move into node 10.  We also check
-             * for an immediately following '^', indicating the
-             * complement of the following pattern.  ('^' is any other
-             * position has no special meaning.)  If we are not in
-             * node 0, '[' represents a nested subpattern that must be
-             * recursively parsed and checked for following operators
-             * ('&' or '|').  If two nested subpatterns follow one
-             * another with no operator, their union is formed, just
-             * as with any other elements that follow one another
-             * without intervening operator.  The other thing we
-             * handle here is the syntax "[:Xx:]" or "[:X:]" that
-             * indicates a Unicode category or supercategory.
+                if (varCharToSet != null) {
+                    UnicodeSet set = (UnicodeSet) varCharToSet.get(ch);
+                    if (set != null) {
+                        nestedPairs = set.pairs.toString();
+                    }
+                }
+            }
+
+            /* An opening bracket indicates the first bracket of a nested
+             * subpattern, either a normal pattern or a category pattern.  We
+             * recognize these here and set nestedPairs accordingly.
              */
-            if (!isLiteral && c == '[') {
-                boolean parseOp = false;
+            else if (!isLiteral && c == '[') {
+                // Handle "[:...:]", representing a character category
                 char d = charAfter(pattern, i);
-                // "[:...:]" represents a character category
                 if (d == ':') {
-                    if (node == 23) {
-                        throw new IllegalArgumentException("Unexpected \"[:\"");
-                    }
-                    if (node == 21) {
-                        addPair(pairsBuf, first, first);
-                        node = 11;
-                    }
                     i += 2;
                     int j = pattern.indexOf(":]", i);
                     if (j < 0) {
                         throw new IllegalArgumentException("Missing \":]\"");
                     }
-                    doUnion(pairsBuf,
-                            getCategoryPairs(pattern.substring(i, j)));
-                    i = j+1;
-                    if (node == 10) {
-                        node = 11;
-                        parseOp = true;
-                    } else if (node == 0) {
+                    nestedPairs = getCategoryPairs(pattern.substring(i, j));
+                    i = j+1; // Make i point to ']'
+                    if (mode == 3) {
+                        // Entire pattern is a category; leave parse loop
+                        pairsBuf.append(nestedPairs);
                         break;
                     }
                 } else {
-                    if (node == 0) {
-                        node = 10;
-                        if (d == '^') {
-                            invert = true;
-                            ++i;
-                        }
-                    } else {
-                        // Nested '['
-                        pos.setIndex(i);
-                        doUnion(pairsBuf, parse(pattern, pos)
-                                .toString());
-                        i = pos.getIndex() - 1; // Subtract 1 to point at ']'
-                        parseOp = true;
-                    }
+                    // Recurse to get the pairs for this nested set.
+                    pos.setIndex(i); // Point at the nested '['
+                    nestedPairs = parse(pattern, pos, varNameToChar, varCharToSet).toString();
+                    i = pos.getIndex() - 1; // - 1 to point at ']'
                 }
-                /**
-                 * parseOp is true after "[:...:]" or a nested
-                 * "[...]".  It is false only after the final closing
-                 * ']'.  If parseOp is true, we look past the closing
-                 * ']' to see if we have an operator character.  If
-                 * so, we parse the subsequent "[...]" recursively,
-                 * then perform the operation.  We do this in a loop
-                 * until there are no more operators.  Note that this
-                 * means the operators have equal precedence and are
-                 * bound left-to-right.
-                 */
-                if (parseOp) {
-                    for (;;) {
-                        // Is the next character an operator?
-                        char op = charAfter(pattern, i);
-                        if (op == '-' || op == '&') {
-                            pos.setIndex(i+2); // Add 2 to point AFTER op
-                            String rhs = parse(pattern, pos).toString();
-                            if (op == '-') {
-                                doDifference(pairsBuf, rhs);
-                            } else if (op == '&') {
-                                doIntersection(pairsBuf, rhs);
-                            }
-                            i = pos.getIndex() - 1; // - 1 to point at ']'
-                        } else {
-                            break;
-                        }
-                    }
-                }          
             }
 
-            /**
-             * A closing bracket can only be a closing bracket for
-             * "[...]", since the closing bracket for "[:...:]" is
-             * taken care of when the initial "[:" is seen.  When we
-             * see a closing bracket, we then know, if we were in node
-             * 21 (after x) or 23 (after x-) that nothing more is
-             * coming, and we add the last character(s) we saw to the
-             * set.  Note that a trailing '-' assumes its literal
-             * meaning, just as a leading '-' after "[" or "[^".
+            /* At this point we have either a character c, or a nested set.  If
+             * we have encountered a nested set, either embedded in the pattern,
+             * or as a variable, we have a non-null nestedPairs, and c should be
+             * ignored.  Otherwise c is the current character, and isLiteral
+             * indicates whether it is an escaped literal (or variable) or a
+             * normal unescaped character.  Unescaped characters '-', '&', and
+             * ']' have special meanings.
              */
-            else if (!isLiteral && c == ']') {
-                if (node == 0) {
-                    throw new IllegalArgumentException("Unexpected ']'");
-                }
-                if (node == 21 || node == 23) {
-                    addPair(pairsBuf, first, first);
-                    if (node == 23) {
-                        addPair(pairsBuf, '-', '-');
+            if (nestedPairs != null) {
+                if (lastChar >= 0) {
+                    if (lastOp != 0) {
+                        throw new IllegalArgumentException("Illegal rhs for " + lastChar + lastOp);
                     }
+                    addPair(pairsBuf, (char)lastChar, (char)lastChar);
+                    lastChar = -1;
                 }
-                node = 0;
+                switch (lastOp) {
+                case '-':
+                    doDifference(pairsBuf, nestedPairs);
+                    break;
+                case '&':
+                    doIntersection(pairsBuf, nestedPairs);
+                    break;
+                case 0:
+                    doUnion(pairsBuf, nestedPairs);
+                    break;
+                }
+                lastOp = 0;
+            } else if (!isLiteral && c == ']') {
+                // Final closing delimiter.  This is the only way we leave this
+                // loop if the pattern is well-formed.
                 break;
-            }
-
-            /**
-             * '-' has the following interpretations: 1. Within
-             * "[...]", between two letters, it indicates a range.
-             * 2. Between two nested bracket patterns, "[[...]-[...]",
-             * it indicates asymmetric difference.  3. At the start of
-             * a bracket pattern, "[-...]", "[^-...]", it indicates
-             * the literal character '-'.  4. At the end of a bracket
-             * pattern, "[...-]", it indicates the literal character
-             * '-'.
-             *
-             * We handle cases 1 and 3 here.  Cases 2 and 4 are
-             * handled in the ']' parsing code.
-             */
-            else if (!isLiteral && c == '-') {
-                if (node == 10) {
-                    addPair(pairsBuf, c, c); // Handle "[-...]", "[^-...]"
-                } else if (node == 21) {
-                    node = 23;
-                } else {
-                    throw new IllegalArgumentException("Unexpected '-'");
-                }
-            } 
-
-            /**
-             * If we fall through to this point, we have a literal
-             * character, either one that has been escaped with a
-             * backslash, escaped with a backslash u, or that isn't
-             * a special '[', ']', or '-'.
-             *
-             * Literals can either start a range "x-...", end a range,
-             * "...-x", or indicate a single character "x".
-             */
-            else {
-                if (node == 10 || node == 11) {
-                    first = c;
-                    node = 21;
-                } else if (node == 21) {
-                    addPair(pairsBuf, first, first);
-                    first = c;
-                    node = 21;
-                } else if (node == 23) {
-                    if (c < first) {
-                        throw new IllegalArgumentException("Bad range");
-                    }
-                    addPair(pairsBuf, first, c);
-                    node = 11;
-                } else {
-                    throw new IllegalArgumentException("Expected '[', got '" + c + '\'');
+            } else if (lastOp == 0 && !isLiteral && (c == '-' || c == '&')) {
+                lastOp = c;
+            } else if (lastOp == '-') {
+                addPair(pairsBuf, (char)lastChar, c);
+                lastOp = 0;
+                lastChar = -1;
+            } else if (lastOp != 0) {
+                // We have <set>&<char> or <char>&<char>
+                throw new IllegalArgumentException("Unquoted " + lastOp);
+            } else {
+                if (lastChar >= 0) {
+                    // We have <char><char>
+                    addPair(pairsBuf, (char)lastChar, (char)lastChar);
                 }
+                lastChar = c;
             }
         }
 
-        if (node != 0) {
-            throw new IllegalArgumentException("Missing ']'");
+        // Handle unprocessed stuff preceding the closing ']'
+        if (lastOp == '-') {
+            // Trailing '-' is treated as literal
+            addPair(pairsBuf, lastOp, lastOp);
+        } else if (lastOp == '&') {
+            throw new IllegalArgumentException("Unquoted trailing " + lastOp);
+        }
+        if (lastChar >= 0) {
+            addPair(pairsBuf, (char)lastChar, (char)lastChar);                    
         }
 
         /**
-         * i indexes the last character we parsed or is
-         * pattern.length().  In the latter case, the node will not be
-         * zero, since we have run off the end without finding a
-         * closing ']'.  Therefore, the above statement will have
-         * thrown an exception, and we'll never get here.  If we get
-         * here, we know i < pattern.length(), and we set the
-         * ParsePosition to the next character to be parsed.
-         */
-        pos.setIndex(i+1);
-
-        /**
-         * If we saw a '^' after the initial '[' of this pattern, then
-         * perform the complement.  (Inversion after '[:' is handled
-         * elsewhere.)
+         * If we saw a '^' after the initial '[' of this pattern, then perform
+         * the complement.  (Inversion after '[:' is handled elsewhere.)
          */
         if (invert) {
             doComplement(pairsBuf);
         }
 
+        /**
+         * i indexes the last character we parsed or is pattern.length().  In
+         * the latter case, we have run off the end without finding a closing
+         * ']'.  Otherwise, we know i < pattern.length(), and we set the
+         * ParsePosition to the next character to be parsed.
+         */
+        if (i == limit) {
+            throw new IllegalArgumentException("Missing ']'");
+        }
+        pos.setIndex(i+1);
+
         return pairsBuf;
     }
 
@@ -1352,7 +1325,6 @@ public class UnicodeSet {
     /**
      * Returns the character after the given position, or '\uFFFF' if
      * there is none.
-
      */
     private static final char charAfter(String str, int i) {
         return ((++i) < str.length()) ? str.charAt(i) : '\uFFFF';
diff --git a/icu4j/src/com/ibm/text/RuleBasedTransliterator.java b/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
index 572a959963..7337a05292 100755
--- a/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
@@ -2,6 +2,7 @@ package com.ibm.text;
 
 import java.util.Hashtable;
 import java.util.Vector;
+import java.text.ParsePosition;
 
 /**
  * A transliterator that reads a set of rules in order to determine how to
@@ -181,9 +182,12 @@ import java.util.Vector;
  * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
  *
  * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.7 $ $Date: 2000/01/06 01:36:36 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.8 $ $Date: 2000/01/11 02:25:03 $
  *
  * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.8  2000/01/11 02:25:03  Alan
+ * Rewrite UnicodeSet and RBT parsers for better performance and new syntax
+ *
  * Revision 1.7  2000/01/06 01:36:36  Alan
  * Allow string arrays in rule resource bundles
  *
@@ -195,7 +199,6 @@ import java.util.Vector;
  *
  * Revision 1.4  1999/12/22 01:05:54  Alan
  * Improve masking checking; turn it off by default, for better performance
- *
  */
 public class RuleBasedTransliterator extends Transliterator {
     /**
@@ -214,8 +217,6 @@ public class RuleBasedTransliterator extends Transliterator {
 
     static final boolean DEBUG = false;
 
-    static final boolean CHECK_MASKING = true;
-
     private static final String COPYRIGHT =
         "\u00A9 IBM Corporation 1999. All rights reserved.";
 
@@ -561,33 +562,34 @@ public class RuleBasedTransliterator extends Transliterator {
         private static final char VARIABLE_DEF_OP   = '=';
         private static final char FORWARD_RULE_OP   = '>';
         private static final char REVERSE_RULE_OP   = '<';
-        private static final char FWDREV_RULE_OP    = '~'; // internal rep of FWDREF_OP_STRING
+        private static final char FWDREV_RULE_OP    = '~'; // internal rep of <> op
 
         private static final String OPERATORS = "=><";
 
-        // Forward-Reverse operator
-        // a<>b is equivalent to a<b;a>b
-        private static final String FWDREV_OP_STRING  = "<>"; // must have length 2
-
         // Other special characters
         private static final char QUOTE               = '\'';
+        private static final char ESCAPE              = '\\';
+        private static final char END_OF_RULE         = ';';
+        private static final char RULE_COMMENT_CHAR   = '#';
+
         private static final char VARIABLE_REF_OPEN   = '{';
         private static final char VARIABLE_REF_CLOSE  = '}';
-        private static final char CONTEXT_OPEN        = '[';
-        private static final char CONTEXT_CLOSE       = ']';
+        private static final char CONTEXT_OPEN        = '(';
+        private static final char CONTEXT_CLOSE       = ')';
+        private static final char SET_OPEN            = '[';
+        private static final char SET_CLOSE           = ']';
         private static final char CURSOR_POS          = '|';
-        private static final char RULE_COMMENT_CHAR   = '#';
 
         /**
          * Specials must be quoted in rules to be used as literals.
          * Specials may not occur in variable names.
          */
-        private static final String SPECIALS = "'{}[]|#" + OPERATORS;
+//!        private static final String SPECIALS = "{}[]|" + OPERATORS;
 
         /**
          * Specials that must be quoted in variable definitions.
          */
-        private static final String DEF_SPECIALS = "'{}";
+//!        private static final String DEF_SPECIALS = "{}";
 
         /**
          * @param rules list of rules, separated by semicolon characters
@@ -616,37 +618,12 @@ public class RuleBasedTransliterator extends Transliterator {
             determineVariableRange(ruleArray);
 
             StringBuffer errors = null;
-            for (int irule=0; irule<ruleArray.length; ++irule) {
-                rules = ruleArray[irule];
-                int n = rules.length();
-                int i = 0;
-                while (i<n) {
-                    int limit = rules.indexOf(';', i);
-
-                    // Recognize "\\;" as an escaped ";"
-                    while (limit>0 && rules.charAt(limit-1) == '\\') {
-                        limit = rules.indexOf(';', limit+1);
-                    }
-
-                    if (limit == -1) {
-                        limit = n;
-                    }
-                    // Skip over empty lines and line starting with #
-                    if (limit > i && rules.charAt(i) != RULE_COMMENT_CHAR) {
-                        try {
-                            applyRule(i, limit);
-                        } catch (IllegalArgumentException e) {
-                            if (errors == null) {
-                                errors = new StringBuffer(e.getMessage());
-                            } else {
-                                errors.append("\n").append(e.getMessage());
-                            }
-                        }
-                    }
-                    i = limit + 1;
-                }
+            try {
+                parseRuleArray(ruleArray);
+            } catch (IllegalArgumentException e) {
+                errors = new StringBuffer(e.getMessage());
             }
-
+            
             // Index the rules
             try {
                 data.ruleSet.freeze(data.setVariables);
@@ -663,411 +640,684 @@ public class RuleBasedTransliterator extends Transliterator {
             }
         }
 
-        /**
-         * Parse the given substring as a rule, and append it to the rules currently
-         * represented in this object.
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @exception IllegalArgumentException if there is a syntax error in the
-         * rules
-         */
-        private void applyRule(int start, int limit) {
-            /* General description of parsing: Initially, rules contain two types of
-             * quoted characters.  First, there are variable references, such as
-             * "{alpha}".  Second, there are quotes, such as "'<'" or "''".  One of
-             * the first steps in parsing a rule is to resolve such quoted matter.
-             * Quotes are removed early, leaving unquoted literal matter.  Variable
-             * references are resolved and replaced by single characters.  In some
-             * instances these characters represent themselves; in others, they
-             * stand for categories of characters.  Character categories are either
-             * predefined (e.g., "{Lu}"), or are defined by the user using a
-             * statement (e.g., "vowels:aeiouAEIOU").
-             *
-             * Another early step in parsing is to split each rule into component
-             * pieces.  These pieces are, for every rule, a left-hand side, a right-
-             * hand side, and an operator.  The left- and right-hand sides may not
-             * be empty, except for the output patterns of forward and reverse
-             * rules.  In addition to this partitioning, the match patterns of
-             * forward and reverse rules must be partitioned into antecontext,
-             * postcontext, and literal pattern, where the context portions may or
-             * may not be present.  Finally, output patterns must have the cursor
-             * indicator '|' detected and removed, with its position recorded.
-             *
-             * Quote removal, variable resolution, and sub-pattern splitting must
-             * all happen at once.  This is due chiefly to the quoting mechanism,
-             * which allows special characters to appear at arbitrary positions in
-             * the final unquoted text.  (For this reason, alteration of the rule
-             * language is somewhat clumsy; it entails reassessment and revision of
-             * the parsing methods as a whole.)
-             *
-             * After this processing of rules is complete, the final end products
-             * are unquoted pieces of text of various types, and an integer cursor
-             * position, if one is specified.  These processed raw materials are now
-             * easy to deal with; other classes such as UnicodeSet and
-             * TransliterationRule need know nothing of quoting or variables.
-             */
-            StringBuffer left = new StringBuffer();
-            StringBuffer right = new StringBuffer();
-            StringBuffer anteContext = new StringBuffer();
-            StringBuffer postContext = new StringBuffer();
-            int cursorPos[] = new int[1];
 
-            char operator = parseRule(start, limit, left, right,
-                                      anteContext, postContext, cursorPos);
+        /**
+         * Parse each string of the given array as a sequence of rules.
+         * Whitespace between rules is skipped, as is everything from a
+         * leading comment character ('#') through the end of its line.
+         * Each rule found is handed to parseRule(), which returns the
+         * index at which scanning resumes.
+         * @param ruleArray the rule strings to parse
+         * @exception IllegalArgumentException on a rule syntax error
+         */
+        private void parseRuleArray(String[] ruleArray) {
+            for (int i=0; i<ruleArray.length; ++i) {
+                String rule = ruleArray[i];
+                int limit = rule.length();
+                int pos = 0;
+                while (pos < limit) {
+                    char c = rule.charAt(pos);
+                    if (Character.isWhitespace(c)) {
+                        // Ignore leading whitespace.  Note that this is not
+                        // Unicode spaces, but Java spaces -- a subset,
+                        // representing whitespace likely to be seen in code.
+                        ++pos;
+                    } else if (c == RULE_COMMENT_CHAR) {
+                        // Skip to the start of the next line.
+                        int eol = rule.indexOf("\n", pos + 1);
+                        if (eol < 0) {
+                            break; // No "\n" found; rest of rule is a comment
+                        }
+                        pos = eol + 1;
+                    } else {
+                        // c starts a rule; parseRule() lexically parses it
+                        // and returns the index just past it.
+                        pos = parseRule(rule, pos, limit);
+                    }
+                }
+            }
+        }
+
+        /**
+         * Do a lexical parse of the next rule in the given rule string,
+         * handling quoting, escaping, and whitespace removal.  The
+         * end-of-rule character, if present, is consumed.
+         * @param rule the rule string
+         * @param pos index of the rule's first character, which must be
+         * neither whitespace nor the comment character
+         * @param limit characters at or after this index are not parsed
+         * @return the index after the last character parsed
+         * @exception IllegalArgumentException if there is a syntax error
+         */
+        int parseRule(String rule, int pos, int limit) {
+            // Locate the left side, operator, and right side
+            int start = pos;
+            char operator = 0;
+
+            StringBuffer buf = new StringBuffer();
+            int cursor = -1; // position of cursor in buf
+            int ante = -1;   // position of ante context marker ')' in buf
+            int post = -1;   // position of post context marker '(' in buf
+            int postClose = -1; // position of post context close ')' in buf
+
+            // Assigned to buf and its adjuncts after the LHS has been
+            // parsed.  Thereafter, buf etc. refer to the RHS.
+            String left = null;
+            int leftCursor = -1, leftAnte = -1, leftPost = -1, leftPostClose = -1;
+
+        main:
+            while (pos < limit) {
+                char c = rule.charAt(pos++);
+                if (Character.isWhitespace(c)) {
+                    // Ignore whitespace.  Note that this is not Unicode
+                    // spaces, but Java spaces -- a subset, representing
+                    // whitespace likely to be seen in code.
+                    continue;
+                }
+                // Handle escapes
+                if (c == ESCAPE) {
+                    if (pos == limit) {
+                        syntaxError("Trailing backslash", rule, start);
+                    }
+                    buf.append(rule.charAt(pos++));
+                    continue;
+                }
+                // Handle quoted matter
+                if (c == QUOTE) {
+                    int iq = rule.indexOf(QUOTE, pos);
+                    if (iq == pos) {
+                        buf.append(c); // Parse [''] outside quotes as [']
+                        ++pos;
+                    } else {
+                        /* This loop picks up a segment of quoted text of the
+                         * form 'aaaa' each time through.  If this segment
+                         * hasn't really ended ('aaaa''bbbb') then it keeps
+                         * looping, each time adding on a new segment.  When it
+                         * reaches the final quote it breaks.
+                         */
+                        for (;;) {
+                            if (iq < 0) {
+                                syntaxError("Unterminated quote", rule, start);
+                            }
+                            buf.append(rule.substring(pos, iq));
+                            pos = iq+1;
+                            if (pos < limit && rule.charAt(pos) == QUOTE) {
+                                // Parse [''] inside quotes as [']
+                                iq = rule.indexOf(QUOTE, pos+1);
+                                // Continue looping
+                            } else {
+                                break;
+                            }
+                        }
+                    }
+                    continue;
+                }
+                if (OPERATORS.indexOf(c) >= 0) {
+                    if (operator != 0) {
+                        syntaxError("Unquoted " + c, rule, start);
+                    }
+                    // Found an operator char.  Check for forward-reverse operator.
+                    if (c == REVERSE_RULE_OP &&
+                        (pos < limit && rule.charAt(pos) == FORWARD_RULE_OP)) {
+                        ++pos;
+                        operator = FWDREV_RULE_OP;
+                    } else {
+                        operator = c;
+                    }
+                    left = buf.toString(); // lhs
+                    leftCursor = cursor;
+                    leftAnte = ante;
+                    leftPost = post;
+                    leftPostClose = postClose;
+
+                    buf.setLength(0);
+                    cursor = ante = post = postClose = -1;
+                    continue;
+                }
+                switch (c) {
+                case END_OF_RULE:
+                    break main;
+                case VARIABLE_REF_OPEN:
+                    {
+                        int j = rule.indexOf(VARIABLE_REF_CLOSE, pos);
+                        if (pos == j || j < 0) { // empty or unterminated
+                            syntaxError("Malformed variable reference", rule, start);
+                        }
+                        String name = rule.substring(pos, j);
+                        pos = j+1;
+                        buf.append(getVariableDef(name).charValue());
+                    }
+                    break;
+                case CONTEXT_OPEN:
+                    if (post >= 0) {
+                        syntaxError("Multiple post contexts", rule, start);
+                    }
+                    // Ignore CONTEXT_OPEN if buffer length is zero -- that means
+                    // this is the optional opening delimiter for the ante context.
+                    if (buf.length() > 0) {
+                        post = buf.length();
+                    }
+                    break;
+                case CONTEXT_CLOSE:
+                    if (postClose >= 0) {
+                        syntaxError("Unexpected " + c, rule, start);
+                    }
+                    if (post >= 0) {
+                        // This is probably the optional closing delimiter
+                        // for the post context; save the pos and check later.
+                        postClose = buf.length();
+                    } else if (ante >= 0) {
+                        syntaxError("Multiple ante contexts", rule, start);
+                    } else {
+                        ante = buf.length();
+                    }
+                    break;
+                case SET_OPEN:
+                    ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
+                    buf.append(registerSet(new UnicodeSet(rule, pp,
+                                   data.variableNames, data.setVariables)).charValue());
+                    pos = pp.getIndex();
+                    break;
+                case VARIABLE_REF_CLOSE:
+                case SET_CLOSE:
+                    syntaxError("Unquoted " + c, rule, start); // always throws
+                case CURSOR_POS:
+                    if (cursor >= 0) {
+                        syntaxError("Multiple cursors", rule, start);
+                    }
+                    cursor = buf.length();
+                    break;
+                default:
+                    buf.append(c);
+                    break;
+                }
+            }
+            if (operator == 0) {
+                syntaxError("No operator", rule, start);
+            }
+
+            // Check context close parameters
+            if ((leftPostClose >= 0 && leftPostClose != left.length()) ||
+                (postClose >= 0 && postClose != buf.length())) {
+                syntaxError("Extra text after " + CONTEXT_CLOSE, rule, start);
+            }
+
+            // Context is only allowed on the input side; that is, the left side
+            // for forward rules.  Cursors are only allowed on the output side;
+            // that is, the right side for forward rules.  Bidirectional rules
+            // ignore elements that do not apply.
+
             switch (operator) {
             case VARIABLE_DEF_OP:
-                applyVariableDef(left.toString(), right.toString());
+                // LHS is the name.  RHS is a single character, either a literal
+                // or a set (already parsed).  If RHS is longer than one
+                // character, it is either a multi-character string, or multiple
+                // sets, or a mixture of chars and sets -- syntax error.
+                if (buf.length() != 1) {
+                    syntaxError("Malformed RHS", rule, start);
+                }
+                if (data.variableNames.get(left) != null) {
+                    syntaxError("Duplicate definition of {" +
+                                left + "}", rule, start);
+                }
+                data.variableNames.put(left, new Character(buf.charAt(0)));
                 break;
+
             case FORWARD_RULE_OP:
                 if (direction == FORWARD) {
+                    if (ante >= 0 || post >= 0 || leftCursor >= 0) {
+                        syntaxError("Malformed rule", rule, start);
+                    }
                     data.ruleSet.addRule(new TransliterationRule(
-                                             left.toString(), right.toString(),
-                                             anteContext.toString(), postContext.toString(),
-                                             cursorPos[0]));
+                                             left, leftAnte, leftPost,
+                                             buf.toString(), cursor));
                 } // otherwise ignore the rule; it's not the direction we want
                 break;
+
             case REVERSE_RULE_OP:
                 if (direction == REVERSE) {
+                    if (leftAnte >= 0 || leftPost >= 0 || cursor >= 0) {
+                        syntaxError("Malformed rule", rule, start);
+                    }
                     data.ruleSet.addRule(new TransliterationRule(
-                                             right.toString(), left.toString(),
-                                             anteContext.toString(), postContext.toString(),
-                                             cursorPos[0]));
+                                             buf.toString(), ante, post,
+                                             left, leftCursor));
                 } // otherwise ignore the rule; it's not the direction we want
                 break;
+
             case FWDREV_RULE_OP:
-                data.ruleSet.addRule(new TransliterationRule(
-                                         direction == FORWARD ? left.toString() : right.toString(),
-                                         direction == FORWARD ? right.toString() : left.toString(),
-                                         // Context & cursor disallowed
-                                         "", "", -1));
+                if (direction == FORWARD) {
+                    // The output side is the right; trim off any context
+                    String output = buf.toString().substring(ante < 0 ? 0 : ante,
+                                                             post < 0 ? buf.length() : post);
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             left, leftAnte, leftPost,
+                                             output, cursor));
+                } else {
+                    // The output side is the left; trim off any context
+                    String output = left.substring(leftAnte < 0 ? 0 : leftAnte,
+                                                   leftPost < 0 ? left.length() : leftPost);
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             buf.toString(), ante, post,
+                                             output, leftCursor));
+                }
                 break;
             }
+
+            return pos;
         }
 
-        /**
-         * Add a variable definition.
-         * @param name the name of the variable.  It must not already be defined.
-         * @param pattern the value of the variable.  It may be a single character
-         * or a pattern describing a character set.
-         * @exception IllegalArgumentException if there is a syntax error
-         */
-        private final void applyVariableDef(String name, String pattern) {
-            validateVariableName(name);
-            if (data.variableNames.get(name) != null) {
-                throw new IllegalArgumentException("Duplicate variable definition: "
-                                                   + name + '=' + pattern);
-            }
-//!         if (UnicodeSet.getCategoryID(name) >= 0) {
-//!             throw new IllegalArgumentException("Reserved variable name: "
-//!                                                + name);
-//!         }
-            if (pattern.length() < 1) {
-                throw new IllegalArgumentException("Variable definition missing: "
-                                                   + name);
-            }
-            if (pattern.length() == 1) {
-                // Got a single character variable definition
-                data.variableNames.put(name, new Character(pattern.charAt(0)));
-            } else {
-                // Got more than one character; parse it as a category
-                if (variableNext >= variableLimit) {
-                    throw new RuntimeException("Private use variables exhausted");
-                }
-                Character c = new Character(variableNext++);
-                data.variableNames.put(name, c);
-                data.setVariables.put(c, new UnicodeSet(pattern));
+        /**
+         * Throw an IllegalArgumentException whose message is msg followed
+         * by the rule text from start up to the terminating ';' located by
+         * quotedIndexOf(), or to the end of the string if there is none.
+         */
+        private static final void syntaxError(String msg, String rule, int start) {
+            int semi = quotedIndexOf(rule, start, rule.length(), ";");
+            String text = rule.substring(start, (semi < 0) ? rule.length() : semi);
+            throw new IllegalArgumentException(msg + " in " + text);
+        }
 
-        /**
-         * Given a rule, parses it into three pieces: The left side, the right side,
-         * and the operator.  Returns the operator.  Quotes and variable references
-         * are resolved; the otuput text in all <code>StringBuffer</code> parameters
-         * is literal text.  This method delegates to other parsing methods to
-         * handle the match pattern, output pattern, and other sub-patterns in the
-         * rule.
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @param left left side of rule is appended to this buffer
-         * with the quotes removed and variables resolved
-         * @param right right side of rule is appended to this buffer
-         * with the quotes removed and variables resolved
-         * @param anteContext the preceding context of the match pattern,
-         * if there is one, is appended to this buffer
-         * @param postContext the following context of the match pattern,
-         * if there is one, is appended to this buffer
-         * @param cursorPos if there is a cursor in the output pattern, its
-         * offset is stored in <code>cursorPos[0]</code>
-         * @return The operator character, one of the characters in OPERATORS.
-         */
-        private char parseRule(int start, int limit,
-                               StringBuffer left, StringBuffer right,
-                               StringBuffer anteContext,
-                               StringBuffer postContext,
-                               int[] cursorPos) {
-            if (false) {
-                System.err.println("Parsing " + rules.substring(start, limit));
-            }
-            /* Parse the rule into three pieces -- left, operator, and right,
-             * parsing out quotes.  The result is that left and right will have
-             * unquoted text.  E.g., "gt<'>'" will have right = ">".  Unquoted
-             * operators throw an exception.  Two quotes inside or outside
-             * quotes indicates a quote literal.  E.g., "o''clock" -> "o'clock".
-             */
-            int i = quotedIndexOf(rules, start, limit, OPERATORS);
-            if (i < 0) {
-                throw new IllegalArgumentException(
-                              "Syntax error: "
-                              + rules.substring(start, limit));
-            }
-            char c = rules.charAt(i);
-            
-            // Look for "<>" double rules.
-            if ((i+1) < limit && rules.substring(i, i+2).equals(FWDREV_OP_STRING)) {
-                if (i == start) {
-                    throw new IllegalArgumentException(
-                                  "Empty left side: "
-                                  + rules.substring(start, limit));
-                }
-                if (i+2 == limit) {
-                    throw new IllegalArgumentException(
-                                  "Empty right side: "
-                                  + rules.substring(start, limit));
-                }
-                parseSubPattern(start, i, left, null, SPECIALS);
-                parseSubPattern(i+2, limit, right, null, SPECIALS);
-                return FWDREV_RULE_OP;
-            }
 
-            switch (c) {
-            case FORWARD_RULE_OP:
-                if (i == start) {
-                    throw new IllegalArgumentException(
-                                  "Empty left side: "
-                                  + rules.substring(start, limit));
-                }
-                parseMatchPattern(start, i, left, anteContext, postContext);
-                if (i != (limit-1)) {
-                    parseOutputPattern(i+1, limit, right, cursorPos);
-                }
-                break;
-            case REVERSE_RULE_OP:
-                if (i == (limit-1)) {
-                    throw new IllegalArgumentException(
-                                  "Empty right side: "
-                                  + rules.substring(start, limit));
-                }
-                if (i != start) {
-                    parseOutputPattern(start, i, left, cursorPos);
-                }
-                parseMatchPattern(i+1, limit, right, anteContext, postContext);
-                break;
-            case VARIABLE_DEF_OP:
-                if (i == start || i == (limit-1)) {
-                    throw new IllegalArgumentException(
-                                  "Empty left or right side: "
-                                  + rules.substring(start, limit));
-                }
-                parseSubPattern(start, i, left);
-                parseDefPattern(i+1, limit, right);
-                break;
-            default:
-                throw new RuntimeException();
+
+//|        /**
+//|         * Parse the given substring as a rule, and append it to the rules currently
+//|         * represented in this object.
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @exception IllegalArgumentException if there is a syntax error in the
+//|         * rules
+//|         */
+//|        private void applyRule(int start, int limit) {
+//|            /* General description of parsing: Initially, rules contain two types of
+//|             * quoted characters.  First, there are variable references, such as
+//|             * "{alpha}".  Second, there are quotes, such as "'<'" or "''".  One of
+//|             * the first steps in parsing a rule is to resolve such quoted matter.
+//|             * Quotes are removed early, leaving unquoted literal matter.  Variable
+//|             * references are resolved and replaced by single characters.  In some
+//|             * instances these characters represent themselves; in others, they
+//|             * stand for categories of characters.  Character categories are either
+//|             * predefined (e.g., "{Lu}"), or are defined by the user using a
+//|             * statement (e.g., "vowels:aeiouAEIOU").
+//|             *
+//|             * Another early step in parsing is to split each rule into component
+//|             * pieces.  These pieces are, for every rule, a left-hand side, a right-
+//|             * hand side, and an operator.  The left- and right-hand sides may not
+//|             * be empty, except for the output patterns of forward and reverse
+//|             * rules.  In addition to this partitioning, the match patterns of
+//|             * forward and reverse rules must be partitioned into antecontext,
+//|             * postcontext, and literal pattern, where the context portions may or
+//|             * may not be present.  Finally, output patterns must have the cursor
+//|             * indicator '|' detected and removed, with its position recorded.
+//|             *
+//|             * Quote removal, variable resolution, and sub-pattern splitting must
+//|             * all happen at once.  This is due chiefly to the quoting mechanism,
+//|             * which allows special characters to appear at arbitrary positions in
+//|             * the final unquoted text.  (For this reason, alteration of the rule
+//|             * language is somewhat clumsy; it entails reassessment and revision of
+//|             * the parsing methods as a whole.)
+//|             *
+//|             * After this processing of rules is complete, the final end products
+//|             * are unquoted pieces of text of various types, and an integer cursor
+//|             * position, if one is specified.  These processed raw materials are now
+//|             * easy to deal with; other classes such as UnicodeSet and
+//|             * TransliterationRule need know nothing of quoting or variables.
+//|             */
+//|            StringBuffer left = new StringBuffer();
+//|            StringBuffer right = new StringBuffer();
+//|            StringBuffer anteContext = new StringBuffer();
+//|            StringBuffer postContext = new StringBuffer();
+//|            int cursorPos[] = new int[1];
+//|
+//|            char operator = parseRule(start, limit, left, right,
+//|                                      anteContext, postContext, cursorPos);
+//|
+//|            switch (operator) {
+//|            case VARIABLE_DEF_OP:
+//|                applyVariableDef(left.toString(), right.toString());
+//|                break;
+//|            case FORWARD_RULE_OP:
+//|                if (direction == FORWARD) {
+//|                    data.ruleSet.addRule(new TransliterationRule(
+//|                                             left.toString(), right.toString(),
+//|                                             anteContext.toString(), postContext.toString(),
+//|                                             cursorPos[0]));
+//|                } // otherwise ignore the rule; it's not the direction we want
+//|                break;
+//|            case REVERSE_RULE_OP:
+//|                if (direction == REVERSE) {
+//|                    data.ruleSet.addRule(new TransliterationRule(
+//|                                             right.toString(), left.toString(),
+//|                                             anteContext.toString(), postContext.toString(),
+//|                                             cursorPos[0]));
+//|                } // otherwise ignore the rule; it's not the direction we want
+//|                break;
+//|            case FWDREV_RULE_OP:
+//|                data.ruleSet.addRule(new TransliterationRule(
+//|                                         direction == FORWARD ? left.toString() : right.toString(),
+//|                                         direction == FORWARD ? right.toString() : left.toString(),
+//|                                         // Context & cursor disallowed
+//|                                         "", "", -1));
+//|                break;
+//|            }
+//|        }
+
+//|        /**
+//|         * Add a variable definition.
+//|         * @param name the name of the variable.  It must not already be defined.
+//|         * @param pattern the value of the variable.  It may be a single character
+//|         * or a pattern describing a character set.
+//|         * @exception IllegalArgumentException if there is a syntax error
+//|         */
+//|        private final void applyVariableDef(String name, String pattern) {
+//|            validateVariableName(name);
+//|            if (data.variableNames.get(name) != null) {
+//|                throw new IllegalArgumentException("Duplicate variable definition: "
+//|                                                   + name + '=' + pattern);
+//|            }
+//|            if (pattern.length() < 1) {
+//|                throw new IllegalArgumentException("Variable definition missing: "
+//|                                                   + name);
+//|            }
+//|            if (pattern.length() == 1) {
+//|                // Got a single character variable definition
+//|                data.variableNames.put(name, new Character(pattern.charAt(0)));
+//|            } else {
+//|                // Got more than one character; parse it as a category
+//|                UnicodeSet set = new UnicodeSet(pattern);
+//|                data.variableNames.put(name, registerSet(set));
+//|            }
+//|        }
+
+
+
+
+        private final Character registerSet(UnicodeSet set) {
+            if (variableNext >= variableLimit) {
+                throw new RuntimeException("Private use variables exhausted");
             }
+            Character c = new Character(variableNext++);
+            data.setVariables.put(c, set);
             return c;
         }
 
-        /**
-         * Parses the match pattern of a forward or reverse rule.  Given the raw
-         * match pattern, return the match text and the context on both sides, if
-         * any.  Resolves all quotes and variables.
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @param text the key to be matched will be appended to this buffer
-         * @param anteContext the preceding context, if any, will be appended
-         * to this buffer.
-         * @param postContext the following context, if any, will be appended
-         * to this buffer.
-         */
-        private void parseMatchPattern(int start, int limit,
-                                       StringBuffer text,
-                                       StringBuffer anteContext,
-                                       StringBuffer postContext) {
-            if (start >= limit) {
-                throw new IllegalArgumentException(
-                              "Empty expression in rule: "
-                              + rules.substring(start, limit));
-            }
-            if (anteContext != null) {
-                // Ignore optional opening and closing context characters
-                if (rules.charAt(start) == CONTEXT_OPEN) {
-                    ++start;
-                }
-                if (rules.charAt(limit-1) == CONTEXT_CLOSE) {
-                    --limit;
-                }
-                // The four possibilities are:
-                //             key
-                // anteContext]key
-                // anteContext]key[postContext
-                //             key[postContext
-                int ante = quotedIndexOf(rules, start, limit, String.valueOf(CONTEXT_CLOSE));
-                int post = quotedIndexOf(rules, start, limit, String.valueOf(CONTEXT_OPEN));
-                if (ante >= 0 && post >= 0 && ante > post) {
-                    throw new IllegalArgumentException(
-                                  "Syntax error in context specifier: "
-                                  + rules.substring(start, limit));
-                }
-                if (ante >= 0) {
-                    parseSubPattern(start, ante, anteContext);
-                    start = ante+1;
-                }
-                if (post >= 0) {
-                    parseSubPattern(post+1, limit, postContext);
-                    limit = post;
-                }
-            }
-            parseSubPattern(start, limit, text);
-        }
 
-        private final void parseSubPattern(int start, int limit,
-                                           StringBuffer text) {
-            parseSubPattern(start, limit, text, null, SPECIALS);
-        }
 
-        /**
-         * Parse a variable definition sub pattern.  This kind of sub
-         * pattern differs in the set of characters that are considered
-         * special.  In particular, the '[' and ']' characters are not
-         * special, since these are used in UnicodeSet patterns.
-         */
-        private final void parseDefPattern(int start, int limit,
-                                           StringBuffer text) {
-            parseSubPattern(start, limit, text, null, DEF_SPECIALS);
-        }
 
-        /**
-         * Parses the output pattern of a forward or reverse rule.  Given the
-         * output pattern, return the output text and the position of the cursor,
-         * if any.  Resolves all quotes and variables.
-         * @param rules the string to be parsed
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @param text the output text will be appended to this buffer
-         * @param cursorPos if this parameter is not null, then cursorPos[0]
-         * will be set to the cursor position, or -1 if there is none.  If this
-         * parameter is null, then cursors will be disallowed.
-         */
-        private final void parseOutputPattern(int start, int limit,
-                                              StringBuffer text,
-                                              int[] cursorPos) {
-            parseSubPattern(start, limit, text, cursorPos, SPECIALS);
-        }
-
-        /**
-         * Parses a sub-pattern of a rule.  Return the text and the position of the cursor,
-         * if any.  Resolves all quotes and variables.
-         * @param rules the string to be parsed
-         * @param start the beginning index, inclusive; <code>0 <= start
-         * <= limit</code>.
-         * @param limit the ending index, exclusive; <code>start <= limit
-         * <= rules.length()</code>.
-         * @param text the output text will be appended to this buffer
-         * @param cursorPos if this parameter is not null, then cursorPos[0]
-         * will be set to the cursor position, or -1 if there is none.  If this
-         * parameter is null, then cursors will be disallowed.
-         * @param specials characters that must be quoted; typically either
-         * SPECIALS or DEF_SPECIALS.
-         */
-        private void parseSubPattern(int start, int limit,
-                                     StringBuffer text,
-                                     int[] cursorPos,
-                                     String specials) {
-            boolean inQuote = false;
-
-            if (start >= limit) {
-                throw new IllegalArgumentException("Empty expression in rule");
-            }
-            if (cursorPos != null) {
-                cursorPos[0] = -1;
-            }
-            for (int i=start; i<limit; ++i) {
-                char c = rules.charAt(i);
-                if (c == QUOTE) {
-                    // Check for double quote
-                    if ((i+1) < limit
-                        && rules.charAt(i+1) == QUOTE) {
-                        text.append(QUOTE);
-                        ++i; // Skip over both quotes
-                    } else {
-                        inQuote = !inQuote;
-                    }
-                } else if (inQuote) {
-                    text.append(c);
-                } else if (c == VARIABLE_REF_OPEN) {
-                    ++i;
-                    int j = rules.indexOf(VARIABLE_REF_CLOSE, i);
-                    if (i == j || j < 0) { // empty or unterminated
-                        throw new IllegalArgumentException("Illegal variable reference: "
-                                                           + rules.substring(start, limit));
-                    }
-                    String name = rules.substring(i, j);
-                    validateVariableName(name);
-                    text.append(getVariableDef(name).charValue());
-                    i = j;
-                } else if (c == CURSOR_POS && cursorPos != null) {
-                    if (cursorPos[0] >= 0) {
-                        throw new IllegalArgumentException("Multiple cursors: "
-                                                           + rules.substring(start, limit));
-                    }
-                    cursorPos[0] = text.length();
-                } else if (specials.indexOf(c) >= 0) {
-                    throw new IllegalArgumentException("Unquoted special character: "
-                                                       + rules.substring(start, limit));
-                } else {
-                    text.append(c);
-                }
-            }
-        }
-
-        private static void validateVariableName(String name) {
-            if (indexOf(name, SPECIALS) >= 0) {
-                throw new IllegalArgumentException(
-                              "Special character in variable name: "
-                              + name);
-            }
-        }
+//|        /**
+//|         * Given a rule, parses it into three pieces: The left side, the right side,
+//|         * and the operator.  Returns the operator.  Quotes and variable references
+//|         * are resolved; the output text in all <code>StringBuffer</code> parameters
+//|         * is literal text.  This method delegates to other parsing methods to
+//|         * handle the match pattern, output pattern, and other sub-patterns in the
+//|         * rule.
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @param left left side of rule is appended to this buffer
+//|         * with the quotes removed and variables resolved
+//|         * @param right right side of rule is appended to this buffer
+//|         * with the quotes removed and variables resolved
+//|         * @param anteContext the preceding context of the match pattern,
+//|         * if there is one, is appended to this buffer
+//|         * @param postContext the following context of the match pattern,
+//|         * if there is one, is appended to this buffer
+//|         * @param cursorPos if there is a cursor in the output pattern, its
+//|         * offset is stored in <code>cursorPos[0]</code>
+//|         * @return The operator character, one of the characters in OPERATORS.
+//|         */
+//|        private char parseRule(int start, int limit,
+//|                               StringBuffer left, StringBuffer right,
+//|                               StringBuffer anteContext,
+//|                               StringBuffer postContext,
+//|                               int[] cursorPos) {
+//|            if (false) {
+//|                System.err.println("Parsing " + rules.substring(start, limit));
+//|            }
+//|            /* Parse the rule into three pieces -- left, operator, and right,
+//|             * parsing out quotes.  The result is that left and right will have
+//|             * unquoted text.  E.g., "gt<'>'" will have right = ">".  Unquoted
+//|             * operators throw an exception.  Two quotes inside or outside
+//|             * quotes indicates a quote literal.  E.g., "o''clock" -> "o'clock".
+//|             */
+//|            int i = quotedIndexOf(rules, start, limit, OPERATORS);
+//|            if (i < 0) {
+//|                throw new IllegalArgumentException(
+//|                              "Syntax error: "
+//|                              + rules.substring(start, limit));
+//|            }
+//|            char c = rules.charAt(i);
+//|            
+//|            // Look for "<>" double rules.
+//|            if ((i+1) < limit && rules.substring(i, i+2).equals(FWDREV_OP_STRING)) {
+//|                if (i == start) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty left side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                if (i+2 == limit) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty right side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                parseSubPattern(start, i, left, null, SPECIALS);
+//|                parseSubPattern(i+2, limit, right, null, SPECIALS);
+//|                return FWDREV_RULE_OP;
+//|            }
+//|
+//|            switch (c) {
+//|            case FORWARD_RULE_OP:
+//|                if (i == start) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty left side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                parseMatchPattern(start, i, left, anteContext, postContext);
+//|                if (i != (limit-1)) {
+//|                    parseOutputPattern(i+1, limit, right, cursorPos);
+//|                }
+//|                break;
+//|            case REVERSE_RULE_OP:
+//|                if (i == (limit-1)) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty right side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                if (i != start) {
+//|                    parseOutputPattern(start, i, left, cursorPos);
+//|                }
+//|                parseMatchPattern(i+1, limit, right, anteContext, postContext);
+//|                break;
+//|            case VARIABLE_DEF_OP:
+//|                if (i == start || i == (limit-1)) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Empty left or right side: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                parseSubPattern(start, i, left);
+//|                parseDefPattern(i+1, limit, right);
+//|                break;
+//|            default:
+//|                throw new RuntimeException();
+//|            }
+//|            return c;
+//|        }
+//|
+//|        /**
+//|         * Parses the match pattern of a forward or reverse rule.  Given the raw
+//|         * match pattern, return the match text and the context on both sides, if
+//|         * any.  Resolves all quotes and variables.
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @param text the key to be matched will be appended to this buffer
+//|         * @param anteContext the preceding context, if any, will be appended
+//|         * to this buffer.
+//|         * @param postContext the following context, if any, will be appended
+//|         * to this buffer.
+//|         */
+//|        private void parseMatchPattern(int start, int limit,
+//|                                       StringBuffer text,
+//|                                       StringBuffer anteContext,
+//|                                       StringBuffer postContext) {
+//|            if (start >= limit) {
+//|                throw new IllegalArgumentException(
+//|                              "Empty expression in rule: "
+//|                              + rules.substring(start, limit));
+//|            }
+//|            if (anteContext != null) {
+//|                // Ignore optional opening and closing context characters
+//|                if (rules.charAt(start) == CONTEXT_OPEN) {
+//|                    ++start;
+//|                }
+//|                if (rules.charAt(limit-1) == CONTEXT_CLOSE) {
+//|                    --limit;
+//|                }
+//|                // The four possibilities are:
+//|                //             key
+//|                // anteContext]key
+//|                // anteContext]key[postContext
+//|                //             key[postContext
+//|                int ante = quotedIndexOf(rules, start, limit, String.valueOf(CONTEXT_CLOSE));
+//|                int post = quotedIndexOf(rules, start, limit, String.valueOf(CONTEXT_OPEN));
+//|                if (ante >= 0 && post >= 0 && ante > post) {
+//|                    throw new IllegalArgumentException(
+//|                                  "Syntax error in context specifier: "
+//|                                  + rules.substring(start, limit));
+//|                }
+//|                if (ante >= 0) {
+//|                    parseSubPattern(start, ante, anteContext);
+//|                    start = ante+1;
+//|                }
+//|                if (post >= 0) {
+//|                    parseSubPattern(post+1, limit, postContext);
+//|                    limit = post;
+//|                }
+//|            }
+//|            parseSubPattern(start, limit, text);
+//|        }
+//|
+//|        private final void parseSubPattern(int start, int limit,
+//|                                           StringBuffer text) {
+//|            parseSubPattern(start, limit, text, null, SPECIALS);
+//|        }
+//|
+//|        /**
+//|         * Parse a variable definition sub pattern.  This kind of sub
+//|         * pattern differs in the set of characters that are considered
+//|         * special.  In particular, the '[' and ']' characters are not
+//|         * special, since these are used in UnicodeSet patterns.
+//|         */
+//|        private final void parseDefPattern(int start, int limit,
+//|                                           StringBuffer text) {
+//|            parseSubPattern(start, limit, text, null, DEF_SPECIALS);
+//|        }
+//|
+//|        /**
+//|         * Parses the output pattern of a forward or reverse rule.  Given the
+//|         * output pattern, return the output text and the position of the cursor,
+//|         * if any.  Resolves all quotes and variables.
+//|         * @param rules the string to be parsed
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @param text the output text will be appended to this buffer
+//|         * @param cursorPos if this parameter is not null, then cursorPos[0]
+//|         * will be set to the cursor position, or -1 if there is none.  If this
+//|         * parameter is null, then cursors will be disallowed.
+//|         */
+//|        private final void parseOutputPattern(int start, int limit,
+//|                                              StringBuffer text,
+//|                                              int[] cursorPos) {
+//|            parseSubPattern(start, limit, text, cursorPos, SPECIALS);
+//|        }
+//|
+//|        /**
+//|         * Parses a sub-pattern of a rule.  Return the text and the position of the cursor,
+//|         * if any.  Resolves all quotes and variables.
+//|         * @param rules the string to be parsed
+//|         * @param start the beginning index, inclusive; <code>0 <= start
+//|         * <= limit</code>.
+//|         * @param limit the ending index, exclusive; <code>start <= limit
+//|         * <= rules.length()</code>.
+//|         * @param text the output text will be appended to this buffer
+//|         * @param cursorPos if this parameter is not null, then cursorPos[0]
+//|         * will be set to the cursor position, or -1 if there is none.  If this
+//|         * parameter is null, then cursors will be disallowed.
+//|         * @param specials characters that must be quoted; typically either
+//|         * SPECIALS or DEF_SPECIALS.
+//|         */
+//|        private void parseSubPattern(int start, int limit,
+//|                                     StringBuffer text,
+//|                                     int[] cursorPos,
+//|                                     String specials) {
+//|            boolean inQuote = false;
+//|
+//|            if (start >= limit) {
+//|                throw new IllegalArgumentException("Empty expression in rule");
+//|            }
+//|            if (cursorPos != null) {
+//|                cursorPos[0] = -1;
+//|            }
+//|            for (int i=start; i<limit; ++i) {
+//|                char c = rules.charAt(i);
+//|                if (c == QUOTE) {
+//|                    // Check for double quote
+//|                    if ((i+1) < limit
+//|                        && rules.charAt(i+1) == QUOTE) {
+//|                        text.append(QUOTE);
+//|                        ++i; // Skip over both quotes
+//|                    } else {
+//|                        inQuote = !inQuote;
+//|                    }
+//|                } else if (inQuote) {
+//|                    text.append(c);
+//|                } else if (c == VARIABLE_REF_OPEN) {
+//|                    ++i;
+//|                    int j = rules.indexOf(VARIABLE_REF_CLOSE, i);
+//|                    if (i == j || j < 0) { // empty or unterminated
+//|                        throw new IllegalArgumentException("Illegal variable reference: "
+//|                                                           + rules.substring(start, limit));
+//|                    }
+//|                    String name = rules.substring(i, j);
+//|                    validateVariableName(name);
+//|                    text.append(getVariableDef(name).charValue());
+//|                    i = j;
+//|                } else if (c == CURSOR_POS && cursorPos != null) {
+//|                    if (cursorPos[0] >= 0) {
+//|                        throw new IllegalArgumentException("Multiple cursors: "
+//|                                                           + rules.substring(start, limit));
+//|                    }
+//|                    cursorPos[0] = text.length();
+//|                } else if (specials.indexOf(c) >= 0) {
+//|                    throw new IllegalArgumentException("Unquoted special character: "
+//|                                                       + rules.substring(start, limit));
+//|                } else {
+//|                    text.append(c);
+//|                }
+//|            }
+//|        }
+//|
+//|        private static void validateVariableName(String name) {
+//|            if (indexOf(name, SPECIALS) >= 0) {
+//|                throw new IllegalArgumentException(
+//|                              "Special character in variable name: "
+//|                              + name);
+//|            }
+//|        }
 
         /**
          * Returns the single character value of the given variable name.  Defined
          * names are recognized.
-         *
-         * NO LONGER SUPPORTED:
-         * If a Unicode category name is given, a standard character variable
-         * in the range firstCategoryVariable to lastCategoryVariable is returned,
-         * with value firstCategoryVariable + n, where n is the category
-         * number.
          * @exception IllegalArgumentException if the name is unknown.
          */
         private Character getVariableDef(String name) {
             Character ch = (Character) data.variableNames.get(name);
-//!         if (ch == null) {
-//!             int id = UnicodeSet.getCategoryID(name);
-//!             if (id >= 0) {
-//!                 ch = new Character((char) (firstCategoryVariable + id));
-//!                 data.variableNames.put(name, ch);
-//!                 data.setVariables.put(ch, new UnicodeSet(id));
-//!             }
-//!         }
             if (ch == null) {
                 throw new IllegalArgumentException("Undefined variable: "
                                                    + name);
@@ -1084,6 +1334,10 @@ public class RuleBasedTransliterator extends Transliterator {
          * this method may employ some other algorithm for improved speed.
          */
         private final void determineVariableRange(String[] ruleArray) {
+            // As an initial implementation, we just run through all the
+            // characters, ignoring any quoting.  This works since the quote
+            // mechanisms are outside the private use area.
+
             Range r = new Range('\uE000', 0x1900); // Private use area
             r = r.largestUnusedSubrange(ruleArray);
             
@@ -1121,7 +1375,9 @@ public class RuleBasedTransliterator extends Transliterator {
                                          String setOfChars) {
             for (int i=start; i<limit; ++i) {
                 char c = text.charAt(i);
-                if (c == QUOTE) {
+                if (c == ESCAPE) {
+                    ++i;
+                } else if (c == QUOTE) {
                     while (++i < limit
                            && text.charAt(i) != QUOTE) {}
                 } else if (setOfChars.indexOf(c) >= 0) {
diff --git a/icu4j/src/com/ibm/text/TransliterationRule.java b/icu4j/src/com/ibm/text/TransliterationRule.java
index a06801f3fd..55104c8610 100755
--- a/icu4j/src/com/ibm/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/text/TransliterationRule.java
@@ -21,9 +21,12 @@ import java.util.Dictionary;
  * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
  *
  * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.5 $ $Date: 2000/01/04 21:43:57 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.6 $ $Date: 2000/01/11 02:25:03 $
  *
  * $Log: TransliterationRule.java,v $
+ * Revision 1.6  2000/01/11 02:25:03  Alan
+ * Rewrite UnicodeSet and RBT parsers for better performance and new syntax
+ *
  * Revision 1.5  2000/01/04 21:43:57  Alan
  * Add rule indexing, and move masking check to TransliterationRuleSet.
  *
@@ -134,6 +137,46 @@ class TransliterationRule {
         }
     }
 
+
+
+
+
+
+
+    /** Constructs a rule from one combined pattern string plus context offsets.
+     * @param input input string, including key and optional ante and
+     * post context; stored unchanged as the rule's pattern
+     * @param anteContextPos offset into input to end of ante context, or
+     * -1 (any negative value) if none
+     * @param postContextPos offset into input to start of post context,
+     * or -1 (any negative value) if none; the key then runs to the end of input
+     * @param output output string
+     * @param cursorPos offset into output at which cursor is located,
+     * or -1 if none, in which case the cursor is placed at the end of output.
+     * @exception IllegalArgumentException if an offset exceeds its string's length */
+    public TransliterationRule(String input,
+                               int anteContextPos, int postContextPos,
+                               String output,
+                               int cursorPos) {
+        anteContextLength = (anteContextPos < 0) ? 0 : anteContextPos; // negative => no ante context
+        keyLength = (postContextPos < 0) ? input.length() - anteContextLength : // key runs to end of input
+            postContextPos - anteContextLength; // key ends where the post context starts
+        pattern = input;
+        this.output = output;
+        this.cursorPos = cursorPos < 0 ? output.length() : cursorPos; // negative => cursor at end of output
+        if (anteContextPos > input.length() || postContextPos > input.length() || // upper bounds only
+            cursorPos > output.length()) {
+            throw new IllegalArgumentException();
+        } // NOTE(review): postContextPos < anteContextPos is not rejected here and yields a negative keyLength
+    }
+
+
+
+
+
+
+
+
     /**
      * Return the length of the key.  Equivalent to <code>getKey().length()</code>.
      * @return the length of the match key.
@@ -171,9 +214,14 @@ class TransliterationRule {
      * Internal method.  Returns 8-bit index value for this rule.
      * This is the low byte of the first character of the key,
      * unless the first character of the key is a set.  If it's a
-     * set, the index value is -1.
+     * set, or otherwise can match multiple keys, the index value is -1.
      */
     final int getIndexValue(Dictionary variables) {
+        if (anteContextLength == pattern.length()) {
+            // A pattern with just ante context {such as foo)>bar} can
+            // match any key.
+            return -1;
+        }
         char c = pattern.charAt(anteContextLength);
         return variables.get(new Character(c)) == null ? (c & 0xFF) : -1;
     }
@@ -185,9 +233,15 @@ class TransliterationRule {
      * It matches this rule if it matches the first character of the
      * key, or if the first character of the key is a set, and the set
      * contains any character with a low byte equal to the index
-     * value.
+     * value.  If the rule contains only ante context, as in foo)>bar,
+     * then it will match any key.
      */
     final boolean matchesIndexValue(int v, Dictionary variables) {
+        if (anteContextLength == pattern.length()) {
+            // A pattern with just ante context {such as foo)>bar} can
+            // match any key.
+            return true;
+        }
         char c = pattern.charAt(anteContextLength);
         UnicodeSet set = (UnicodeSet) variables.get(new Character(c));
         return set == null ? (c & 0xFF) == v : set.containsIndexValue(v);
@@ -238,15 +292,15 @@ class TransliterationRule {
      */
     public String toString() {
         return getClass().getName() + '{'
-            + escape(anteContextLength > 0 ? ("[" + pattern.substring(0, anteContextLength) +
-                                              ']') : "")
-            + pattern.substring(anteContextLength, anteContextLength + keyLength)
-            + (anteContextLength + keyLength < pattern.length() ?
-               ("[" + pattern.substring(anteContextLength + keyLength) + ']') : "")
-            + " -> "
-            + (cursorPos < output.length()
-               ? (output.substring(0, cursorPos) + '|' + output.substring(cursorPos))
-               : output)
+            + escape((anteContextLength > 0 ? ("(" + pattern.substring(0, anteContextLength) +
+                                              ") ") : "")
+                     + pattern.substring(anteContextLength, anteContextLength + keyLength)
+                     + (anteContextLength + keyLength < pattern.length() ?
+                        (" (" + pattern.substring(anteContextLength + keyLength) + ")") : "")
+                     + " > "
+                     + (cursorPos < output.length()
+                        ? (output.substring(0, cursorPos) + '|' + output.substring(cursorPos))
+                        : output))
             + '}';
     }
 
diff --git a/icu4j/src/com/ibm/text/UnicodeSet.java b/icu4j/src/com/ibm/text/UnicodeSet.java
index c63c0de07c..975f2856fd 100755
--- a/icu4j/src/com/ibm/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/text/UnicodeSet.java
@@ -1,6 +1,7 @@
 package com.ibm.text;
 
 import java.text.*;
+import java.util.Dictionary;
 
 /**
  * A mutable set of Unicode characters.  Objects of this class
@@ -225,7 +226,7 @@ import java.text.*;
  * *Unsupported by Java (and hence unsupported by UnicodeSet).
  *
  * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.2 $ $Date: 2000/01/04 21:43:58 $ */
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.3 $ $Date: 2000/01/11 02:25:03 $ */
 public class UnicodeSet {
     /**
      * The internal representation is a StringBuffer of even length.
@@ -251,6 +252,9 @@ public class UnicodeSet {
 
     private static final int UNSUPPORTED_CATEGORY = 17;
 
+    private static final char VARIABLE_REF_OPEN = '{';
+    private static final char VARIABLE_REF_CLOSE = '}';
+
     private static final int CATEGORY_COUNT = 29;
 
     /**
@@ -293,25 +297,21 @@ public class UnicodeSet {
      * a syntax error.
      */
     public UnicodeSet(String pattern) {
-        applyPattern(pattern, false);
+        applyPattern(pattern);
     }
 
-    /**
-     * Constructs a set from the given pattern, optionally ignoring
-     * white space.  See the class description for the syntax of the
-     * pattern language.
-     * @param pattern a string specifying what characters are in the set
-     * @param ignoreSpaces if <code>true</code>, all spaces in the
-     * pattern are ignored, except those preceded by '\u005C'.  Spaces are
-     * those characters for which <code>Character.isSpaceChar()</code>
-     * is <code>true</code>.
-     * @exception <code>IllegalArgumentException</code> if the pattern
-     * contains a syntax error.
-     */
-    public UnicodeSet(String pattern, boolean ignoreSpaces) {
-        applyPattern(pattern, ignoreSpaces);
+
+
+
+
+    public UnicodeSet(String pattern, ParsePosition pos,
+                      Dictionary varNameToChar, Dictionary varCharToSet) {
+        applyPattern(pattern, pos, varNameToChar, varCharToSet);
     }
 
+
+
+
     /**
      * Constructs a set from the given Unicode character category.
      * @param category an integer indicating the character category as
@@ -328,57 +328,15 @@ public class UnicodeSet {
     }
 
     /**
-     * Modifies this set to represent the set specified by the given
-     * pattern.  See the class description for the syntax of the
-     * pattern language.
+     * Modifies this set to represent the set specified by the given pattern.
+     * See the class description for the syntax of the pattern language.
      * @param pattern a string specifying what characters are in the set
      * @exception <code>IllegalArgumentException</code> if the pattern
      * contains a syntax error.
      */
-    public final void applyPattern(String pattern) {
-        applyPattern(pattern, false);
-    }
-
-    /**
-     * Modifies this set to represent the set specified by the given
-     * pattern, optionally ignoring white space.  See the class
-     * description for the syntax of the pattern language.
-     * @param pattern a string specifying what characters are in the set
-     * @param ignoreSpaces if <code>true</code>, all spaces in the
-     * pattern are ignored.  Spaces are those characters for which
-     * <code>Character.isSpaceChar()</code> is <code>true</code>.
-     * Characters preceded by '\\' are escaped, losing any special
-     * meaning they otherwise have.  Spaces may be included by
-     * escaping them.
-     * @exception <code>IllegalArgumentException</code> if the pattern
-     * contains a syntax error.
-     */
-    public void applyPattern(String pattern, boolean ignoreSpaces) {
+    public void applyPattern(String pattern) {
         ParsePosition pos = new ParsePosition(0);
-
-        // To ignore spaces, create a new pattern without spaces.  We
-        // have to process all '\' escapes.  If '\' is encountered,
-        // insert it and the following character (if any -- let parse
-        // deal with any syntax errors) in the pattern.  This allows
-        // escaped spaces.
-        if (ignoreSpaces) {
-            StringBuffer pat = new StringBuffer();
-            for (int i=0; i<pattern.length(); ++i) {
-                char c = pattern.charAt(i);
-                if (Character.isSpaceChar(c)) {
-                    continue;
-                }
-                if (c == '\\' && (i+1) < pattern.length()) {
-                    pat.append(c);
-                    c = pattern.charAt(++i);
-                    // Fall through and append the following char
-                }
-                pat.append(c);
-            }
-            pattern = pat.toString();
-        }
-
-        pairs = parse(pattern, pos);
+        pairs = parse(pattern, pos, null, null);
         if (pos.getIndex() != pattern.length()) {
             throw new IllegalArgumentException("Parse of \"" + pattern +
                                                "\" failed at " +
@@ -386,6 +344,19 @@ public class UnicodeSet {
         }
     }
 
+
+
+
+
+    private void applyPattern(String pattern, ParsePosition pos,
+                              Dictionary varNameToChar, Dictionary varCharToSet) {
+        pairs = parse(pattern, pos, varNameToChar, varCharToSet);
+    }
+
+
+
+
+
     /**
      * Returns a string representation of this set.  If the result of
      * calling this function is passed to a UnicodeSet constructor, it
@@ -643,77 +614,137 @@ public class UnicodeSet {
         return pairs.hashCode();
     }
 
+    /**
+     * Return a programmer-readable string representation of this object.
+     */
+    public String toString() {
+        return getClass().getName() + '{' + toPattern() + '}';
+    }
+
     //----------------------------------------------------------------
     // Implementation: Pattern parsing
     //----------------------------------------------------------------
 
     /**
-     * Parses the given pattern, starting at the given position.  The
-     * character at pattern.charAt(pos.getIndex()) must be '[', or the
-     * parse fails.  Parsing continues until the corresponding closing
-     * ']'.  If a syntax error is encountered between the opening and
-     * closing brace, the parse fails.  Upon return from a successful
-     * parse, the ParsePosition is updated to point to the character
-     * following the closing ']', and a StringBuffer containing a
-     * pairs list for the parsed pattern is returned.  This method calls
-     * itself recursively to parse embedded subpatterns.
+     * Parses the given pattern, starting at the given position.  The character
+     * at pattern.charAt(pos.getIndex()) must be '[', or the parse fails.
+     * Parsing continues until the corresponding closing ']'.  If a syntax error
+     * is encountered between the opening and closing bracket, the parse fails.
+     * Upon return from a successful parse, the ParsePosition is updated to
+     * point to the character following the closing ']', and a StringBuffer
+     * containing a pairs list for the parsed pattern is returned.  This method
+     * calls itself recursively to parse embedded subpatterns.
      *
-     * @param pattern the string containing the pattern to be parsed.
-     * The portion of the string from pos.getIndex(), which must be a
-     * '[', to the corresponding closing ']', is parsed.
-     * @param pos upon entry, the position at which to being parsing.
-     * The character at pattern.charAt(pos.getIndex()) must be a '['.
-     * Upon return from a successful parse, pos.getIndex() is either
-     * the character after the closing ']' of the parsed pattern, or
-     * pattern.length() if the closing ']' is the last character of
-     * the pattern string.
-     * @return a StringBuffer containing a pairs list for the parsed
-     * substring of <code>pattern</code>
+     * @param pattern the string containing the pattern to be parsed.  The
+     * portion of the string from pos.getIndex(), which must be a '[', to the
+     * corresponding closing ']', is parsed.
+     * @param pos upon entry, the position at which to begin parsing.  The
+     * character at pattern.charAt(pos.getIndex()) must be a '['.  Upon return
+     * from a successful parse, pos.getIndex() is either the character after the
+     * closing ']' of the parsed pattern, or pattern.length() if the closing ']'
+     * is the last character of the pattern string.
+     * @return a StringBuffer containing a pairs list for the parsed substring
+     * of <code>pattern</code>
      * @exception IllegalArgumentException if the parse fails.
      */
-    private static StringBuffer parse(String pattern, ParsePosition pos) {
+    private static StringBuffer parse(String pattern, ParsePosition pos,
+                                      Dictionary varNameToChar, Dictionary varCharToSet) {
 
-        boolean invert = false;
         StringBuffer pairsBuf = new StringBuffer();
+        boolean invert = false;
 
-        /**
-         * Nodes:  0 - idle, waiting for '['
-         *        10 - like 11, but immediately after "[" or "[^"
-         *        11 - awaiting x, "]", "[...]", or "[:...:]"
-         *        21 - after x
-         *        23 - after x-
-         * 
-         * The parsing state machine moves from node 0 through zero or more
-         * other nodes back to node 0, in a successful parse.
+        int lastChar = -1; // This is either a char (0..FFFF) or -1
+        char lastOp = 0;
+
+        /* This loop iterates over the characters in the pattern.  We start at
+         * the position specified by pos.  We exit the loop when either a
+         * matching closing ']' is seen, or we read all characters of the
+         * pattern.  In the latter case an error will be thrown.
          */
-        int node = 0;
-        char first = 0;
-        int i;
 
-        /**
-         * This loop iterates over the characters in the pattern.  We
-         * start at the position specified by pos.  We exit the loop
-         * when either a matching closing ']' is seen, or we read all
-         * characters of the pattern.
+        /* Pattern syntax:
+         *  pat := '[' '^'? elem* ']'
+         *  elem := a | a '-' a | set | set op set
+         *  set := pat | (a set variable)
+         *  op := '&' | '-'
+         *  a := (a character, possibly defined by a var)
          */
-        for (i=pos.getIndex(); i<pattern.length(); ++i) {
-            char c = pattern.charAt(i);
 
-            /**
-             * Handle escapes here.  If a character is escaped, then
-             * it assumes its literal value.  This is true for all
-             * characters, both special characters and characters with
-             * no special meaning.  We also interpret '\\uxxxx' Unicode
-             * escapes here.
+        // mode 0: No chars parsed yet; next must be '['
+        // mode 1: '[' seen; if next is '^' or ':' then special
+        // mode 2: '[' '^'? seen; parse pattern and close with ']'
+        // mode 3: '[:' seen; parse category and close with ':]'
+        int mode = 0;
+        int openPos = 0; // offset to opening '['
+        int i = pos.getIndex();
+        int limit = pattern.length();
+        for (; i<limit; ++i) {
+            /* If the next element is a single character, c will be set to it,
+             * and nestedPairs will be null.  In this case isLiteral indicates
+             * whether the character should assume special meaning if it has
+             * one.  If the next element is a nested set, either via a variable
+             * reference, or via an embedded "[..]"  or "[:..:]" pattern, then
+             * nestedPairs will be set to the pairs list for the nested set, and
+             * c's value should be ignored.
              */
+            char c = pattern.charAt(i);
+            String nestedPairs = null;
             boolean isLiteral = false;
+
+            // Ignore whitespace.  This is not Unicode whitespace, but Java
+            // whitespace, a subset of Unicode whitespace.
+            if (Character.isWhitespace(c)) {
+                continue;
+            }
+
+            // Parse the opening '[' and optional following '^'
+            switch (mode) {
+            case 0:
+                if (c == '[') {
+                    mode = 1; // Next look for '^'
+                    openPos = i;
+                    continue;
+                } else {
+                    throw new IllegalArgumentException("Missing opening '['");
+                }
+            case 1:
+                mode = 2;
+                switch (c) {
+                case '^':
+                    invert = true;
+                    continue; // Back to top to fetch next character
+                case ':':
+                    if (i == openPos+1) {
+                        // '[:' cannot have whitespace in it
+                        --i;
+                        c = '[';
+                        mode = 3;
+                        // Fall through and parse category normally
+                    }
+                    break; // Fall through
+                case '-':
+                    isLiteral = true; // Treat leading '-' as a literal
+                    break; // Fall through
+                }
+                // else fall through and parse this character normally
+            }
+
+            // After opening matter is parsed ("[", "[^", or "[:"), the mode
+            // will be 2 if we want a closing ']', or 3 if we should parse a
+            // category and close with ":]".
+
+            /* Handle escapes.  If a character is escaped, then it assumes its
+             * literal value.  This is true for all characters, both special
+             * characters and characters with no special meaning.  We also
+             * interpret '\\uxxxx' Unicode escapes here (as literals).
+             */
             if (c == '\\') {
                 ++i;
-                if (i < pattern.length()) {
+                if (i < limit) {
                     c = pattern.charAt(i);
                     isLiteral = true;
                     if (c == 'u') {
-                        if ((i+4) >= pattern.length()) {
+                        if ((i+4) >= limit) {
                             throw new IllegalArgumentException("Invalid \\u escape");
                         }
                         c = '\u0000';
@@ -731,201 +762,143 @@ public class UnicodeSet {
                 }
             }
 
-            /**
-             * Within this loop, we handle each of the four
-             * conditions: '[', ']', '-', other.  The first three
-             * characters must not be escaped.
+            /* Parse variable references.  These are treated as literals.  If a
+             * variable refers to a UnicodeSet, nestedPairs is assigned here.
+             * Variable names are only parsed if varNameToChar is not null.
+             * Set variables are only looked up if varCharToSet is not null.
              */
+            else if (varNameToChar != null && !isLiteral && c == VARIABLE_REF_OPEN) {
+                ++i;
+                int j = pattern.indexOf(VARIABLE_REF_CLOSE, i);
+                if (i == j || j < 0) { // empty or unterminated
+                    throw new IllegalArgumentException("Illegal variable reference");
+                }
+                String name = pattern.substring(i, j);
+                ++j;
+                Character ch = (Character) varNameToChar.get(name);
+                if (ch == null) {
+                    throw new IllegalArgumentException("Undefined variable: "
+                                                       + name);
+                }
+                c = ch.charValue();
+                isLiteral = true;
 
-            /**
-             * An opening bracket indicates either the first bracket
-             * of the entire subpattern we are parsing, in which case
-             * we are in node 0 and move into node 10.  We also check
-             * for an immediately following '^', indicating the
-             * complement of the following pattern.  ('^' is any other
-             * position has no special meaning.)  If we are not in
-             * node 0, '[' represents a nested subpattern that must be
-             * recursively parsed and checked for following operators
-             * ('&' or '|').  If two nested subpatterns follow one
-             * another with no operator, their union is formed, just
-             * as with any other elements that follow one another
-             * without intervening operator.  The other thing we
-             * handle here is the syntax "[:Xx:]" or "[:X:]" that
-             * indicates a Unicode category or supercategory.
+                if (varCharToSet != null) {
+                    UnicodeSet set = (UnicodeSet) varCharToSet.get(ch);
+                    if (set != null) {
+                        nestedPairs = set.pairs.toString();
+                    }
+                }
+            }
+
+            /* An opening bracket indicates the first bracket of a nested
+             * subpattern, either a normal pattern or a category pattern.  We
+             * recognize these here and set nestedPairs accordingly.
              */
-            if (!isLiteral && c == '[') {
-                boolean parseOp = false;
+            else if (!isLiteral && c == '[') {
+                // Handle "[:...:]", representing a character category
                 char d = charAfter(pattern, i);
-                // "[:...:]" represents a character category
                 if (d == ':') {
-                    if (node == 23) {
-                        throw new IllegalArgumentException("Unexpected \"[:\"");
-                    }
-                    if (node == 21) {
-                        addPair(pairsBuf, first, first);
-                        node = 11;
-                    }
                     i += 2;
                     int j = pattern.indexOf(":]", i);
                     if (j < 0) {
                         throw new IllegalArgumentException("Missing \":]\"");
                     }
-                    doUnion(pairsBuf,
-                            getCategoryPairs(pattern.substring(i, j)));
-                    i = j+1;
-                    if (node == 10) {
-                        node = 11;
-                        parseOp = true;
-                    } else if (node == 0) {
+                    nestedPairs = getCategoryPairs(pattern.substring(i, j));
+                    i = j+1; // Make i point to ']'
+                    if (mode == 3) {
+                        // Entire pattern is a category; leave parse loop
+                        pairsBuf.append(nestedPairs);
                         break;
                     }
                 } else {
-                    if (node == 0) {
-                        node = 10;
-                        if (d == '^') {
-                            invert = true;
-                            ++i;
-                        }
-                    } else {
-                        // Nested '['
-                        pos.setIndex(i);
-                        doUnion(pairsBuf, parse(pattern, pos)
-                                .toString());
-                        i = pos.getIndex() - 1; // Subtract 1 to point at ']'
-                        parseOp = true;
-                    }
+                    // Recurse to get the pairs for this nested set.
+                    pos.setIndex(i); // Point at the nested '[' so the recursive parse starts there
+                    nestedPairs = parse(pattern, pos, varNameToChar, varCharToSet).toString();
+                    i = pos.getIndex() - 1; // - 1 to point at ']'
                 }
-                /**
-                 * parseOp is true after "[:...:]" or a nested
-                 * "[...]".  It is false only after the final closing
-                 * ']'.  If parseOp is true, we look past the closing
-                 * ']' to see if we have an operator character.  If
-                 * so, we parse the subsequent "[...]" recursively,
-                 * then perform the operation.  We do this in a loop
-                 * until there are no more operators.  Note that this
-                 * means the operators have equal precedence and are
-                 * bound left-to-right.
-                 */
-                if (parseOp) {
-                    for (;;) {
-                        // Is the next character an operator?
-                        char op = charAfter(pattern, i);
-                        if (op == '-' || op == '&') {
-                            pos.setIndex(i+2); // Add 2 to point AFTER op
-                            String rhs = parse(pattern, pos).toString();
-                            if (op == '-') {
-                                doDifference(pairsBuf, rhs);
-                            } else if (op == '&') {
-                                doIntersection(pairsBuf, rhs);
-                            }
-                            i = pos.getIndex() - 1; // - 1 to point at ']'
-                        } else {
-                            break;
-                        }
-                    }
-                }          
             }
 
-            /**
-             * A closing bracket can only be a closing bracket for
-             * "[...]", since the closing bracket for "[:...:]" is
-             * taken care of when the initial "[:" is seen.  When we
-             * see a closing bracket, we then know, if we were in node
-             * 21 (after x) or 23 (after x-) that nothing more is
-             * coming, and we add the last character(s) we saw to the
-             * set.  Note that a trailing '-' assumes its literal
-             * meaning, just as a leading '-' after "[" or "[^".
+            /* At this point we have either a character c, or a nested set.  If
+             * we have encountered a nested set, either embedded in the pattern,
+             * or as a variable, we have a non-null nestedPairs, and c should be
+             * ignored.  Otherwise c is the current character, and isLiteral
+             * indicates whether it is an escaped literal (or variable) or a
+             * normal unescaped character.  Unescaped characters '-', '&', and
+             * ']' have special meanings.
              */
-            else if (!isLiteral && c == ']') {
-                if (node == 0) {
-                    throw new IllegalArgumentException("Unexpected ']'");
-                }
-                if (node == 21 || node == 23) {
-                    addPair(pairsBuf, first, first);
-                    if (node == 23) {
-                        addPair(pairsBuf, '-', '-');
+            if (nestedPairs != null) {
+                if (lastChar >= 0) {
+                    if (lastOp != 0) {
+                        throw new IllegalArgumentException("Illegal rhs for " + lastChar + lastOp);
                     }
+                    addPair(pairsBuf, (char)lastChar, (char)lastChar);
+                    lastChar = -1;
                 }
-                node = 0;
+                switch (lastOp) {
+                case '-':
+                    doDifference(pairsBuf, nestedPairs);
+                    break;
+                case '&':
+                    doIntersection(pairsBuf, nestedPairs);
+                    break;
+                case 0:
+                    doUnion(pairsBuf, nestedPairs);
+                    break;
+                }
+                lastOp = 0;
+            } else if (!isLiteral && c == ']') {
+                // Final closing delimiter.  This is the only way we leave this
+                // loop if the pattern is well-formed.
                 break;
-            }
-
-            /**
-             * '-' has the following interpretations: 1. Within
-             * "[...]", between two letters, it indicates a range.
-             * 2. Between two nested bracket patterns, "[[...]-[...]",
-             * it indicates asymmetric difference.  3. At the start of
-             * a bracket pattern, "[-...]", "[^-...]", it indicates
-             * the literal character '-'.  4. At the end of a bracket
-             * pattern, "[...-]", it indicates the literal character
-             * '-'.
-             *
-             * We handle cases 1 and 3 here.  Cases 2 and 4 are
-             * handled in the ']' parsing code.
-             */
-            else if (!isLiteral && c == '-') {
-                if (node == 10) {
-                    addPair(pairsBuf, c, c); // Handle "[-...]", "[^-...]"
-                } else if (node == 21) {
-                    node = 23;
-                } else {
-                    throw new IllegalArgumentException("Unexpected '-'");
-                }
-            } 
-
-            /**
-             * If we fall through to this point, we have a literal
-             * character, either one that has been escaped with a
-             * backslash, escaped with a backslash u, or that isn't
-             * a special '[', ']', or '-'.
-             *
-             * Literals can either start a range "x-...", end a range,
-             * "...-x", or indicate a single character "x".
-             */
-            else {
-                if (node == 10 || node == 11) {
-                    first = c;
-                    node = 21;
-                } else if (node == 21) {
-                    addPair(pairsBuf, first, first);
-                    first = c;
-                    node = 21;
-                } else if (node == 23) {
-                    if (c < first) {
-                        throw new IllegalArgumentException("Bad range");
-                    }
-                    addPair(pairsBuf, first, c);
-                    node = 11;
-                } else {
-                    throw new IllegalArgumentException("Expected '[', got '" + c + '\'');
+            } else if (lastOp == 0 && !isLiteral && (c == '-' || c == '&')) {
+                lastOp = c;
+            } else if (lastOp == '-') {
+                addPair(pairsBuf, (char)lastChar, c);
+                lastOp = 0;
+                lastChar = -1;
+            } else if (lastOp != 0) {
+                // We have <set>&<char> or <char>&<char>
+                throw new IllegalArgumentException("Unquoted " + lastOp);
+            } else {
+                if (lastChar >= 0) {
+                    // We have <char><char>
+                    addPair(pairsBuf, (char)lastChar, (char)lastChar);
                 }
+                lastChar = c;
             }
         }
 
-        if (node != 0) {
-            throw new IllegalArgumentException("Missing ']'");
+        // Handle unprocessed stuff preceding the closing ']'
+        if (lastOp == '-') {
+            // Trailing '-' is treated as literal
+            addPair(pairsBuf, lastOp, lastOp);
+        } else if (lastOp == '&') {
+            throw new IllegalArgumentException("Unquoted trailing " + lastOp);
+        }
+        if (lastChar >= 0) {
+            addPair(pairsBuf, (char)lastChar, (char)lastChar);                    
         }
 
         /**
-         * i indexes the last character we parsed or is
-         * pattern.length().  In the latter case, the node will not be
-         * zero, since we have run off the end without finding a
-         * closing ']'.  Therefore, the above statement will have
-         * thrown an exception, and we'll never get here.  If we get
-         * here, we know i < pattern.length(), and we set the
-         * ParsePosition to the next character to be parsed.
-         */
-        pos.setIndex(i+1);
-
-        /**
-         * If we saw a '^' after the initial '[' of this pattern, then
-         * perform the complement.  (Inversion after '[:' is handled
-         * elsewhere.)
+         * If we saw a '^' after the initial '[' of this pattern, then perform
+         * the complement.  (Inversion after '[:' is handled elsewhere.)
          */
         if (invert) {
             doComplement(pairsBuf);
         }
 
+        /**
+         * i indexes the last character we parsed or is pattern.length().  In
+         * the latter case, we have run off the end without finding a closing
+         * ']'.  Otherwise, we know i < pattern.length(), and we set the
+         * ParsePosition to the next character to be parsed.
+         */
+        if (i == limit) {
+            throw new IllegalArgumentException("Missing ']'");
+        }
+        pos.setIndex(i+1);
+
         return pairsBuf;
     }
 
@@ -1352,7 +1325,6 @@ public class UnicodeSet {
     /**
      * Returns the character after the given position, or '\uFFFF' if
      * there is none.
-
      */
     private static final char charAfter(String str, int i) {
         return ((++i) < str.length()) ? str.charAt(i) : '\uFFFF';