ICU-65 allow explicit reverse ID of the form Foo-Bar(Bar-Baz)

X-SVN-Rev: 5840
2001-09-20 21:21:10 +00:00 · 2001-09-20 21:21:10 +00:00 · 409625bd97
commit 409625bd97
parent d038a7a071
14 changed files with 1200 additions and 194 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/JamoTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/JamoTest.java
@ -216,7 +216,7 @@ public class JamoTest extends TransliteratorTest {
        // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML " +
        "\ub4f1\uacfc " +
        "\uac19\uc774 \ud604\uc7ac \ub110\ub9ac \uc0ac\uc6a9\ub418\ub294 " +
-        "\ud45c\uc900\uc5d0\uc11c \ud544\uc694\ud558\uba70 \uc774\ub294 ISO/IEC " +
+        "\ud45c\uc900\uc5d0\uc11c \ud544\uc694\ud558\uba70 \uc774\ub294 " + //ISO/IEC " +
        "10646\uc744 \uad6c\ud604\ud558\ub294 \uacf5\uc2dd\uc801\uc778 " +
        "\ubc29\ubc95\uc785\ub2c8\ub2e4. \uc774\ub294 \ub9ce\uc740 \uc6b4\uc601 " +
        "\uccb4\uc81c, \uc694\uc998 \uc0ac\uc6a9\ub418\ub294 \ubaa8\ub4e0 " +
--- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $ 
- * $Date: 2001/09/19 17:44:09 $ 
- * $Revision: 1.43 $
+ * $Date: 2001/09/20 21:21:10 $ 
+ * $Revision: 1.44 $
 *
 *****************************************************************************************
 */
@ -961,6 +961,26 @@ public class TransliteratorTest extends TestFmwk {
        }
    }
    
+    /**
+     * Test inverse of Greek-Latin; Title()
+     */
+    public void TestCompoundInverse() {
+        Transliterator t = Transliterator.getInstance
+            ("Greek-Latin; Title()", Transliterator.REVERSE);
+        if (t == null) {
+            errln("FAIL: createInstance");
+            return;
+        }
+        String exp = "(Title);Latin-Greek";
+        if (t.getID().equals(exp)) {
+            logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
+                  t.getID());
+        } else {
+            errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
+                  t.getID() + "\", expected \"" + exp + "\"");
+        }
+    }
+
    //======================================================================
    // Support methods
    //======================================================================
--- a/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java,v $ 
- * $Date: 2001/03/30 23:33:06 $ 
- * $Revision: 1.12 $
+ * $Date: 2001/09/20 21:20:39 $ 
+ * $Revision: 1.13 $
 *
 *****************************************************************************************
 */
@ -35,7 +35,7 @@ import java.util.Vector;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.12 $ $Date: 2001/03/30 23:33:06 $
+ * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.13 $ $Date: 2001/09/20 21:20:39 $
 */
 public class CompoundTransliterator extends Transliterator {

@ -48,6 +48,14 @@ public class CompoundTransliterator extends Transliterator {
     */
    private UnicodeFilter[] filters = null;

+    /**
+     * For compound RBTs (those with an ::id block before and/or after
+     * the main rule block) we record the index of the RBT here.
+     * Otherwise, this should have a value of -1.  We need this
+     * information to implement toRules().
+     */
+    private int compoundRBTIndex;    
+
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

@ -131,6 +139,72 @@ public class CompoundTransliterator extends Transliterator {
        this(ID, FORWARD, null);
    }

+    /**
+     * Package private constructor for Transliterator from a vector of
+     * transliterators.  The vector order is FORWARD, so if dir is
+     * REVERSE then the vector order will be reversed.  The caller is
+     * responsible for fixing up the ID.
+     */
+    CompoundTransliterator(int dir,
+                           Vector list) {
+        super("", null);
+        trans = null;
+        compoundRBTIndex = -1;
+        init(list, dir, false);
+        // assume caller will fixup ID
+    }
+
+    /**
+     * Finish constructing a transliterator: only to be called by
+     * constructors.  Before calling init(), set trans and filter to NULL.
+     * @param list a vector of transliterator objects to be adopted.  It
+     * should NOT be empty.  The list should be in declared order.  That
+     * is, it should be in the FORWARD order; if direction is REVERSE then
+     * the list order will be reversed.
+     * @param direction either FORWARD or REVERSE
+     * @param fixReverseID if TRUE, then reconstruct the ID of reverse
+     * entries by calling getID() of component entries.  Some constructors
+     * do not require this because they apply a facade ID anyway.
+     * @param status the error code indicating success or failure
+     */
+    private void init(Vector list,
+                      int direction,
+                      boolean fixReverseID) {
+        // assert(trans == 0);
+
+        // Allocate array
+        int count = list.size();
+        trans = new Transliterator[count];
+
+        // Move the transliterators from the vector into an array.
+        // Reverse the order if necessary.
+        int i;
+        for (i=0; i<count; ++i) {
+            int j = (direction == FORWARD) ? i : count - 1 - i;
+            trans[i] = (Transliterator) list.elementAt(j);
+        }
+
+        // Fix compoundRBTIndex for REVERSE transliterators
+        if (compoundRBTIndex >= 0 && direction == REVERSE) {
+            compoundRBTIndex = count - 1 - compoundRBTIndex;
+        }
+
+        // If the direction is UTRANS_REVERSE then we may need to fix the
+        // ID.
+        if (direction == REVERSE && fixReverseID) {
+            StringBuffer newID = new StringBuffer();
+            for (i=0; i<count; ++i) {
+                if (i > 0) {
+                    newID.append(ID_DELIM);
+                }
+                newID.append(trans[i].getID());
+            }
+            setID(newID.toString());
+        }
+
+        computeMaximumContextLength();
+    }
+
    /**
     * Return the IDs of the given list of transliterators, concatenated
     * with ';' delimiting them.  Equivalent to the perlish expression
--- a/icu4j/src/com/ibm/icu/text/NormalizationTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/NormalizationTransliterator.java
@ -13,7 +13,7 @@ import java.util.*;

 /*
 * @author Alan Liu
- * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.1 $ $Date: 2001/06/12 23:01:55 $
+ * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.2 $ $Date: 2001/09/20 21:20:39 $
 */
 public class NormalizationTransliterator extends Transliterator {

@ -31,25 +31,25 @@ public class NormalizationTransliterator extends Transliterator {
     * System registration hook.
     */
    static void register() {
-        Transliterator.registerFactory("NFC", new Transliterator.Factory() {
+        Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {
            public Transliterator getInstance() {
                return NormalizationTransliterator.
                    getInstance(Normalizer.COMPOSE);
            }
        });
-        Transliterator.registerFactory("NFD", new Transliterator.Factory() {
+        Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {
            public Transliterator getInstance() {
                return NormalizationTransliterator.
                    getInstance(Normalizer.DECOMP);
            }
        });
-        Transliterator.registerFactory("NFKC", new Transliterator.Factory() {
+        Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {
            public Transliterator getInstance() {
                return NormalizationTransliterator.
                    getInstance(Normalizer.COMPOSE_COMPAT);
            }
        });
-        Transliterator.registerFactory("NFKD", new Transliterator.Factory() {
+        Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {
            public Transliterator getInstance() {
                return NormalizationTransliterator.
                    getInstance(Normalizer.DECOMP_COMPAT);
--- a/icu4j/src/com/ibm/icu/text/NullTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/NullTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/NullTransliterator.java,v $ 
- * $Date: 2000/06/28 20:49:54 $ 
- * $Revision: 1.8 $
+ * $Date: 2001/09/20 21:20:39 $ 
+ * $Revision: 1.9 $
 *
 *****************************************************************************************
 */
@ -21,9 +21,10 @@ public class NullTransliterator extends Transliterator {
        "\u00A9 IBM Corporation 2000. All rights reserved.";

    /**
-     * Package accessible ID for this transliterator.
+     * Package accessible IDs for this transliterator.
     */
-    static String _ID = "Null";
+    static String SHORT_ID = "Null";
+    static String _ID      = "Any-Null";

    /**
     * Constructs a transliterator.
--- a/icu4j/src/com/ibm/icu/text/RemoveTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/RemoveTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RemoveTransliterator.java,v $ 
- * $Date: 2001/04/04 18:06:53 $ 
- * $Revision: 1.1 $
+ * $Date: 2001/09/20 21:20:39 $ 
+ * $Revision: 1.2 $
 *
 *****************************************************************************************
 */
@ -22,7 +22,7 @@ public class RemoveTransliterator extends Transliterator {
    /**
     * Package accessible ID for this transliterator.
     */
-    static String _ID = "Remove";
+    static String _ID = "Any-Remove";

    /**
     * Constructs a transliterator.
--- a/icu4j/src/com/ibm/icu/text/Transliterator.java
+++ b/icu4j/src/com/ibm/icu/text/Transliterator.java
@ -4,9 +4,9 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
- * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $ 
- * $Date: 2001/09/19 17:43:38 $ 
- * $Revision: 1.38 $
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
+ * $Date: 2001/09/20 21:20:39 $
+ * $Revision: 1.39 $
 *
 *****************************************************************************************
 */
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: Transliterator.java,v $ $Revision: 1.38 $ $Date: 2001/09/19 17:43:38 $
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.39 $ $Date: 2001/09/20 21:20:39 $
 */
 public abstract class Transliterator {
    /**
@ -262,7 +262,7 @@ public abstract class Transliterator {
     * @see RuleBasedTransliterator
     * @see CompoundTransliterator
     */
-    public static final int REVERSE = 1;    
+    public static final int REVERSE = 1;

    /**
     * Position structure for incremental transliteration.  This data
@ -337,7 +337,7 @@ public abstract class Transliterator {
     */
    private String ID;

-    /** 
+    /**
     * This transliterator's filter.  Any character for which
     * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
     * altered by this transliterator.  If <tt>filter</tt> is
@ -380,6 +380,12 @@ public abstract class Transliterator {

    private static Hashtable displayNameCache;

+    // TODO Add documentation
+    // TODO Add documentation
+    // TODO Add documentation
+    // TODO Add documentation
+    private static TransliteratorRegistry registry;
+
    /**
     * Prefix for resource bundle key for the display name for a
     * transliterator.  The ID is appended to this to form the key.
@ -412,6 +418,10 @@ public abstract class Transliterator {
    private static final String RB_LOCALE_ELEMENTS =
        "com.ibm.text.resources.LocaleElements";

+    protected static final char ID_DELIM = ';';
+
+    protected static final char ID_SEP = '-';
+
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

@ -509,7 +519,7 @@ public abstract class Transliterator {
     * pending transliterations, clients should call {@link
     * #finishTransliteration} after the last call to this
     * method has been made.
-     * 
+     *
     * @param text the buffer holding transliterated and untransliterated text
     * @param index the start and limit of the text, the position
     * of the cursor, and the start and limit of transliteration.
@ -771,74 +781,47 @@ public abstract class Transliterator {
     * @see #getAvailableIDs
     * @see #getID
     */
-    public static Transliterator getInstance(String ID, int direction) {
-        if (ID.indexOf(';') >= 0) {
-            return new CompoundTransliterator(ID, direction, null);
-        }
-        
-        // 'id' is the ID with the filter pattern removed and with
-        // whitespace deleted.
-        StringBuffer id = new StringBuffer(ID);
-        
-        // Look for embedded filter pattern
-        UnicodeSet filter = null;
-        int setStart = ID.indexOf('[');
-        int setLimit = 0;
-        if (setStart >= 0) {
-            ParsePosition pos = new ParsePosition(setStart);
-            filter = new UnicodeSet(ID, pos, null);
-            setLimit = pos.getIndex();
-            id.delete(setStart, setLimit);
-        }
-        
-        // Delete whitespace
-        int i;
-        for (i=0; i<id.length(); ++i) {
-            if (UCharacter.isWhitespace(id.charAt(i))) {
-                id.deleteCharAt(i);
-                --i;
-            }
-        }
-
-        // Fix the id, if necessary, by reversing it (A-B => B-A).
-        // Record the position of the separator.  Detect the special
-        // case of Null, whose inverse is itself.  Given an ID with no
-        // separator "Foo", an abbreviation for "Any-Foo", consider
-        // the inverse to be "Foo-Any".
-        String str = id.toString();
-        int sep = str.indexOf('-');
-        if (str.equalsIgnoreCase(NullTransliterator._ID)) {
-            sep = id.length();
-        } else if (direction == REVERSE) {
-            String left;
-            if (sep >= 0) {
-                left = id.substring(0, sep);
-                id.delete(0, sep+1);
-            } else {
-                left = "Any";
-            }
-            sep = id.length();
-            id.append('-').append(left);
-        } else if (sep < 0) {
-            sep = id.length();
-        }
-
-        Transliterator t = internalGetInstance(id.toString());
-        if (t != null) {
-            if (filter != null) {
-                t.setFilter(filter);
-                id.insert(sep, ID.substring(setStart, setLimit));
-            }
-            t.ID = id.toString();
-            return t;
-        }
-        
-        throw new IllegalArgumentException("Unsupported transliterator: "
-                                           + ID);
+    public static final Transliterator getInstance(String ID, int direction) {
+        return getInstance(ID, direction, -1, null);
    }

    public static final Transliterator getInstance(String ID) {
-        return getInstance(ID, FORWARD);
+        return getInstance(ID, FORWARD, -1, null);
+    }
+
+    /**
+     * Create a transliterator given a compound ID (possibly degenerate,
+     * with no ID_DELIM).  If idSplitPoint >= 0 and adoptedSplitTrans !=
+     * 0, then insert adoptedSplitTrans in the compound ID at offset
+     * idSplitPoint.  Otherwise idSplitPoint should be -1 and
+     * adoptedSplitTrans should be 0.  The resultant transliterator will
+     * be an atomic (non-compound) transliterator if this is indicated by
+     * ID.  Otherwise it will be a compound translitertor.
+     */
+    private static Transliterator getInstance(String ID,
+                                              int dir,
+                                              int idSplitPoint,
+                                              Transliterator adoptedSplitTrans) {
+        Vector list = new Vector();
+        int[] ignored = new int[1];
+        StringBuffer regenID = new StringBuffer();
+        parseCompoundID(ID, regenID, dir, idSplitPoint, adoptedSplitTrans,
+                        list, ignored);
+
+        Transliterator t = null;
+        switch (list.size()) {
+        case 0:
+            t = new NullTransliterator();
+            break;
+        case 1:
+            t = (Transliterator) list.elementAt(0);
+            break;
+        default:
+            t = new CompoundTransliterator(dir, list);
+            break;
+        }
+        t.setID(regenID.toString());
+        return t;
    }

    /**
@ -854,6 +837,428 @@ public abstract class Transliterator {
        return new RuleBasedTransliterator(ID, rules, direction, null);
    }

+    public String toRules(boolean escapeUnprintable) {
+        // The base class implementation of toRules munges the ID into
+        // the correct format.  That is: foo => ::foo
+        // KEEP in sync with rbt_pars
+        return "::" + getID() + ID_DELIM;
+    }
+
+    /**
+     * Parse a compound ID (possibly a degenerate one, containing no
+     * ID_DELIM).  If idSplitPoint >= 0 and adoptedSplitTrans != 0, then
+     * insert adoptedSplitTrans in the compound ID at offset idSplitPoint.
+     * Otherwise idSplitPoint should be -1 and adoptedSplitTrans should be
+     * 0.  Return in the result vector the instantiated transliterator
+     * objects (one of these will be adoptedSplitTrans, if the latter was
+     * specified).  These will be in order of id, so if dir is REVERSE,
+     * then the caller will have to reverse the order.
+     *
+     * @param regenID regenerated ID, reversed if appropriate, which
+     * should be applied to the final created transliterator
+     * @param splitTransIndex output parameter to receive the index in
+     * 'result' at which the adoptedSplitTrans is stored, or -1 if
+     * adoptedSplitTrans == 0
+     */
+    private static void parseCompoundID(String id,
+                                        StringBuffer regenID,
+                                        int dir,
+                                        int idSplitPoint,
+                                        Transliterator adoptedSplitTrans,
+                                        Vector result,
+                                        int[] splitTransIndex) {
+        regenID.setLength(0);
+        splitTransIndex[0] = -1;
+        int pos = 0;
+        int i;
+        while (pos < id.length()) {
+            // We compare (pos >= split), not (pos == split), so we can
+            // skip over whitespace (see below).
+            if (pos >= idSplitPoint && adoptedSplitTrans != null) {
+                splitTransIndex[0] = result.size();
+                result.addElement(adoptedSplitTrans);
+                adoptedSplitTrans = null;
+            }
+            int[] p = new int[] { pos };
+            boolean[] sawDelimiter = new boolean[1];
+            Transliterator t =
+                parseID(id, regenID, p, sawDelimiter, dir, true);
+
+            if (p[0] == pos || (p[0] < id.length() && !sawDelimiter[0])) {
+                // TODO
+                //throw new IllegalArgumentException("Invalid ID " + id);
+                throw new IllegalArgumentException("Invalid ID " + id +
+                                                   " p[0]=" + p[0] +
+                                                   " pos=" + pos +
+                                                   " id.length()=" + id.length() +
+                                                   " sawDelimite[0]=" + sawDelimiter[0] +
+                                                   "");
+            }
+            pos = p[0];
+            // The return value may be NULL when, for instance, creating a
+            // REVERSE transliterator of ID "Latin-Greek()".
+            if (t != null) {
+                result.addElement(t);
+            }
+        }
+
+        // Handle case of idSplitPoint == id.length()
+        if (pos >= idSplitPoint && adoptedSplitTrans != null) {
+            splitTransIndex[0] = result.size();
+            result.addElement(adoptedSplitTrans);
+            adoptedSplitTrans = null;
+        }
+    }
+
+    /**
+     * Parse a single ID, possibly including an inline filter, and return
+     * the resultant transliterator object.  NOTE: If 'create' is false,
+     * then the amount of syntax checking is limited.  However, the 'pos'
+     * parameter will be updated correctly, assuming the input string is
+     * valid.
+     *
+     * A trailing /;? \s* / is skipped.  The parameter sawDelimiter
+     * indicates whether the ';' was seen or not.  Upon return, if pos is
+     * advanced, it will either point to a non-whitespace character past
+     * the trailing ';', if any, or be equal to length().
+     *
+     * @param ID the ID string
+     * @param regenID regenerated ID, reversed if appropriate, which
+     * should be applied to the final created transliterator.  This method
+     * will append to this parameter for FORWARD direction and insert
+     * addition text at offset 0 for REVERSE direction.  If create is
+     * false then this parameter is not used.
+     * @param pos INPUT-OUTPUT parameter.  On input, the position of the
+     * first character to parse.  On output, the position after the last
+     * character parsed.  This will be a semicolon or ID.length().  In the
+     * case of an error this value will be unchanged.
+     * @param create if true, create and return the result.  If false,
+     * only scan the ID, and return NULL.
+     * @return a newly created transliterator, or NULL.  NULL is returned
+     * in all cases if create is false.  If create is true, then NULL is
+     * returned on error, or if the ID is effectively empty.
+     * E.g. "Latin-Greek()" with dir == REVERSE.  Do NOT check for NULL to
+     * determine if there was an error.  Instead, check to see if pos
+     * moved.
+     */
+     private static Transliterator parseID(String ID,
+                                           StringBuffer regenID,
+                                           int[] pos,
+                                           boolean[] sawDelimiter,
+                                           int dir,
+                                           boolean create) {
+        int limit, preDelimLimit,
+            revStart, revLimit=0,
+            idStart, idLimit,
+            setStart, setLimit;
+
+        UnicodeSet[] filter = new UnicodeSet[1];
+        int[] indices = new int[4];
+
+        if (!parseIDBounds(ID, pos[0], false, indices, filter)) {
+            return null;
+        }
+        limit = indices[0];
+        setStart = indices[1];
+        setLimit = indices[2];
+        revStart = indices[3];
+
+        idStart = pos[0];
+        idLimit = limit;
+
+        if (revStart >= 0 && revStart < limit) {
+            int revSetStart, revSetLimit;
+            UnicodeSet[] revFilter = new UnicodeSet[1];
+            if (!parseIDBounds(ID, revStart+1, true, indices, revFilter)) {
+                return null;
+            }
+            revLimit = indices[0];
+            revSetStart = indices[1];
+            revSetLimit = indices[2];
+            // we ignore indices[3]
+
+            // revStart points to '('
+            if (dir == REVERSE) {
+                idStart = revStart+1;
+                idLimit = revLimit;
+                setStart = revSetStart;
+                setLimit = revSetLimit;
+                filter[0] = revFilter[0];
+            } else {
+                idLimit = revStart;
+            }
+            // assert(revLimit < ID.length() && ID.charAt(revLimit) == ')');
+            limit = revLimit+1;
+        } else {
+            // Ignore () exprs outside of this atomic ID, that is, in
+            // "Greek-Latin; Title()", ignore the "()" after Title when
+            // parsing Greek-Latin.
+            revStart = -1;
+        }
+
+        // Advance limit past /\s*;?\s*/
+        preDelimLimit = limit;
+        limit = skipSpaces(ID, limit);
+        sawDelimiter[0] = (limit < ID.length() && ID.charAt(limit) == ID_DELIM);
+        if (sawDelimiter[0]) {
+            limit = skipSpaces(ID, ++limit);
+        }
+
+        if (!create) {
+            // TODO Improve performance by scanning the UnicodeSet pattern
+            // without actually constructing it, if create is false.  That
+            // is, create a method like this one for UnicodeSet.
+            pos[0] = limit;
+            return null;
+        }
+
+        // 'id' is the ID with the filter pattern removed and with
+        // whitespace deleted.  In a Foo(Bar) ID, id is Foo for FORWARD
+        // and Bar for REVERSE.
+        String str;
+        str = ID.substring(setLimit, idLimit);
+        StringBuffer id = new StringBuffer(ID.substring(idStart, setStart));
+        id.append(str);
+
+        // Delete whitespace
+        int i;
+        for (i=0; i<id.length(); ++i) {
+            if (UCharacter.isWhitespace(id.charAt(i))) {
+                id.deleteCharAt(i);
+                --i;
+            }
+        }
+
+        // Fix the id, if necessary, by reversing it (A-B => B-A).  This
+        // is only done if the id is NOT of the form Foo(Bar).  Record the
+        // position of the separator.
+        //
+        // For both A-B and Foo(Bar) ids, detect the special case of Null,
+        // whose inverse is itself.  Given an ID with no separator "Foo",
+        // an abbreviation for "Any-Foo", consider the inverse to be
+        // "Foo-Any".
+        int sep = id.toString().indexOf(ID_SEP);
+        if (sep < 0 && id.toString().equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
+            // Handle "Null"
+            sep = id.length();
+        } else if (dir == REVERSE &&
+                   id.toString().equalsIgnoreCase(NullTransliterator._ID)) {
+            // Reverse of "Any-Null" => "Null"
+            id.delete(0, sep+1);
+            sep = id.length();
+        } else if (dir == REVERSE && revStart < 0) {
+            if (sep >= 0) {
+                str = id.substring(0, sep);
+                id.delete(0, sep+1);
+            } else {
+                str = "Any";
+            }
+            sep = id.length();
+            id.append(ID_SEP).append(str);
+        } else if (sep < 0 && id.length() > 0) {
+            // Don't do anything for empty IDs -- we handle these specially below
+            str = "Any-";
+            sep = str.length() - 1;
+            id.insert(0, str);
+        }
+
+        Transliterator t = null;
+
+        // If we have a reverse part of the ID, e.g., Foo(Bar), then we
+        // need to check for an empty part, which represents a Null
+        // transliterator.  We return 0 (not a NullTransliterator).  If we
+        // are not of the form Foo(Bar) then an empty string is illegal.
+        if (revStart >= 0 && id.length() == 0) {
+            // Ignore any filters; filters on Null are meaningless (and we
+            // can't attach them to 0 anyway)
+            filter = null;
+        }
+
+        else {
+            StringBuffer s = new StringBuffer();
+
+            synchronized (registry) {
+                t = registry.get(id.toString(), s);
+                // Need to enclose this in a block to prevent deadlock when
+                // instantiating aliases (below).
+            }
+
+            if (s.length() != 0) {
+                // assert(t==0);
+                // Instantiate an alias
+                t = getInstance(s.toString(), FORWARD);
+            }
+
+            if (t == null) {
+                // Creation failed; the ID is invalid or is an alias
+                filter[0] = null;
+                return null;
+            }
+
+            // Set the filter, if any
+            t.setFilter(filter[0]);
+        }
+
+        // Set the ID.  This is normally just a substring of the input
+        // ID, but for reverse transliterators we need to munge A-B to
+        // B-A or Foo(Bar) to Bar(Foo).
+        if (dir == FORWARD) {
+            id.setLength(0);
+            id.append(ID.substring(pos[0], preDelimLimit));
+        } else if (revStart < 0) {
+            id.insert(sep, ID.substring(setStart, setLimit));
+        } else {
+            // Change Foo(Bar) to Bar(Foo)
+            str = ID.substring(pos[0], revStart);
+            str = str.trim();
+            id.setLength(0);
+            id.append(ID.substring(revStart+1, revLimit));
+            // TODO make this more efficient
+            id = new StringBuffer(id.toString().trim());
+            id.append('(').append(str).append(')');
+        }
+        // TODO make this more efficient
+        id = new StringBuffer(id.toString().trim());
+
+        if (t != null) {
+            t.setID(id.toString());
+        }
+
+        // Regenerate ID of a compound entity
+        if (dir == FORWARD) {
+            if (regenID.length() != 0) {
+                regenID.append(ID_DELIM);
+            }
+            regenID.append(id);
+        } else {
+            if (regenID.length() != 0) {
+                regenID.insert(0, ID_DELIM);
+            }
+            regenID.insert(0, id);
+        }
+
+        // Indicate success by bumping pos past the final /;?\s*/.
+        pos[0] = limit;
+
+        return t;
+    }
+
+    /**
+     * Internal method used by parseID.  Given a piece of a single ID,
+     * find the boundaries of various parts.  For IDs of the form
+     * Foo(Bar), this method parses the Foo, then the Bar.  In each piece
+     * it locates any inline UnicodeSet pattern [setStart, setLimit)
+     * and finds the limit (this will point to either ';' or ')' or
+     * ID.length()).
+     *
+     * @param ID the ID to be parsed
+     * @param pos the index of ID at which to start
+     * @param withinParens if true, parse the Bar of Foo(Bar), stop at a
+     * close paren, and do not look for an open paren.  If true then a
+     * close paren MUST be seen or false is returned; if false then the
+     * ';' delimiter is optional.
+     * @param limit set to the position of ';' or ')' (depending on
+     * withinParens), or ID.length() if no delimiter was found
+     * @param setStart set to the start of an inline filter pattern,
+     * or pos if none
+     * @param setLimit set to the limit of an inline filter pattern,
+     * or pos if none
+     * @param revStart if not withinParens then set to the position of the
+     * first '(', which may be > limit; otherwise set to -1
+     * @param filter set to a newly created UnicodeSet object for the
+     * inline filter pattern, if any; OWNED BY THE CALLER
+     *
+     * @return true if the pattern is valid, false is there is an invalid
+     * UnicodeSet pattern or if withinParens is true and no close paren is
+     * seen.
+     */
+    private static boolean parseIDBounds(String ID,
+                                         int pos,
+                                         boolean withinParens,
+                                         int[] indices,
+                                         UnicodeSet[] filter) {
+        int limit;
+        int setStart;
+        int setLimit;
+        int revStart;
+
+        char endDelimiter = withinParens ? ')' : ID_DELIM;
+        limit = ID.indexOf(endDelimiter, pos);
+        if (limit < 0) {
+            if (withinParens) {
+                //return false;
+                throw new IllegalArgumentException("Missing closing parenthesis in " + ID);
+            }
+            limit = ID.length();
+        }
+        setStart = ID.indexOf('[', pos);
+        revStart = withinParens ? -1 : ID.indexOf('(', pos);
+
+        if (setStart >= 0 && setStart < limit &&
+            (revStart < 0 || setStart < revStart)) {
+            ParsePosition ppos = new ParsePosition(setStart);
+            // TODO Improve performance by scanning the UnicodeSet pattern
+            // without actually constructing it, if create is false.  That
+            // is, create a method like this one for UnicodeSet.
+            filter[0] = new UnicodeSet();
+            filter[0].applyPattern(ID, ppos, null, true);
+            setLimit = ppos.getIndex();
+            if (limit < setLimit) {
+                limit = ID.indexOf(endDelimiter, setLimit);
+                if (limit < 0) {
+                    if (withinParens) {
+                        //return false;
+                        throw new IllegalArgumentException("Missing closing parenthesis in " + ID);
+                    }
+                    limit = ID.length();
+                }
+            }
+            if (revStart >= 0 && revStart < setLimit) {
+                revStart = ID.indexOf(')', setLimit);
+            }
+        } else {
+            setStart = setLimit = pos;
+        }
+        indices[0] = limit;
+        indices[1] = setStart;
+        indices[2] = setLimit;
+        indices[3] = revStart;
+        return true;
+    }
+
+    /**
+     * If pos is the index of a space in str, then advance it over that
+     * space and any immediately subsequent ones.
+     */
+    private static int skipSpaces(String str,
+                                  int pos) {
+        while (pos < str.length() &&
+               UCharacter.isWhitespace(str.charAt(pos))) {
+            ++pos;
+        }
+        return pos;
+    }
+
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    static Transliterator tempGet(String id, StringBuffer aliasReturn) {
+        aliasReturn.setLength(0);
+        if (id.equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
+            id = NullTransliterator._ID;
+            // Temporary hack to make this work
+        }
+        return internalGetInstance(id);
+    }
+
    /**
     * Returns this transliterator's inverse.  See the class
     * documentation for details.  This implementation simply inverts
@ -877,7 +1282,7 @@ public abstract class Transliterator {
    public final Transliterator getInverse() {
        return getInstance(ID, REVERSE);
    }
-    
+
    /**
     * Returns a transliterator object given its ID.  Unlike getInstance(),
     * this method returns null if it cannot make use of the given ID.
@ -891,7 +1296,7 @@ public abstract class Transliterator {
            obj = internalCache.get(ciID);
            sourceCache = internalCache;
        }
-        
+
        if (obj != null) {
            if (obj instanceof RuleBasedTransliterator.Data) {
                data = (RuleBasedTransliterator.Data) obj;
@ -925,7 +1330,7 @@ public abstract class Transliterator {
                        } catch (IllegalArgumentException e2) {
                            // Can't load UTF8 file
                        }
-                        
+
                        if (r != null) {
                            data = RuleBasedTransliterator.parse(r, dir);
                            sourceCache.put(ciID, data);
@ -950,7 +1355,7 @@ public abstract class Transliterator {
 //     * Find a path through the composed transliterator graph.  This
 //     * will not necessarily be the only path, or the shortest path.
 //     * This is a simple recursive algorithm.
-//     * 
+//     *
 //     * <p><code>composedGraph</code> is the links table.
 //     * composedGraph.get(x) should return a String[] array, each of
 //     * which is a node that x is connected to.
@ -984,7 +1389,7 @@ public abstract class Transliterator {
 //                }
 //            }
 //        }
-//        path.removeElementAt(path.size() - 1);    
+//        path.removeElementAt(path.size() - 1);
 //        return false;
 //    }

@ -1020,7 +1425,7 @@ public abstract class Transliterator {
    /**
     * Unregisters a transliterator or class.  This may be either
     * a system transliterator or a user transliterator or class.
-     * 
+     *
     * @param ID the ID of the transliterator or class
     * @return the <code>Object</code> that was registered with
     * <code>ID</code>, or <code>null</code> if none was
@ -1082,6 +1487,9 @@ public abstract class Transliterator {
    }

    static {
+        // TODO FINISH
+        registry = new TransliteratorRegistry();
+
        // The display name cache starts out empty
        displayNameCache = new Hashtable();

@ -1145,7 +1553,7 @@ public abstract class Transliterator {
                      HangulJamoTransliterator.class, null);
        registerClass(JamoHangulTransliterator._ID,
                      JamoHangulTransliterator.class, null);
-                      
+
        registerClass(HexToUnicodeTransliterator._ID,
                      HexToUnicodeTransliterator.class, null);
        registerClass(UnicodeToHexTransliterator._ID,
--- a/icu4j/src/com/ibm/test/translit/JamoTest.java
+++ b/icu4j/src/com/ibm/test/translit/JamoTest.java
@ -216,7 +216,7 @@ public class JamoTest extends TransliteratorTest {
        // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML " +
        "\ub4f1\uacfc " +
        "\uac19\uc774 \ud604\uc7ac \ub110\ub9ac \uc0ac\uc6a9\ub418\ub294 " +
-        "\ud45c\uc900\uc5d0\uc11c \ud544\uc694\ud558\uba70 \uc774\ub294 ISO/IEC " +
+        "\ud45c\uc900\uc5d0\uc11c \ud544\uc694\ud558\uba70 \uc774\ub294 " + //ISO/IEC " +
        "10646\uc744 \uad6c\ud604\ud558\ub294 \uacf5\uc2dd\uc801\uc778 " +
        "\ubc29\ubc95\uc785\ub2c8\ub2e4. \uc774\ub294 \ub9ce\uc740 \uc6b4\uc601 " +
        "\uccb4\uc81c, \uc694\uc998 \uc0ac\uc6a9\ub418\ub294 \ubaa8\ub4e0 " +
--- a/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $ 
- * $Date: 2001/09/19 17:44:09 $ 
- * $Revision: 1.43 $
+ * $Date: 2001/09/20 21:21:10 $ 
+ * $Revision: 1.44 $
 *
 *****************************************************************************************
 */
@ -961,6 +961,26 @@ public class TransliteratorTest extends TestFmwk {
        }
    }
    
+    /**
+     * Test inverse of Greek-Latin; Title()
+     */
+    public void TestCompoundInverse() {
+        Transliterator t = Transliterator.getInstance
+            ("Greek-Latin; Title()", Transliterator.REVERSE);
+        if (t == null) {
+            errln("FAIL: createInstance");
+            return;
+        }
+        String exp = "(Title);Latin-Greek";
+        if (t.getID().equals(exp)) {
+            logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
+                  t.getID());
+        } else {
+            errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
+                  t.getID() + "\", expected \"" + exp + "\"");
+        }
+    }
+
    //======================================================================
    // Support methods
    //======================================================================
--- a/icu4j/src/com/ibm/text/CompoundTransliterator.java
+++ b/icu4j/src/com/ibm/text/CompoundTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/CompoundTransliterator.java,v $ 
- * $Date: 2001/03/30 23:33:06 $ 
- * $Revision: 1.12 $
+ * $Date: 2001/09/20 21:20:39 $ 
+ * $Revision: 1.13 $
 *
 *****************************************************************************************
 */
@ -35,7 +35,7 @@ import java.util.Vector;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.12 $ $Date: 2001/03/30 23:33:06 $
+ * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.13 $ $Date: 2001/09/20 21:20:39 $
 */
 public class CompoundTransliterator extends Transliterator {

@ -48,6 +48,14 @@ public class CompoundTransliterator extends Transliterator {
     */
    private UnicodeFilter[] filters = null;

+    /**
+     * For compound RBTs (those with an ::id block before and/or after
+     * the main rule block) we record the index of the RBT here.
+     * Otherwise, this should have a value of -1.  We need this
+     * information to implement toRules().
+     */
+    private int compoundRBTIndex;    
+
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

@ -131,6 +139,72 @@ public class CompoundTransliterator extends Transliterator {
        this(ID, FORWARD, null);
    }

+    /**
+     * Package private constructor for Transliterator from a vector of
+     * transliterators.  The vector order is FORWARD, so if dir is
+     * REVERSE then the vector order will be reversed.  The caller is
+     * responsible for fixing up the ID.
+     */
+    CompoundTransliterator(int dir,
+                           Vector list) {
+        super("", null);
+        trans = null;
+        compoundRBTIndex = -1;
+        init(list, dir, false);
+        // assume caller will fixup ID
+    }
+
+    /**
+     * Finish constructing a transliterator: only to be called by
+     * constructors.  Before calling init(), set trans and filter to NULL.
+     * @param list a vector of transliterator objects to be adopted.  It
+     * should NOT be empty.  The list should be in declared order.  That
+     * is, it should be in the FORWARD order; if direction is REVERSE then
+     * the list order will be reversed.
+     * @param direction either FORWARD or REVERSE
+     * @param fixReverseID if TRUE, then reconstruct the ID of reverse
+     * entries by calling getID() of component entries.  Some constructors
+     * do not require this because they apply a facade ID anyway.
+     * @param status the error code indicating success or failure
+     */
+    private void init(Vector list,
+                      int direction,
+                      boolean fixReverseID) {
+        // assert(trans == 0);
+
+        // Allocate array
+        int count = list.size();
+        trans = new Transliterator[count];
+
+        // Move the transliterators from the vector into an array.
+        // Reverse the order if necessary.
+        int i;
+        for (i=0; i<count; ++i) {
+            int j = (direction == FORWARD) ? i : count - 1 - i;
+            trans[i] = (Transliterator) list.elementAt(j);
+        }
+
+        // Fix compoundRBTIndex for REVERSE transliterators
+        if (compoundRBTIndex >= 0 && direction == REVERSE) {
+            compoundRBTIndex = count - 1 - compoundRBTIndex;
+        }
+
+        // If the direction is UTRANS_REVERSE then we may need to fix the
+        // ID.
+        if (direction == REVERSE && fixReverseID) {
+            StringBuffer newID = new StringBuffer();
+            for (i=0; i<count; ++i) {
+                if (i > 0) {
+                    newID.append(ID_DELIM);
+                }
+                newID.append(trans[i].getID());
+            }
+            setID(newID.toString());
+        }
+
+        computeMaximumContextLength();
+    }
+
    /**
     * Return the IDs of the given list of transliterators, concatenated
     * with ';' delimiting them.  Equivalent to the perlish expression
--- a/icu4j/src/com/ibm/text/NormalizationTransliterator.java
+++ b/icu4j/src/com/ibm/text/NormalizationTransliterator.java
@ -13,7 +13,7 @@ import java.util.*;

 /*
 * @author Alan Liu
- * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.1 $ $Date: 2001/06/12 23:01:55 $
+ * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.2 $ $Date: 2001/09/20 21:20:39 $
 */
 public class NormalizationTransliterator extends Transliterator {

@ -31,25 +31,25 @@ public class NormalizationTransliterator extends Transliterator {
     * System registration hook.
     */
    static void register() {
-        Transliterator.registerFactory("NFC", new Transliterator.Factory() {
+        Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {
            public Transliterator getInstance() {
                return NormalizationTransliterator.
                    getInstance(Normalizer.COMPOSE);
            }
        });
-        Transliterator.registerFactory("NFD", new Transliterator.Factory() {
+        Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {
            public Transliterator getInstance() {
                return NormalizationTransliterator.
                    getInstance(Normalizer.DECOMP);
            }
        });
-        Transliterator.registerFactory("NFKC", new Transliterator.Factory() {
+        Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {
            public Transliterator getInstance() {
                return NormalizationTransliterator.
                    getInstance(Normalizer.COMPOSE_COMPAT);
            }
        });
-        Transliterator.registerFactory("NFKD", new Transliterator.Factory() {
+        Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {
            public Transliterator getInstance() {
                return NormalizationTransliterator.
                    getInstance(Normalizer.DECOMP_COMPAT);
--- a/icu4j/src/com/ibm/text/NullTransliterator.java
+++ b/icu4j/src/com/ibm/text/NullTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/NullTransliterator.java,v $ 
- * $Date: 2000/06/28 20:49:54 $ 
- * $Revision: 1.8 $
+ * $Date: 2001/09/20 21:20:39 $ 
+ * $Revision: 1.9 $
 *
 *****************************************************************************************
 */
@ -21,9 +21,10 @@ public class NullTransliterator extends Transliterator {
        "\u00A9 IBM Corporation 2000. All rights reserved.";

    /**
-     * Package accessible ID for this transliterator.
+     * Package accessible IDs for this transliterator.
     */
-    static String _ID = "Null";
+    static String SHORT_ID = "Null";
+    static String _ID      = "Any-Null";

    /**
     * Constructs a transliterator.
--- a/icu4j/src/com/ibm/text/RemoveTransliterator.java
+++ b/icu4j/src/com/ibm/text/RemoveTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RemoveTransliterator.java,v $ 
- * $Date: 2001/04/04 18:06:53 $ 
- * $Revision: 1.1 $
+ * $Date: 2001/09/20 21:20:39 $ 
+ * $Revision: 1.2 $
 *
 *****************************************************************************************
 */
@ -22,7 +22,7 @@ public class RemoveTransliterator extends Transliterator {
    /**
     * Package accessible ID for this transliterator.
     */
-    static String _ID = "Remove";
+    static String _ID = "Any-Remove";

    /**
     * Constructs a transliterator.
--- a/icu4j/src/com/ibm/text/Transliterator.java
+++ b/icu4j/src/com/ibm/text/Transliterator.java
@ -4,9 +4,9 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
- * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $ 
- * $Date: 2001/09/19 17:43:38 $ 
- * $Revision: 1.38 $
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
+ * $Date: 2001/09/20 21:20:39 $
+ * $Revision: 1.39 $
 *
 *****************************************************************************************
 */
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: Transliterator.java,v $ $Revision: 1.38 $ $Date: 2001/09/19 17:43:38 $
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.39 $ $Date: 2001/09/20 21:20:39 $
 */
 public abstract class Transliterator {
    /**
@ -262,7 +262,7 @@ public abstract class Transliterator {
     * @see RuleBasedTransliterator
     * @see CompoundTransliterator
     */
-    public static final int REVERSE = 1;    
+    public static final int REVERSE = 1;

    /**
     * Position structure for incremental transliteration.  This data
@ -337,7 +337,7 @@ public abstract class Transliterator {
     */
    private String ID;

-    /** 
+    /**
     * This transliterator's filter.  Any character for which
     * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
     * altered by this transliterator.  If <tt>filter</tt> is
@ -380,6 +380,12 @@ public abstract class Transliterator {

    private static Hashtable displayNameCache;

+    // TODO Add documentation
+    // TODO Add documentation
+    // TODO Add documentation
+    // TODO Add documentation
+    private static TransliteratorRegistry registry;
+
    /**
     * Prefix for resource bundle key for the display name for a
     * transliterator.  The ID is appended to this to form the key.
@ -412,6 +418,10 @@ public abstract class Transliterator {
    private static final String RB_LOCALE_ELEMENTS =
        "com.ibm.text.resources.LocaleElements";

+    protected static final char ID_DELIM = ';';
+
+    protected static final char ID_SEP = '-';
+
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

@ -509,7 +519,7 @@ public abstract class Transliterator {
     * pending transliterations, clients should call {@link
     * #finishTransliteration} after the last call to this
     * method has been made.
-     * 
+     *
     * @param text the buffer holding transliterated and untransliterated text
     * @param index the start and limit of the text, the position
     * of the cursor, and the start and limit of transliteration.
@ -771,74 +781,47 @@ public abstract class Transliterator {
     * @see #getAvailableIDs
     * @see #getID
     */
-    public static Transliterator getInstance(String ID, int direction) {
-        if (ID.indexOf(';') >= 0) {
-            return new CompoundTransliterator(ID, direction, null);
-        }
-        
-        // 'id' is the ID with the filter pattern removed and with
-        // whitespace deleted.
-        StringBuffer id = new StringBuffer(ID);
-        
-        // Look for embedded filter pattern
-        UnicodeSet filter = null;
-        int setStart = ID.indexOf('[');
-        int setLimit = 0;
-        if (setStart >= 0) {
-            ParsePosition pos = new ParsePosition(setStart);
-            filter = new UnicodeSet(ID, pos, null);
-            setLimit = pos.getIndex();
-            id.delete(setStart, setLimit);
-        }
-        
-        // Delete whitespace
-        int i;
-        for (i=0; i<id.length(); ++i) {
-            if (UCharacter.isWhitespace(id.charAt(i))) {
-                id.deleteCharAt(i);
-                --i;
-            }
-        }
-
-        // Fix the id, if necessary, by reversing it (A-B => B-A).
-        // Record the position of the separator.  Detect the special
-        // case of Null, whose inverse is itself.  Given an ID with no
-        // separator "Foo", an abbreviation for "Any-Foo", consider
-        // the inverse to be "Foo-Any".
-        String str = id.toString();
-        int sep = str.indexOf('-');
-        if (str.equalsIgnoreCase(NullTransliterator._ID)) {
-            sep = id.length();
-        } else if (direction == REVERSE) {
-            String left;
-            if (sep >= 0) {
-                left = id.substring(0, sep);
-                id.delete(0, sep+1);
-            } else {
-                left = "Any";
-            }
-            sep = id.length();
-            id.append('-').append(left);
-        } else if (sep < 0) {
-            sep = id.length();
-        }
-
-        Transliterator t = internalGetInstance(id.toString());
-        if (t != null) {
-            if (filter != null) {
-                t.setFilter(filter);
-                id.insert(sep, ID.substring(setStart, setLimit));
-            }
-            t.ID = id.toString();
-            return t;
-        }
-        
-        throw new IllegalArgumentException("Unsupported transliterator: "
-                                           + ID);
+    public static final Transliterator getInstance(String ID, int direction) {
+        return getInstance(ID, direction, -1, null);
    }

    public static final Transliterator getInstance(String ID) {
-        return getInstance(ID, FORWARD);
+        return getInstance(ID, FORWARD, -1, null);
+    }
+
+    /**
+     * Create a transliterator given a compound ID (possibly degenerate,
+     * with no ID_DELIM).  If idSplitPoint >= 0 and adoptedSplitTrans !=
+     * 0, then insert adoptedSplitTrans in the compound ID at offset
+     * idSplitPoint.  Otherwise idSplitPoint should be -1 and
+     * adoptedSplitTrans should be 0.  The resultant transliterator will
+     * be an atomic (non-compound) transliterator if this is indicated by
+     * ID.  Otherwise it will be a compound translitertor.
+     */
+    private static Transliterator getInstance(String ID,
+                                              int dir,
+                                              int idSplitPoint,
+                                              Transliterator adoptedSplitTrans) {
+        Vector list = new Vector();
+        int[] ignored = new int[1];
+        StringBuffer regenID = new StringBuffer();
+        parseCompoundID(ID, regenID, dir, idSplitPoint, adoptedSplitTrans,
+                        list, ignored);
+
+        Transliterator t = null;
+        switch (list.size()) {
+        case 0:
+            t = new NullTransliterator();
+            break;
+        case 1:
+            t = (Transliterator) list.elementAt(0);
+            break;
+        default:
+            t = new CompoundTransliterator(dir, list);
+            break;
+        }
+        t.setID(regenID.toString());
+        return t;
    }

    /**
@ -854,6 +837,428 @@ public abstract class Transliterator {
        return new RuleBasedTransliterator(ID, rules, direction, null);
    }

+    public String toRules(boolean escapeUnprintable) {
+        // The base class implementation of toRules munges the ID into
+        // the correct format.  That is: foo => ::foo
+        // KEEP in sync with rbt_pars
+        return "::" + getID() + ID_DELIM;
+    }
+
+    /**
+     * Parse a compound ID (possibly a degenerate one, containing no
+     * ID_DELIM).  If idSplitPoint >= 0 and adoptedSplitTrans != 0, then
+     * insert adoptedSplitTrans in the compound ID at offset idSplitPoint.
+     * Otherwise idSplitPoint should be -1 and adoptedSplitTrans should be
+     * 0.  Return in the result vector the instantiated transliterator
+     * objects (one of these will be adoptedSplitTrans, if the latter was
+     * specified).  These will be in order of id, so if dir is REVERSE,
+     * then the caller will have to reverse the order.
+     *
+     * @param regenID regenerated ID, reversed if appropriate, which
+     * should be applied to the final created transliterator
+     * @param splitTransIndex output parameter to receive the index in
+     * 'result' at which the adoptedSplitTrans is stored, or -1 if
+     * adoptedSplitTrans == 0
+     */
+    private static void parseCompoundID(String id,
+                                        StringBuffer regenID,
+                                        int dir,
+                                        int idSplitPoint,
+                                        Transliterator adoptedSplitTrans,
+                                        Vector result,
+                                        int[] splitTransIndex) {
+        regenID.setLength(0);
+        splitTransIndex[0] = -1;
+        int pos = 0;
+        int i;
+        while (pos < id.length()) {
+            // We compare (pos >= split), not (pos == split), so we can
+            // skip over whitespace (see below).
+            if (pos >= idSplitPoint && adoptedSplitTrans != null) {
+                splitTransIndex[0] = result.size();
+                result.addElement(adoptedSplitTrans);
+                adoptedSplitTrans = null;
+            }
+            int[] p = new int[] { pos };
+            boolean[] sawDelimiter = new boolean[1];
+            Transliterator t =
+                parseID(id, regenID, p, sawDelimiter, dir, true);
+
+            if (p[0] == pos || (p[0] < id.length() && !sawDelimiter[0])) {
+                // TODO
+                //throw new IllegalArgumentException("Invalid ID " + id);
+                throw new IllegalArgumentException("Invalid ID " + id +
+                                                   " p[0]=" + p[0] +
+                                                   " pos=" + pos +
+                                                   " id.length()=" + id.length() +
+                                                   " sawDelimite[0]=" + sawDelimiter[0] +
+                                                   "");
+            }
+            pos = p[0];
+            // The return value may be NULL when, for instance, creating a
+            // REVERSE transliterator of ID "Latin-Greek()".
+            if (t != null) {
+                result.addElement(t);
+            }
+        }
+
+        // Handle case of idSplitPoint == id.length()
+        if (pos >= idSplitPoint && adoptedSplitTrans != null) {
+            splitTransIndex[0] = result.size();
+            result.addElement(adoptedSplitTrans);
+            adoptedSplitTrans = null;
+        }
+    }
+
+    /**
+     * Parse a single ID, possibly including an inline filter, and return
+     * the resultant transliterator object.  NOTE: If 'create' is false,
+     * then the amount of syntax checking is limited.  However, the 'pos'
+     * parameter will be updated correctly, assuming the input string is
+     * valid.
+     *
+     * A trailing /;? \s* / is skipped.  The parameter sawDelimiter
+     * indicates whether the ';' was seen or not.  Upon return, if pos is
+     * advanced, it will either point to a non-whitespace character past
+     * the trailing ';', if any, or be equal to length().
+     *
+     * @param ID the ID string
+     * @param regenID regenerated ID, reversed if appropriate, which
+     * should be applied to the final created transliterator.  This method
+     * will append to this parameter for FORWARD direction and insert
+     * addition text at offset 0 for REVERSE direction.  If create is
+     * false then this parameter is not used.
+     * @param pos INPUT-OUTPUT parameter.  On input, the position of the
+     * first character to parse.  On output, the position after the last
+     * character parsed.  This will be a semicolon or ID.length().  In the
+     * case of an error this value will be unchanged.
+     * @param create if true, create and return the result.  If false,
+     * only scan the ID, and return NULL.
+     * @return a newly created transliterator, or NULL.  NULL is returned
+     * in all cases if create is false.  If create is true, then NULL is
+     * returned on error, or if the ID is effectively empty.
+     * E.g. "Latin-Greek()" with dir == REVERSE.  Do NOT check for NULL to
+     * determine if there was an error.  Instead, check to see if pos
+     * moved.
+     */
+     private static Transliterator parseID(String ID,
+                                           StringBuffer regenID,
+                                           int[] pos,
+                                           boolean[] sawDelimiter,
+                                           int dir,
+                                           boolean create) {
+        int limit, preDelimLimit,
+            revStart, revLimit=0,
+            idStart, idLimit,
+            setStart, setLimit;
+
+        UnicodeSet[] filter = new UnicodeSet[1];
+        int[] indices = new int[4];
+
+        if (!parseIDBounds(ID, pos[0], false, indices, filter)) {
+            return null;
+        }
+        limit = indices[0];
+        setStart = indices[1];
+        setLimit = indices[2];
+        revStart = indices[3];
+
+        idStart = pos[0];
+        idLimit = limit;
+
+        if (revStart >= 0 && revStart < limit) {
+            int revSetStart, revSetLimit;
+            UnicodeSet[] revFilter = new UnicodeSet[1];
+            if (!parseIDBounds(ID, revStart+1, true, indices, revFilter)) {
+                return null;
+            }
+            revLimit = indices[0];
+            revSetStart = indices[1];
+            revSetLimit = indices[2];
+            // we ignore indices[3]
+
+            // revStart points to '('
+            if (dir == REVERSE) {
+                idStart = revStart+1;
+                idLimit = revLimit;
+                setStart = revSetStart;
+                setLimit = revSetLimit;
+                filter[0] = revFilter[0];
+            } else {
+                idLimit = revStart;
+            }
+            // assert(revLimit < ID.length() && ID.charAt(revLimit) == ')');
+            limit = revLimit+1;
+        } else {
+            // Ignore () exprs outside of this atomic ID, that is, in
+            // "Greek-Latin; Title()", ignore the "()" after Title when
+            // parsing Greek-Latin.
+            revStart = -1;
+        }
+
+        // Advance limit past /\s*;?\s*/
+        preDelimLimit = limit;
+        limit = skipSpaces(ID, limit);
+        sawDelimiter[0] = (limit < ID.length() && ID.charAt(limit) == ID_DELIM);
+        if (sawDelimiter[0]) {
+            limit = skipSpaces(ID, ++limit);
+        }
+
+        if (!create) {
+            // TODO Improve performance by scanning the UnicodeSet pattern
+            // without actually constructing it, if create is false.  That
+            // is, create a method like this one for UnicodeSet.
+            pos[0] = limit;
+            return null;
+        }
+
+        // 'id' is the ID with the filter pattern removed and with
+        // whitespace deleted.  In a Foo(Bar) ID, id is Foo for FORWARD
+        // and Bar for REVERSE.
+        String str;
+        str = ID.substring(setLimit, idLimit);
+        StringBuffer id = new StringBuffer(ID.substring(idStart, setStart));
+        id.append(str);
+
+        // Delete whitespace
+        int i;
+        for (i=0; i<id.length(); ++i) {
+            if (UCharacter.isWhitespace(id.charAt(i))) {
+                id.deleteCharAt(i);
+                --i;
+            }
+        }
+
+        // Fix the id, if necessary, by reversing it (A-B => B-A).  This
+        // is only done if the id is NOT of the form Foo(Bar).  Record the
+        // position of the separator.
+        //
+        // For both A-B and Foo(Bar) ids, detect the special case of Null,
+        // whose inverse is itself.  Given an ID with no separator "Foo",
+        // an abbreviation for "Any-Foo", consider the inverse to be
+        // "Foo-Any".
+        int sep = id.toString().indexOf(ID_SEP);
+        if (sep < 0 && id.toString().equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
+            // Handle "Null"
+            sep = id.length();
+        } else if (dir == REVERSE &&
+                   id.toString().equalsIgnoreCase(NullTransliterator._ID)) {
+            // Reverse of "Any-Null" => "Null"
+            id.delete(0, sep+1);
+            sep = id.length();
+        } else if (dir == REVERSE && revStart < 0) {
+            if (sep >= 0) {
+                str = id.substring(0, sep);
+                id.delete(0, sep+1);
+            } else {
+                str = "Any";
+            }
+            sep = id.length();
+            id.append(ID_SEP).append(str);
+        } else if (sep < 0 && id.length() > 0) {
+            // Don't do anything for empty IDs -- we handle these specially below
+            str = "Any-";
+            sep = str.length() - 1;
+            id.insert(0, str);
+        }
+
+        Transliterator t = null;
+
+        // If we have a reverse part of the ID, e.g., Foo(Bar), then we
+        // need to check for an empty part, which represents a Null
+        // transliterator.  We return 0 (not a NullTransliterator).  If we
+        // are not of the form Foo(Bar) then an empty string is illegal.
+        if (revStart >= 0 && id.length() == 0) {
+            // Ignore any filters; filters on Null are meaningless (and we
+            // can't attach them to 0 anyway)
+            filter = null;
+        }
+
+        else {
+            StringBuffer s = new StringBuffer();
+
+            synchronized (registry) {
+                t = registry.get(id.toString(), s);
+                // Need to enclose this in a block to prevent deadlock when
+                // instantiating aliases (below).
+            }
+
+            if (s.length() != 0) {
+                // assert(t==0);
+                // Instantiate an alias
+                t = getInstance(s.toString(), FORWARD);
+            }
+
+            if (t == null) {
+                // Creation failed; the ID is invalid or is an alias
+                filter[0] = null;
+                return null;
+            }
+
+            // Set the filter, if any
+            t.setFilter(filter[0]);
+        }
+
+        // Set the ID.  This is normally just a substring of the input
+        // ID, but for reverse transliterators we need to munge A-B to
+        // B-A or Foo(Bar) to Bar(Foo).
+        if (dir == FORWARD) {
+            id.setLength(0);
+            id.append(ID.substring(pos[0], preDelimLimit));
+        } else if (revStart < 0) {
+            id.insert(sep, ID.substring(setStart, setLimit));
+        } else {
+            // Change Foo(Bar) to Bar(Foo)
+            str = ID.substring(pos[0], revStart);
+            str = str.trim();
+            id.setLength(0);
+            id.append(ID.substring(revStart+1, revLimit));
+            // TODO make this more efficient
+            id = new StringBuffer(id.toString().trim());
+            id.append('(').append(str).append(')');
+        }
+        // TODO make this more efficient
+        id = new StringBuffer(id.toString().trim());
+
+        if (t != null) {
+            t.setID(id.toString());
+        }
+
+        // Regenerate ID of a compound entity
+        if (dir == FORWARD) {
+            if (regenID.length() != 0) {
+                regenID.append(ID_DELIM);
+            }
+            regenID.append(id);
+        } else {
+            if (regenID.length() != 0) {
+                regenID.insert(0, ID_DELIM);
+            }
+            regenID.insert(0, id);
+        }
+
+        // Indicate success by bumping pos past the final /;?\s*/.
+        pos[0] = limit;
+
+        return t;
+    }
+
+    /**
+     * Internal method used by parseID.  Given a piece of a single ID,
+     * find the boundaries of various parts.  For IDs of the form
+     * Foo(Bar), this method parses the Foo, then the Bar.  In each piece
+     * it locates any inline UnicodeSet pattern [setStart, setLimit)
+     * and finds the limit (this will point to either ';' or ')' or
+     * ID.length()).
+     *
+     * @param ID the ID to be parsed
+     * @param pos the index of ID at which to start
+     * @param withinParens if true, parse the Bar of Foo(Bar), stop at a
+     * close paren, and do not look for an open paren.  If true then a
+     * close paren MUST be seen or false is returned; if false then the
+     * ';' delimiter is optional.
+     * @param limit set to the position of ';' or ')' (depending on
+     * withinParens), or ID.length() if no delimiter was found
+     * @param setStart set to the start of an inline filter pattern,
+     * or pos if none
+     * @param setLimit set to the limit of an inline filter pattern,
+     * or pos if none
+     * @param revStart if not withinParens then set to the position of the
+     * first '(', which may be > limit; otherwise set to -1
+     * @param filter set to a newly created UnicodeSet object for the
+     * inline filter pattern, if any; OWNED BY THE CALLER
+     *
+     * @return true if the pattern is valid, false is there is an invalid
+     * UnicodeSet pattern or if withinParens is true and no close paren is
+     * seen.
+     */
+    private static boolean parseIDBounds(String ID,
+                                         int pos,
+                                         boolean withinParens,
+                                         int[] indices,
+                                         UnicodeSet[] filter) {
+        int limit;
+        int setStart;
+        int setLimit;
+        int revStart;
+
+        char endDelimiter = withinParens ? ')' : ID_DELIM;
+        limit = ID.indexOf(endDelimiter, pos);
+        if (limit < 0) {
+            if (withinParens) {
+                //return false;
+                throw new IllegalArgumentException("Missing closing parenthesis in " + ID);
+            }
+            limit = ID.length();
+        }
+        setStart = ID.indexOf('[', pos);
+        revStart = withinParens ? -1 : ID.indexOf('(', pos);
+
+        if (setStart >= 0 && setStart < limit &&
+            (revStart < 0 || setStart < revStart)) {
+            ParsePosition ppos = new ParsePosition(setStart);
+            // TODO Improve performance by scanning the UnicodeSet pattern
+            // without actually constructing it, if create is false.  That
+            // is, create a method like this one for UnicodeSet.
+            filter[0] = new UnicodeSet();
+            filter[0].applyPattern(ID, ppos, null, true);
+            setLimit = ppos.getIndex();
+            if (limit < setLimit) {
+                limit = ID.indexOf(endDelimiter, setLimit);
+                if (limit < 0) {
+                    if (withinParens) {
+                        //return false;
+                        throw new IllegalArgumentException("Missing closing parenthesis in " + ID);
+                    }
+                    limit = ID.length();
+                }
+            }
+            if (revStart >= 0 && revStart < setLimit) {
+                revStart = ID.indexOf(')', setLimit);
+            }
+        } else {
+            setStart = setLimit = pos;
+        }
+        indices[0] = limit;
+        indices[1] = setStart;
+        indices[2] = setLimit;
+        indices[3] = revStart;
+        return true;
+    }
+
+    /**
+     * If pos is the index of a space in str, then advance it over that
+     * space and any immediately subsequent ones.
+     */
+    private static int skipSpaces(String str,
+                                  int pos) {
+        while (pos < str.length() &&
+               UCharacter.isWhitespace(str.charAt(pos))) {
+            ++pos;
+        }
+        return pos;
+    }
+
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    // TODO Remove remove remove
+    static Transliterator tempGet(String id, StringBuffer aliasReturn) {
+        aliasReturn.setLength(0);
+        if (id.equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
+            id = NullTransliterator._ID;
+            // Temporary hack to make this work
+        }
+        return internalGetInstance(id);
+    }
+
    /**
     * Returns this transliterator's inverse.  See the class
     * documentation for details.  This implementation simply inverts
@ -877,7 +1282,7 @@ public abstract class Transliterator {
    public final Transliterator getInverse() {
        return getInstance(ID, REVERSE);
    }
-    
+
    /**
     * Returns a transliterator object given its ID.  Unlike getInstance(),
     * this method returns null if it cannot make use of the given ID.
@ -891,7 +1296,7 @@ public abstract class Transliterator {
            obj = internalCache.get(ciID);
            sourceCache = internalCache;
        }
-        
+
        if (obj != null) {
            if (obj instanceof RuleBasedTransliterator.Data) {
                data = (RuleBasedTransliterator.Data) obj;
@ -925,7 +1330,7 @@ public abstract class Transliterator {
                        } catch (IllegalArgumentException e2) {
                            // Can't load UTF8 file
                        }
-                        
+
                        if (r != null) {
                            data = RuleBasedTransliterator.parse(r, dir);
                            sourceCache.put(ciID, data);
@ -950,7 +1355,7 @@ public abstract class Transliterator {
 //     * Find a path through the composed transliterator graph.  This
 //     * will not necessarily be the only path, or the shortest path.
 //     * This is a simple recursive algorithm.
-//     * 
+//     *
 //     * <p><code>composedGraph</code> is the links table.
 //     * composedGraph.get(x) should return a String[] array, each of
 //     * which is a node that x is connected to.
@ -984,7 +1389,7 @@ public abstract class Transliterator {
 //                }
 //            }
 //        }
-//        path.removeElementAt(path.size() - 1);    
+//        path.removeElementAt(path.size() - 1);
 //        return false;
 //    }

@ -1020,7 +1425,7 @@ public abstract class Transliterator {
    /**
     * Unregisters a transliterator or class.  This may be either
     * a system transliterator or a user transliterator or class.
-     * 
+     *
     * @param ID the ID of the transliterator or class
     * @return the <code>Object</code> that was registered with
     * <code>ID</code>, or <code>null</code> if none was
@ -1082,6 +1487,9 @@ public abstract class Transliterator {
    }

    static {
+        // TODO FINISH
+        registry = new TransliteratorRegistry();
+
        // The display name cache starts out empty
        displayNameCache = new Hashtable();

@ -1145,7 +1553,7 @@ public abstract class Transliterator {
                      HangulJamoTransliterator.class, null);
        registerClass(JamoHangulTransliterator._ID,
                      JamoHangulTransliterator.class, null);
-                      
+
        registerClass(HexToUnicodeTransliterator._ID,
                      HexToUnicodeTransliterator.class, null);
        registerClass(UnicodeToHexTransliterator._ID,