ICU-1048 allow ::ID blocks in rules

X-SVN-Rev: 5233
2001-07-13 21:09:41 +00:00 · 2001-07-13 21:09:41 +00:00 · 9805ae16d9
commit 9805ae16d9
parent f35b88f4eb
9 changed files with 946 additions and 348 deletions
--- a/icu4c/source/i18n/cpdtrans.cpp
+++ b/icu4c/source/i18n/cpdtrans.cpp
@ -10,6 +10,7 @@
 #include "unicode/cpdtrans.h"
 #include "unicode/unifilt.h"
 #include "unicode/unifltlg.h"
+#include "uvector.h"

 /**
 * Constructs a new compound transliterator given an array of
@ -30,7 +31,7 @@ CompoundTransliterator::CompoundTransliterator(
                           int32_t transliteratorCount,
                           UnicodeFilter* adoptedFilter) :
    Transliterator(joinIDs(transliterators, transliteratorCount), adoptedFilter),
-    trans(0), filters(0), count(0)  {
+    trans(0), filters(0), count(0), compoundRBTIndex(-1)  {
    setTransliterators(transliterators, transliteratorCount);
 }

@ -46,44 +47,142 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& id,
                              UnicodeFilter* adoptedFilter,
                              UErrorCode& status) :
    Transliterator(id, 0), // set filter to 0 here!
-    trans(0), filters(0) {
-    init(id, direction, adoptedFilter, status);
+    trans(0), filters(0), compoundRBTIndex(-1) {
+    init(id, direction, adoptedFilter, -1, 0, TRUE, status);
 }

 CompoundTransliterator::CompoundTransliterator(const UnicodeString& id,
                              UErrorCode& status) :
    Transliterator(id, 0), // set filter to 0 here!
-    trans(0), filters(0) {
-    init(id, UTRANS_FORWARD, 0, status);
+    trans(0), filters(0), compoundRBTIndex(-1) {
+    init(id, UTRANS_FORWARD, 0, -1, 0, TRUE, status);
 }

+/**
+ * Private constructor for compound RBTs.  Construct a compound
+ * transliterator using the given idBlock, with the adoptedTrans
+ * inserted at the idSplitPoint.  The components are always built in
+ * the FORWARD direction, with no filter, and without fixing up
+ * reverse IDs (fixReverseID is FALSE).
+ * @param ID the ID passed to the Transliterator base class
+ * @param idBlock the ';'-separated ID entries handed to init()
+ * @param idSplitPoint the index into idBlock at which adoptedTrans
+ * should be inserted
+ * @param adoptedTrans the transliterator to insert; handed to init()
+ * as the adopted split transliterator
+ * @param status the error code indicating success or failure
+ */
+CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
+                                               const UnicodeString& idBlock,
+                                               int32_t idSplitPoint,
+                                               Transliterator *adoptedTrans,
+                                               UErrorCode& status) :
+    Transliterator(ID, 0),
+    trans(0), filters(0), compoundRBTIndex(-1) {
+    init(idBlock, UTRANS_FORWARD, 0, idSplitPoint, adoptedTrans, FALSE, status);
+}
+
+/**
+ * Private constructor for Transliterator from a vector of
+ * transliterators.  The vector order is FORWARD, so if dir is REVERSE
+ * then the vector order will be reversed.  No filter is supplied, and
+ * reverse IDs are fixed up (fixReverseID is TRUE).
+ * @param ID the ID passed to the Transliterator base class
+ * @param dir either FORWARD or REVERSE
+ * @param list a vector of transliterator objects, in FORWARD
+ * (declared) order; init() moves its elements into this object's
+ * array
+ * @param status the error code indicating success or failure
+ */
+CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
+                                               UTransDirection dir,
+                                               UVector& list,
+                                               UErrorCode& status) :
+    Transliterator(ID, 0),
+    trans(0), filters(0), compoundRBTIndex(-1) {
+    init(list, dir, 0, TRUE, status);
+}
+
+/**
+ * Finish constructing a transliterator: only to be called by
+ * constructors.  Before calling init(), set trans and filter to NULL.
+ * @param id the id containing ';'-separated entries
+ * @param direction either FORWARD or REVERSE
+ * @param adoptedFilter a filter object to be owned by this transliterator.
+ * May be NULL.
+ * @param idSplitPoint the index into id at which the
+ * adoptedSplitTrans should be inserted, if there is one, or
+ * -1 if there is none.
+ * @param adoptedSplitTrans a transliterator to be inserted
+ * before the entry at offset idSplitPoint in the id string.  May be
+ * NULL to insert no entry.
+ * @param fixReverseID if TRUE, then reconstruct the ID of reverse
+ * entries by calling getID() of component entries.  Some constructors
+ * do not require this because they apply a facade ID anyway.
+ * @param status the error code indicating success or failure
+ */
 void CompoundTransliterator::init(const UnicodeString& id,
                                  UTransDirection direction,
                                  UnicodeFilter* adoptedFilter,
+                                  int32_t idSplitPoint,
+                                  Transliterator *adoptedSplitTrans,
+                                  UBool fixReverseID,
                                  UErrorCode& status) {
-    if (U_FAILURE(status))
-        return;
-    UnicodeString* list = split(id, ID_DELIM, &count);
-    trans = new Transliterator*[count];
-    for (int32_t i = 0; i < count; ++i) {
-        trans[i] = createInstance(list[direction==UTRANS_FORWARD ? i : (count-1-i)],
-                                  direction);
-        if (trans[i] == NULL) {
-            while (++i < count)
-                trans[i] = 0;
-            status = U_ILLEGAL_ARGUMENT_ERROR;
-            delete[] list;
-            delete adoptedFilter;
-            return;
-        }
-    }
-    delete[] list;
+    // assert(trans == 0);
+    // assert(filters == 0);

-    // If the direction is UTRANS_REVERSE then we need to fix
-    // the ID.
-    if (direction == UTRANS_REVERSE) {
+    if (U_FAILURE(status)) {
+        delete adoptedFilter;
+        delete adoptedSplitTrans;
+        return;
+    }
+
+    UVector list;
+    Transliterator::parseCompoundID(id, direction,
+                                    idSplitPoint, adoptedSplitTrans,
+                                    list, compoundRBTIndex,
+                                    NULL, status);
+
+    init(list, direction, adoptedFilter, fixReverseID, status);
+}
+
+/**
+ * Finish constructing a transliterator: only to be called by
+ * constructors.  Before calling init(), set trans and filter to NULL.
+ * @param list a vector of transliterator objects to be adopted.  It
+ * should NOT be empty.  The list should be in declared order.  That
+ * is, it should be in the FORWARD order; if direction is REVERSE then
+ * the list order will be reversed.
+ * @param direction either FORWARD or REVERSE
+ * @param adoptedFilter a filter object to be owned by this transliterator.
+ * May be NULL.
+ * @param fixReverseID if TRUE, then reconstruct the ID of reverse
+ * entries by calling getID() of component entries.  Some constructors
+ * do not require this because they apply a facade ID anyway.
+ * @param status the error code indicating success or failure
+ */
+void CompoundTransliterator::init(UVector& list,
+                                  UTransDirection direction,
+                                  UnicodeFilter* adoptedFilter,
+                                  UBool fixReverseID,
+                                  UErrorCode& status) {
+    // assert(trans == 0);
+    // assert(filters == 0);
+
+    // Allocate array
+    if (U_SUCCESS(status)) {
+        count = list.size();
+        trans = new Transliterator*[count];
+    }
+
+    if (U_FAILURE(status) || trans == 0) {
+        delete adoptedFilter;
+        // assert(trans == 0);
+        return;
+    }
+
+    // Move the transliterators from the vector into an array.
+    // Reverse the order if necessary.
+    int32_t i;
+    for (i=0; i<count; ++i) {
+        int32_t j = (direction == UTRANS_FORWARD) ? i : count - 1 - i;
+        trans[i] = (Transliterator*) list.elementAt(j);
+    }
+
+    // Fix compoundRBTIndex for REVERSE transliterators
+    if (compoundRBTIndex >= 0 && direction == UTRANS_REVERSE) {
+        compoundRBTIndex = count - 1 - compoundRBTIndex;
+    }
+
+    // If the direction is UTRANS_REVERSE then we may need to fix the
+    // ID.
+    if (direction == UTRANS_REVERSE && fixReverseID) {
        UnicodeString newID;
-        for (int32_t i=0; i<count; ++i) {
+        for (i=0; i<count; ++i) {
            if (i > 0) {
                newID.append(ID_DELIM);
            }
@ -113,35 +212,35 @@ UnicodeString CompoundTransliterator::joinIDs(Transliterator* const transliterat
    return id; // Return temporary
 }

-/**
- * Splits a string, as in JavaScript
- */
-UnicodeString* CompoundTransliterator::split(const UnicodeString& s,
-                                             UChar divider,
-                                             int32_t* countPtr) {
-    // changed MED
-    // see how many there are
-    *countPtr = 1;
-    int32_t i;
-    for (i = 0; i < s.length(); ++i) {
-        if (s.charAt(i) == divider)
-            ++(*countPtr);
-    }
-    
-    // make an array with them
-    UnicodeString* result = new UnicodeString[*countPtr];
-    int32_t last = 0;
-    int32_t current = 0;
-    
-    for (i = 0; i < s.length(); ++i) {
-        if (s.charAt(i) == divider) {
-            s.extractBetween(last, i, result[current++]);
-            last = i+1;
-        }
-    }
-    s.extractBetween(last, i, result[current]);
-    return result;
-}
+///**
+// * Splits a string, as in JavaScript
+// */
+//UnicodeString* CompoundTransliterator::split(const UnicodeString& s,
+//                                             UChar divider,
+//                                             int32_t* countPtr) {
+//    // changed MED
+//    // see how many there are
+//    *countPtr = 1;
+//    int32_t i;
+//    for (i = 0; i < s.length(); ++i) {
+//        if (s.charAt(i) == divider)
+//            ++(*countPtr);
+//    }
+//    
+//    // make an array with them
+//    UnicodeString* result = new UnicodeString[*countPtr];
+//    int32_t last = 0;
+//    int32_t current = 0;
+//    
+//    for (i = 0; i < s.length(); ++i) {
+//        if (s.charAt(i) == divider) {
+//            s.extractBetween(last, i, result[current++]);
+//            last = i+1;
+//        }
+//    }
+//    s.extractBetween(last, i, result[current]);
+//    return result;
+//}

 /**
 * Copy constructor.
@ -301,73 +400,102 @@ void CompoundTransliterator::adoptFilter(UnicodeFilter* f) {
    Transliterator::adoptFilter(f);
 }

+UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
+                                               UBool escapeUnprintable) const {
+    // Build the rule source by appending one entry per component.  We
+    // do NOT call the component's own toRules() in general: with several
+    // rule-based components that would yield a concatenation of their
+    // rules -- not what we want.  Only the component at compoundRBTIndex
+    // (>= 0 for compound RBTs) is asked for its rules recursively; every
+    // other component goes through the base-class Transliterator::toRules().
+    rulesSource.truncate(0);
+    for (int32_t i=0; i<count; ++i) {
+        UnicodeString rule;
+        if (i == compoundRBTIndex) {
+            trans[i]->toRules(rule, escapeUnprintable);
+        } else {
+            trans[i]->Transliterator::toRules(rule, escapeUnprintable);
+        }
+        if (rulesSource.length() &&
+            rulesSource.charAt(rulesSource.length() - 1) != 10) {
+            rulesSource.append((UChar)10);
+        }
+        rulesSource.append(rule);
+        if (rulesSource.length() &&
+            rulesSource.charAt(rulesSource.length() - 1) != ID_DELIM) {
+            rulesSource.append(ID_DELIM);
+        }
+    }
+    return rulesSource;
+}
+
 /**
 * Implements {@link Transliterator#handleTransliterate}.
 */
 void CompoundTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
                                                 UBool incremental) const {
-    /* Call each transliterator with the same start value and
-     * initial cursor index, but with the limit index as modified
-     * by preceding transliterators.  The cursor index must be
+    /* Call each transliterator with the same contextStart and
+     * start, but with the limit as modified
+     * by preceding transliterators.  The start index must be
     * reset for each transliterator to give each a chance to
-     * transliterate the text.  The initial cursor index is known
+     * transliterate the text.  The initial start index is known
     * to still point to the same place after each transliterator
     * is called because each transliterator will not change the
-     * text between start and the initial value of cursor.
+     * text between contextStart and the initial start index.
     *
     * IMPORTANT: After the first transliterator, each subsequent
     * transliterator only gets to transliterate text committed by
-     * preceding transliterators; that is, the cursor (output
+     * preceding transliterators; that is, the start (output
     * value) of transliterator i becomes the limit (input value)
     * of transliterator i+1.  Finally, the overall limit is fixed
     * up before we return.
     *
     * Assumptions we make here:
-     * (1) start <= cursor <= limit    ;cursor valid on entry
-     * (2) cursor <= cursor' <= limit' ;cursor doesn't move back
-     * (3) cursor <= limit'            ;text before cursor unchanged
-     * - cursor' is the value of cursor after calling handleKT
+     * (1) contextStart <= start <= limit ;start valid on entry
+     * (2) start <= start' <= limit' ;start doesn't move back
+     * (3) start <= limit'           ;text before start unchanged
+     * - start' is the value of start after calling handleKT
     * - limit' is the value of limit after calling handleKT
     */

    /**
     * Example: 3 transliterators.  This example illustrates the
-     * mechanics we need to implement.  S, C, and L are the start,
-     * cursor, and limit.  gl is the globalLimit.
+     * mechanics we need to implement.  C, S, and L are the contextStart,
+     * start, and limit.  gl is the globalLimit.
     *
     * 1. h-u, changes hex to Unicode
     *
     *    4  7  a  d  0      4  7  a
     *    abc/u0061/u    =>  abca/u    
-     *    S  C       L       S   C L   gl=f->a
+     *    C  S       L       C   S L   gl=f->a
     *
     * 2. upup, changes "x" to "XX"
     *
     *    4  7  a       4  7  a
     *    abca/u    =>  abcAA/u    
-     *    S  CL         S    C   
+     *    C  SL         C    S   
     *                       L    gl=a->b
     * 3. u-h, changes Unicode to hex
     *
     *    4  7  a        4  7  a  d  0  3
     *    abcAA/u    =>  abc/u0041/u0041/u    
-     *    S  C L         S              C
+     *    C  S L         C              S
     *                                  L   gl=b->15
     * 4. return
     *
     *    4  7  a  d  0  3
     *    abc/u0041/u0041/u    
-     *    S C L
+     *    C              S L
     */

    if (count < 1) {
+        index.start = index.limit;
        return; // Short circuit for empty compound transliterators
    }

    int32_t i;
-    int32_t cursor = index.start;
-    int32_t limit = index.limit;
-    int32_t globalLimit = limit;
+    int32_t start = index.start;
+    int32_t globalLimit = index.limit;
    /* globalLimit is the overall limit.  We keep track of this
     * since we overwrite index.limit with the previous
     * index.start.  After each transliteration, we update
@ -375,16 +503,16 @@ void CompoundTransliterator::handleTransliterate(Replaceable& text, UTransPositi
     */
    
    for (i=0; i<count; ++i) {
-        index.start = cursor; // Reset cursor
-        index.limit = limit;
+        index.start = start; // Reset start
+        int32_t limit = index.limit;
        
        trans[i]->handleTransliterate(text, index, incremental);
        
        // Adjust overall limit for insertions/deletions
        globalLimit += index.limit - limit;
-        limit = index.start; // Move limit to end of committed text
+        index.limit = index.start; // Move limit to end of committed text
    }
-    // Cursor is good where it is -- where the last
+    // Start is good where it is -- where the last
    // transliterator left it.  Limit needs to be put back
    // where it was, modulo adjustments for deletions/insertions.
    index.limit = globalLimit;
--- a/icu4c/source/i18n/nortrans.cpp
+++ b/icu4c/source/i18n/nortrans.cpp
@ -15,10 +15,10 @@
 */
 void NormalizationTransliterator::registerIDs() {
    UErrorCode status = U_ZERO_ERROR;
-    Transliterator::_registerFactory(UnicodeString("NFC", ""), _createNFC, status);
-    Transliterator::_registerFactory(UnicodeString("NFKC", ""), _createNFKC, status);
-    Transliterator::_registerFactory(UnicodeString("NFD", ""), _createNFD, status);
-    Transliterator::_registerFactory(UnicodeString("NFKD", ""), _createNFKD, status);
+    Transliterator::_registerFactory(UnicodeString("Any-NFC", ""), _createNFC, status);
+    Transliterator::_registerFactory(UnicodeString("Any-NFKC", ""), _createNFKC, status);
+    Transliterator::_registerFactory(UnicodeString("Any-NFD", ""), _createNFD, status);
+    Transliterator::_registerFactory(UnicodeString("Any-NFKD", ""), _createNFKD, status);
 }

 /**
--- a/icu4c/source/i18n/rbt.cpp
+++ b/icu4c/source/i18n/rbt.cpp
@ -22,7 +22,7 @@ void RuleBasedTransliterator::_construct(const UnicodeString& rules,
    data = 0;
    isDataOwned = TRUE;
    if (U_SUCCESS(status)) {
-        data = TransliterationRuleParser::parse(rules, direction, parseError);
+        data = TransliteratorParser::parse(rules, direction, parseError);
        if (data == 0) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        } else {
@ -40,6 +40,18 @@ RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
    setMaximumContextLength(data->ruleSet.getMaximumContextLength());
 }

+/**
+ * Internal constructor.  Wraps already-parsed rule data instead of
+ * parsing a rule string.  No filter is set.
+ * @param id the ID passed to the Transliterator base class
+ * @param theData the rule data to use.  Must not be NULL: it is
+ * dereferenced here to read the maximum context length.
+ * @param isDataAdopted stored as isDataOwned; presumably TRUE means
+ * this object becomes responsible for deleting theData -- confirm
+ * against the destructor, which is not part of this change.
+ */
+RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
+                                                 TransliterationRuleData* theData,
+                                                 UBool isDataAdopted) :
+    Transliterator(id, 0),
+    data(theData),
+    isDataOwned(isDataAdopted) {
+    setMaximumContextLength(data->ruleSet.getMaximumContextLength());
+}
+
 /**
 * Copy constructor.  Since the data object is immutable, we can share
 * it with other objects -- no need to clone it.
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@ -48,7 +48,11 @@
 // trailing SymbolTable.SYMBOL_REF character.
 // private static final char ANCHOR_END       = '$';

-const UnicodeString TransliterationRuleParser::gOPERATORS = OPERATORS;
+const UnicodeString TransliteratorParser::gOPERATORS = OPERATORS;
+
+// These are also used in Transliterator::toRules()
+static const int32_t ID_TOKEN_LEN = 2;
+static const UChar   ID_TOKEN[]   = { 0x3A, 0x3A }; // ':', ':'

 //----------------------------------------------------------------------
 // BEGIN ParseData
@ -167,14 +171,14 @@ public:
    UBool anchorStart;
    UBool anchorEnd;

-    TransliterationRuleParser& parser;
+    TransliteratorParser& parser;

    static const UnicodeString gOperators;

    //--------------------------------------------------
    // Methods

-    RuleHalf(TransliterationRuleParser& parser);
+    RuleHalf(TransliteratorParser& parser);
    ~RuleHalf();

    /**
@ -220,7 +224,7 @@ inline int32_t _voidPtr_to_int32(void* x) {

 const UnicodeString RuleHalf::gOperators = OPERATORS;

-RuleHalf::RuleHalf(TransliterationRuleParser& p) : parser(p) {
+RuleHalf::RuleHalf(TransliteratorParser& p) : parser(p) {
    cursor = -1;
    ante = -1;
    post = -1;
@ -487,24 +491,62 @@ int32_t* RuleHalf::createSegments() const {
 //----------------------------------------------------------------------

 TransliterationRuleData*
-TransliterationRuleParser::parse(const UnicodeString& rules,
-                                 UTransDirection direction,
-                                 UParseError* parseError) {
-    TransliterationRuleParser parser(rules, direction, parseError);
-    parser.parseRules();
-    if (U_FAILURE(parser.status)) {
+TransliteratorParser::parse(const UnicodeString& rules,
+                            UTransDirection direction,
+                            UParseError* parseError) {
+    TransliteratorParser parser(rules, direction, parseError);
+    UnicodeString idBlock;
+    int32_t idSplitPoint, count;
+    parser.parseRules(idBlock, idSplitPoint, count);
+    if (U_FAILURE(parser.status) || idBlock.length() != 0) {
        delete parser.data;
        parser.data = 0;
    }
    return parser.data;
 }

+/**
+ * Parse a given set of rules.  Return up to two pieces of parsed
+ * data: the rule data, and a single ::id block that holds the header
+ * ::id entries followed by the footer ::id entries.  Either or both
+ * may be empty.  If there are no ::id entries, idBlockResult is
+ * returned as the empty string.  If there are no rules, or if
+ * parsing fails, the TransliterationRuleData result is 0.
+ * @param rules the rule source to parse
+ * @param direction either FORWARD or REVERSE
+ * @param ruleDataResult caller owns the pointer stored here.
+ * May be NULL.
+ * @param idBlockResult receives the ::id entries in the order they
+ * were seen; a ';' is appended after an entry for which the ID
+ * parser did not see a delimiter.  May be empty.
+ * @param idSplitPointResult receives the offset within idBlockResult
+ * that separates the header entries from the footer entries, that
+ * is, the point at which the rules occurred.  On success this is
+ * always >= 0.
+ * @param parseError passed through to the parser; may be NULL
+ * @param ec the error code.  If it indicates failure on entry, then
+ * ruleDataResult is set to 0 and nothing else is modified; otherwise
+ * it receives the status of the parse.
+ */
+void TransliteratorParser::parse(const UnicodeString& rules,
+                                 UTransDirection direction,
+                                 TransliterationRuleData*& ruleDataResult,
+                                 UnicodeString& idBlockResult,
+                                 int32_t& idSplitPointResult,
+                                 UParseError* parseError,
+                                 UErrorCode& ec) {
+    if (U_FAILURE(ec)) {
+        ruleDataResult = 0;
+        return;
+    }
+    TransliteratorParser parser(rules, direction, parseError);
+    int32_t count;
+    parser.parseRules(idBlockResult, idSplitPointResult, count);
+    if (U_FAILURE(parser.status) || count == 0) {
+        delete parser.data;
+        parser.data = 0;
+    }
+    ruleDataResult = parser.data;
+    ec = parser.status;
+}
+
 /**
 * @param rules list of rules, separated by newline characters
 * @exception IllegalArgumentException if there is a syntax error in the
 * rules
 */
-TransliterationRuleParser::TransliterationRuleParser(
+TransliteratorParser::TransliteratorParser(
                                     const UnicodeString& theRules,
                                     UTransDirection theDirection,
                                     UParseError* theParseError) :
@ -515,7 +557,7 @@ TransliterationRuleParser::TransliterationRuleParser(
 /**
 * Destructor.
 */
-TransliterationRuleParser::~TransliterationRuleParser() {
+TransliteratorParser::~TransliteratorParser() {
    delete parseData;
 }

@ -527,8 +569,11 @@ TransliterationRuleParser::~TransliterationRuleParser() {
 * @exception IllegalArgumentException if there is a syntax error in the
 * rules
 */
-void TransliterationRuleParser::parseRules(void) {
+void TransliteratorParser::parseRules(UnicodeString& idBlockResult,
+                                      int32_t& idSplitPointResult,
+                                      int32_t& ruleCount) {
    status = U_ZERO_ERROR;
+    ruleCount = 0;

    delete data;
    data = new TransliterationRuleData(status);
@ -543,14 +588,21 @@ void TransliterationRuleParser::parseRules(void) {
    }
    determineVariableRange();

+    UnicodeString str; // scratch
+    idBlockResult.truncate(0);
+    idSplitPointResult = -1;
    int32_t pos = 0;
    int32_t limit = rules.length();
+    // The mode marks whether we are in the header ::id block, the
+    // rule block, or the footer ::id block.
+    // mode == 0: start: rule->1, ::id->0
+    // mode == 1: in rules: rule->1, ::id->2
+    // mode == 2: in footer ::id block: rule->ERROR, ::id->2
+    int32_t mode = 0;
    while (pos < limit && U_SUCCESS(status)) {
        UChar c = rules.charAt(pos++);
        if (Unicode::isWhitespace(c)) {
-            // Ignore leading whitespace.  Note that this is not
-            // Unicode spaces, but Java spaces -- a subset,
-            // representing whitespace likely to be seen in code.
+            // Ignore leading whitespace.
            continue;
        }
        // Skip lines starting with the comment character
@ -561,10 +613,50 @@ void TransliterationRuleParser::parseRules(void) {
            }
            continue; // Either fall out or restart with next line
        }
-        // We've found the start of a rule.  c is its first
-        // character, and pos points past c.  Lexically parse the
-        // rule into component pieces.
-        pos = parseRule(--pos, limit);                    
+        // We've found the start of a rule or ID.  c is its first
+        // character, and pos points past c.
+        --pos;
+        // Look for an ID token.  Must have at least ID_TOKEN_LEN + 1
+        // chars left.
+        if ((pos + ID_TOKEN_LEN + 1) <= limit &&
+            rules.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) {
+            pos += ID_TOKEN_LEN;
+            c = rules.charAt(pos);
+            while (Unicode::isWhitespace(c) && pos < limit) {
+                ++pos;
+                c = rules.charAt(pos);
+            }
+            int32_t p = pos;
+            UBool sawDelim;
+            Transliterator::parseID(rules, p, sawDelim, direction, NULL, FALSE);
+            if (p == pos) {
+                // Invalid ::id
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+            } else {
+                if (mode == 1) {
+                    mode = 2;
+                    idSplitPointResult = idBlockResult.length();
+                }
+                rules.extractBetween(pos, p, str);
+                idBlockResult.append(str);
+                if (!sawDelim) {
+                    idBlockResult.append((UChar)0x003B /*;*/);
+                }
+                pos = p;
+            }
+        } else {
+            // Parse a rule
+            pos = parseRule(pos, limit);
+            if (U_SUCCESS(status)) {
+                ++ruleCount;
+                if (mode == 2) {
+                    // ::id in illegal position (because a rule
+                    // occurred after the ::id footer block)
+                    status = U_ILLEGAL_ARGUMENT_ERROR;
+                }
+            }
+            mode = 1;
+        }
    }
    
    // Convert the set vector to an array
@ -573,7 +665,8 @@ void TransliterationRuleParser::parseRules(void) {
    // orphanElement removes the given element and shifts all other
    // elements down.  For performance (and code clarity) we work from
    // the end back to index 0.
-    for (int32_t i=data->setVariablesLength; i>0; ) {
+    int32_t i;
+    for (i=data->setVariablesLength; i>0; ) {
        --i;
        data->setVariables[i] =
            (UnicodeSet*) setVariablesVector.orphanElementAt(i);
@ -582,6 +675,9 @@ void TransliterationRuleParser::parseRules(void) {
    // Index the rules
    if (U_SUCCESS(status)) {
        data->ruleSet.freeze(*data, status);
+        if (idSplitPointResult < 0) {
+            idSplitPointResult = idBlockResult.length();
+        }
    }
 }

@ -598,7 +694,7 @@ void TransliterationRuleParser::parseRules(void) {
 * indicators.  Once it does a lexical breakdown of the rule at pos, it
 * creates a rule object and adds it to our rule list.
 */
-int32_t TransliterationRuleParser::parseRule(int32_t pos, int32_t limit) {
+int32_t TransliteratorParser::parseRule(int32_t pos, int32_t limit) {
    // Locate the left side, operator, and right side
    int32_t start = pos;
    UChar op = 0;
@ -759,7 +855,7 @@ int32_t TransliterationRuleParser::parseRule(int32_t pos, int32_t limit) {
 * @param rule pattern string
 * @param start position of first character of current rule
 */
-int32_t TransliterationRuleParser::syntaxError(int32_t parseErrorCode,
+int32_t TransliteratorParser::syntaxError(int32_t parseErrorCode,
                                               const UnicodeString& rule,
                                               int32_t start) {
    if (parseError != 0) {
@ -786,7 +882,7 @@ int32_t TransliterationRuleParser::syntaxError(int32_t parseErrorCode,
 * Parse a UnicodeSet out, store it, and return the stand-in character
 * used to represent it.
 */
-UChar TransliterationRuleParser::parseSet(const UnicodeString& rule,
+UChar TransliteratorParser::parseSet(const UnicodeString& rule,
                                          ParsePosition& pos) {
    UnicodeSet* set = new UnicodeSet(rule, pos, *parseData, status);
    if (variableNext >= variableLimit) {
@ -804,7 +900,7 @@ UChar TransliterationRuleParser::parseSet(const UnicodeString& rule,
 * Append the value of the given variable name to the given
 * UnicodeString.
 */
-void TransliterationRuleParser::appendVariableDef(const UnicodeString& name,
+void TransliteratorParser::appendVariableDef(const UnicodeString& name,
                                                  UnicodeString& buf) {
    const UnicodeString* s = (const UnicodeString*) data->variableNames->get(name);
    if (s == NULL) {
@ -839,7 +935,7 @@ void TransliterationRuleParser::appendVariableDef(const UnicodeString& name,
 * When done, everything not in the hash is available for use.  In practice,
 * this method may employ some other algorithm for improved speed.
 */
-void TransliterationRuleParser::determineVariableRange(void) {
+void TransliteratorParser::determineVariableRange(void) {
    UnicodeRange privateUse(0xE000, 0x1900); // Private use area

    UnicodeRange* r = privateUse.largestUnusedSubrange(rules);
@ -864,7 +960,7 @@ void TransliterationRuleParser::determineVariableRange(void) {
 * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
 * found by a search for 'h'.
 */
-int32_t TransliterationRuleParser::quotedIndexOf(const UnicodeString& text,
+int32_t TransliteratorParser::quotedIndexOf(const UnicodeString& text,
                                                 int32_t start, int32_t limit,
                                                 UChar charToFind) {
    for (int32_t i=start; i<limit; ++i) {
--- a/icu4c/source/i18n/rbt_pars.h
+++ b/icu4c/source/i18n/rbt_pars.h
@ -18,7 +18,7 @@ class ParseData;
 class RuleHalf;
 class ParsePosition;

-class TransliterationRuleParser {
+class TransliteratorParser {

    /**
     * This is a reference to external data we don't own.  This works because
@ -87,6 +87,28 @@ public:
              UTransDirection direction,
              UParseError* parseError = 0);

+    /**
+     * Parse a given set of rules.  Return up to two pieces of parsed
+     * data: the rule data, and a single ::id block that holds the
+     * header ::id entries followed by the footer ::id entries.  Either
+     * or both may be empty.  If there are no ::id entries,
+     * idBlockResult is returned as the empty string.  If there are no
+     * rules, the TransliterationRuleData result is 0.
+     * @param rules the rule source to parse
+     * @param direction either FORWARD or REVERSE
+     * @param ruleDataResult caller owns the pointer stored here.
+     * May be NULL.
+     * @param idBlockResult receives the ::id entries, including
+     * semicolons.  May be empty.
+     * @param idSplitPointResult receives the offset within
+     * idBlockResult that separates the header entries from the footer
+     * entries.
+     * @param parseError passed through to the parser
+     * @param ec the error code indicating success or failure
+     */
+    static void parse(const UnicodeString& rules,
+                      UTransDirection direction,
+                      TransliterationRuleData*& ruleDataResult,
+                      UnicodeString& idBlockResult,
+                      int32_t& idSplitPointResult,
+                      UParseError* parseError,
+                      UErrorCode& ec);
+
 private:

    /**
@ -94,14 +116,14 @@ private:
     * @exception IllegalArgumentException if there is a syntax error in the
     * rules
     */
-    TransliterationRuleParser(const UnicodeString& rules,
+    TransliteratorParser(const UnicodeString& rules,
                              UTransDirection direction,
                              UParseError* parseError = 0);

    /**
     * Destructor.
     */
-    ~TransliterationRuleParser();
+    ~TransliteratorParser();

    /**
     * Parse the given string as a sequence of rules, separated by newline
@ -111,7 +133,8 @@ private:
     * @exception IllegalArgumentException if there is a syntax error in the
     * rules
     */
-    void parseRules(void);
+    void parseRules(UnicodeString& idBlockResult, int32_t& idSplitPointResult,
+                    int32_t& ruleCount);

    /**
     * MAIN PARSER.  Parse the next rule in the given rule string, starting
@ -139,13 +162,6 @@ private:
     */
    int32_t syntaxError(int32_t parseErrorCode, const UnicodeString&, int32_t start);

-    /**
-     * Allocate a private-use substitution character for the given set,
-     * register it in the setVariables hash, and return the substitution
-     * character.
-     */
-    //UChar registerSet(UnicodeSet* adoptedSet);
-
    /**
     * Parse a UnicodeSet out, store it, and return the stand-in character
     * used to represent it.
@ -189,8 +205,8 @@ private:
    friend class RuleHalf;

    // Disallowed methods; no impl.
-    TransliterationRuleParser(const TransliterationRuleParser&);
-    TransliterationRuleParser& operator=(const TransliterationRuleParser&);
+    TransliteratorParser(const TransliteratorParser&);
+    TransliteratorParser& operator=(const TransliteratorParser&);
 };

 #endif
--- a/icu4c/source/i18n/remtrans.cpp
+++ b/icu4c/source/i18n/remtrans.cpp
@ -9,8 +9,7 @@
 */
 #include "unicode/remtrans.h"

-//const UnicodeString RemoveTransliterator::ID = UnicodeString("Remove", "");
-const UChar RemoveTransliterator::ID[] = {0x52, 0x65, 0x6D, 0x6F, 0x76, 0x65, 0x00}; /* "Remove" */
+const UChar RemoveTransliterator::ID[] = {0x41, 0x6E, 0x79, 0x2D, 0x52, 0x65, 0x6D, 0x6F, 0x76, 0x65, 0x00}; /* "Any-Remove" */

 Transliterator* RemoveTransliterator::clone(void) const {
    return new RemoveTransliterator();
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -565,91 +565,347 @@ Transliterator* Transliterator::createInverse(void) const {
 Transliterator* Transliterator::createInstance(const UnicodeString& ID,
                                                UTransDirection dir,
                                                UParseError* parseError) {
-    Transliterator* t = 0;
-    if (ID.indexOf(ID_DELIM) >= 0) {
-        UErrorCode status = U_ZERO_ERROR;
-        t = new CompoundTransliterator(ID, dir, 0, status);
-        if (U_FAILURE(status)) {
-            delete t;
-            t = 0;
+    UErrorCode status = U_ZERO_ERROR; // local only: the error code itself is not reported to the caller
+    return createInstance(ID, dir, -1, NULL, parseError, status); // -1, NULL: no split-point transliterator to insert
+}
+
+/**
+ * Create a transliterator given a compound ID (possibly degenerate,
+ * with no ID_DELIM).  If idSplitPoint >= 0 and adoptedSplitTrans !=
+ * 0, then insert adoptedSplitTrans in the compound ID at offset
+ * idSplitPoint.  Otherwise idSplitPoint should be -1 and
+ * adoptedSplitTrans should be 0.  The resultant transliterator will
+ * be an atomic (non-compound) transliterator if this is indicated by
+ * ID.  Otherwise it will be a compound transliterator.
+ *
+ * @param ID the ID to parse; may name one or several transliterators
+ * @param dir the direction in which the ID is interpreted
+ * @param idSplitPoint offset in ID at which adoptedSplitTrans is
+ * inserted, or -1
+ * @param adoptedSplitTrans transliterator to insert, or 0.  It is
+ * handed to parseCompoundID(), which either stores it in the result
+ * list or deletes it when parsing fails.  NOTE: it is not deleted if
+ * status already indicates a failure on entry.
+ * @param parseError passed through to the ID parser
+ * @param status input-output error code.  Nothing is done if it
+ * indicates a failure on entry.
+ * @return a transliterator, or 0 on failure.  A NullTransliterator
+ * is returned if parsing the ID produces no transliterators.
+ */
+Transliterator* Transliterator::createInstance(const UnicodeString& ID,
+                                               UTransDirection dir,
+                                               int32_t idSplitPoint,
+                                               Transliterator *adoptedSplitTrans,
+                                               UParseError* parseError,
+                                               UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+
+    UVector list;
+    int32_t ignored; // index of adoptedSplitTrans in list; not needed here
+    parseCompoundID(ID, dir, idSplitPoint, adoptedSplitTrans,
+                    list, ignored, parseError, status);
+
+    if (U_FAILURE(status)) {
+        // parseCompoundID() has already deleted whatever it created
+        return 0;
+    }
+
+    switch (list.size()) {
+    case 0:
+        return new NullTransliterator();
+
+    case 1:
+        // A single transliterator is returned as is, unwrapped
+        return (Transliterator*) list.elementAt(0);
+
+    default:
+        {
+            // Several transliterators: wrap them in a compound.  If
+            // construction fails, do not hand back an object in an
+            // unknown state; clean up the same way createFromRules()
+            // and _createInstance() do for their compound objects.
+            Transliterator* t =
+                new CompoundTransliterator(ID, dir, list, status);
+            if (U_FAILURE(status)) {
+                delete t;
+                t = 0;
+            }
+            return t;
+        }
+    }
+}
+
+/**
+ * Returns a <code>Transliterator</code> object constructed from
+ * the given rule string.  The rules are parsed into an ::ID block
+ * and rule data, and the result depends on which of them is present:
+ *   no ID block, no rules:  a NullTransliterator
+ *   no ID block, rules:     a RuleBasedTransliterator
+ *   ID block, no rules:     whatever createInstance() builds for the
+ *                           ID block, relabeled with the given ID
+ *   ID block and rules:     a CompoundTransliterator with the
+ *                           rule-based transliterator inserted at
+ *                           the split point
+ *
+ * @param ID the ID given to the returned transliterator
+ * @param rules the rule source
+ * @param dir the direction in which the rules are parsed
+ * @param parseError passed through to the parser and to
+ * createInstance()
+ * @return a new transliterator, or 0 if parsing or construction
+ * fails
+ */
+Transliterator* Transliterator::createFromRules(const UnicodeString& ID,
+                                                const UnicodeString& rules,
+                                                UTransDirection dir,
+                                                UParseError* parseError) {
+    UnicodeString idBlock;
+    int32_t idSplitPoint = -1;
+    TransliterationRuleData *data = 0;
+    UErrorCode status = U_ZERO_ERROR;
+
+    TransliteratorParser::parse(rules, dir, data,
+                                idBlock, idSplitPoint,
+                                parseError, status);
+
+    if (U_FAILURE(status)) {
+        delete data; // we own whatever parse() stored, even on failure
+        return 0;
+    }
+
+    // NOTE: The logic here matches that in _createInstance().
+    if (idBlock.length() == 0) {
+        if (data == 0) {
+            // No idBlock, no data -- this is just an
+            // alias for Null
+            return new NullTransliterator();
+        } else {
+            // No idBlock, data != 0 -- this is an
+            // ordinary RBT_DATA.
+            return new RuleBasedTransliterator(ID, data, TRUE); // TRUE == adopt data object
         }
     } else {
-        // 'id' is the ID with the filter pattern removed and with
-        // whitespace deleted.
-        UnicodeString id(ID);
-
-        // Look for embedded filter pattern
-        UnicodeSet *filter = 0;
-        int32_t setStart = id.indexOf((UChar)0x005B /*[*/);
-        int32_t setLimit;
-        if (setStart >= 0) {
-            UErrorCode status = U_ZERO_ERROR;
-            ParsePosition pos(setStart);
-            filter = new UnicodeSet();
-            filter->applyPattern(id, pos, 0, status);
+        if (data == 0) {
+            // idBlock, no data -- this is an alias for the IDs in idBlock; the result is relabeled with the caller's ID
+            Transliterator *t = createInstance(idBlock, dir, parseError);
+            if (t != 0) {
+                t->setID(ID);
+            }
+            return t;
+        } else {
+            // idBlock and data -- this is a compound
+            // RBT
+            UnicodeString id("_", ""); // placeholder ID for the inner rule-based transliterator
+            Transliterator *t = new RuleBasedTransliterator(id, data, TRUE); // TRUE == adopt data object
+            t = new CompoundTransliterator(ID, idBlock, idSplitPoint,
+                                           t, status);
             if (U_FAILURE(status)) {
-                // There was a parse failure in the filter pattern
-                delete filter;
-                return 0;
+                delete t;
+                t = 0;
             }
-            setLimit = pos.getIndex();
-            id.removeBetween(setStart, setLimit);
-        }
-
-        // Delete whitespace
-        int32_t i;
-        for (i=0; i<id.length(); ++i) {
-            if (Unicode::isWhitespace(id.charAt(i))) {
-                id.remove(i, 1);
-                --i;
-            }
-        }
-
-        // Fix the id, if necessary, by reversing it (A-B => B-A).
-        // Record the position of the separator.  Detect the special
-        // case of Null, whose inverse is itself.  Given an ID with no
-        // separator "Foo", an abbreviation for "Any-Foo", consider
-        // the inverse to be "Foo-Any".
-        int32_t sep = id.indexOf(ID_SEP);
-        if (id.caseCompare(NullTransliterator::ID,
-                           U_FOLD_CASE_DEFAULT) == 0) {
-            sep = id.length();
-        } else if (dir == UTRANS_REVERSE) {
-            UnicodeString left;
-            if (sep >= 0) {
-                id.extractBetween(0, sep, left);
-                id.removeBetween(0, sep+1);
-            } else {
-                left = UnicodeString("Any", "");
-            }
-            sep = id.length();
-            id.append(ID_SEP).append(left);
-        } else if (sep < 0) {
-            sep = id.length();
-        }
-
-        // The 'alias' parameter is non-empty if _createInstance()
-        // finds that the given ID refers to an alias.  The reason
-        // _createInstance() doesn't call createInstance() (this
-        // method) directly is to avoid deadlock.  There are other
-        // ways to do this but this is one of the more efficient ways.
-        UnicodeString alias;
-        t = _createInstance(id, alias, parseError);
-
-        if (alias.length() > 0) { // assert(t==0)
-            t = createInstance(alias);
-        }
-
-        if (t != 0) {
-            if (filter != 0) {
-                t->adoptFilter(filter);
-                id.insert(sep, ID, setStart, setLimit-setStart);
-            }
-            t->setID(id);
+            return t;
         }
     }
+}
+
+UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
+                                       UBool escapeUnprintable) const {
+    // Base class behavior: express this transliterator as a single
+    // ::ID rule, i.e. "foo" becomes "::foo".  escapeUnprintable is
+    // not consulted here.
+    // KEEP the "::" marker in sync with rbt_pars
+    UnicodeString rule("::", "");
+    rule.append(getID());
+    rulesSource = rule;
+    return rulesSource;
+}
+
+/**
+ * Instantiate the transliterators named by a compound ID (which may
+ * be degenerate, i.e. contain no ID_DELIM).  When idSplitPoint >= 0
+ * and adoptedSplitTrans != 0, adoptedSplitTrans is inserted among
+ * them at offset idSplitPoint of the ID; otherwise idSplitPoint
+ * should be -1 and adoptedSplitTrans should be 0.  The created
+ * objects (including adoptedSplitTrans, if given) are appended to
+ * 'result' in ID order, so a caller working in the REVERSE direction
+ * has to reverse the order itself.
+ *
+ * On failure, every element of 'result' is deleted, 'result' is
+ * emptied, and adoptedSplitTrans is deleted as well.  Nothing is
+ * done if status indicates a failure on entry.
+ *
+ * @param splitTransIndex output parameter to receive the index in
+ * 'result' at which the adoptedSplitTrans is stored, or -1 if
+ * adoptedSplitTrans == 0
+ */
+void Transliterator::parseCompoundID(const UnicodeString& id,
+                                     UTransDirection dir,
+                                     int32_t idSplitPoint,
+                                     Transliterator *adoptedSplitTrans,
+                                     UVector& result,
+                                     int32_t& splitTransIndex,
+                                     UParseError* parseError,
+                                     UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    splitTransIndex = -1;
+    int32_t pos = 0;
+    while (pos < id.length()) {
+        // The test is (pos >= split) rather than (pos == split)
+        // because parseID() skips whitespace and may step past the
+        // exact split offset.
+        if (adoptedSplitTrans != 0 && pos >= idSplitPoint) {
+            splitTransIndex = result.size();
+            result.addElement(adoptedSplitTrans);
+            adoptedSplitTrans = 0;
+        }
+        int32_t next = pos;
+        UBool delimiterSeen; // not used here
+        Transliterator *parsed =
+            parseID(id, next, delimiterSeen, dir, parseError, TRUE);
+        if (next == pos) {
+            // parseID() made no progress: the ID is malformed
+            delete parsed;
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            break;
+        }
+        pos = next;
+        // parseID() may return NULL without it being an error, for
+        // instance for a REVERSE transliterator of ID "Latin-Greek()".
+        if (parsed != 0) {
+            result.addElement(parsed);
+        }
+    }
+
+    // Covers idSplitPoint == id.length(): the split transliterator
+    // goes last.
+    if (adoptedSplitTrans != 0 && pos >= idSplitPoint) {
+        splitTransIndex = result.size();
+        result.addElement(adoptedSplitTrans);
+        adoptedSplitTrans = 0;
+    }
+
+    if (U_SUCCESS(status)) {
+        return;
+    }
+
+    // Failure: release everything collected so far, plus the split
+    // transliterator if it never made it into the list.
+    int32_t i;
+    for (i=0; i<result.size(); ++i) {
+        delete (Transliterator*)result.elementAt(i);
+    }
+    result.removeAllElements();
+    delete adoptedSplitTrans;
+}
+
+/**
+ * Parse a single ID, possibly including an inline filter, and return
+ * the resultant transliterator object.  NOTE: If 'create' is FALSE,
+ * then the amount of syntax checking is limited.  However, the 'pos'
+ * parameter will be updated correctly, assuming the input string is
+ * valid.
+ *
+ * A trailing /;? \s* / is skipped.  The parameter sawDelimiter
+ * indicates whether the ';' was seen or not.  Upon return, if pos is
+ * advanced, it will either point to a non-whitespace character past
+ * the trailing ';', if any, or be equal to length().
+ *
+ * On return one of the following will be true:
+ *  pos unchanged: sawDelimiter meaningless
+ *  pos == ID.length(): sawDelimiter TRUE or FALSE
+ *  pos < ID.length(): sawDelimiter always TRUE
+ *
+ * @param ID the ID string
+ * @param pos INPUT-OUTPUT parameter.  On input, the position of the
+ * first character to parse.  On output, the position after the last
+ * character parsed, that is, past the trailing ';' (if any) and the
+ * whitespace following it.  In the case of an error this value will
+ * be unchanged.
+ * @param sawDelimiter OUTPUT parameter.  Set to TRUE if this ID was
+ * terminated by a ';'.
+ * @param dir if UTRANS_REVERSE, the ID is reversed (A-B => B-A)
+ * before it is looked up
+ * @param parseError passed through to _createInstance() and
+ * createInstance()
+ * @param create if TRUE, create and return the result.  If FALSE,
+ * only scan the ID, and return NULL.
+ * @return a newly created transliterator, or NULL.  NULL is returned
+ * in all cases if create is FALSE.  If create is TRUE, then NULL is
+ * returned on error, or if the ID is effectively empty.
+ * E.g. "Latin-Greek()" with dir == REVERSE.  Do NOT check for NULL to
+ * determine if there was an error.  Instead, check to see if pos
+ * moved.  NOTE(review): as written, pos is advanced only when create
+ * is FALSE or a transliterator was actually created.
+ */
+Transliterator* Transliterator::parseID(const UnicodeString& ID,
+                                        int32_t& pos,
+                                        UBool& sawDelimiter,
+                                        UTransDirection dir,
+                                        UParseError* parseError,
+                                        UBool create) {
+    Transliterator* t = 0;
+    UnicodeString str; // scratch
+
+    // Look for embedded filter pattern by looking for ';' and
+    // '[' and seeing which comes first.
+    UnicodeSet *filter = 0;
+    int32_t limit = ID.indexOf(ID_DELIM, pos);
+    sawDelimiter = limit >= 0;
+    if (!sawDelimiter) {
+        limit = ID.length();
+    }
+    int32_t setStart = ID.indexOf((UChar)0x005B /*[*/, pos);
+    int32_t setLimit;
+    if (setStart >= 0 && setStart < limit) {
+        UErrorCode status = U_ZERO_ERROR;
+        ParsePosition ppos(setStart);
+        filter = new UnicodeSet();
+        filter->applyPattern(ID, ppos, 0, status);
+        if (U_FAILURE(status)) {
+            // There was a parse failure in the filter pattern
+            delete filter;
+            return 0;
+        }
+        setLimit = ppos.getIndex();
+        if (limit < setLimit) {
+            // The ';' found above lies inside the set pattern; look
+            // for the real delimiter after the pattern.
+            limit = ID.indexOf(ID_DELIM, setLimit);
+            sawDelimiter = limit >= 0;
+            if (!sawDelimiter) {
+                limit = ID.length();
+            }
+        }
+    } else {
+        // No filter in this ID: use an empty range at pos
+        setStart = setLimit = pos;
+    }
+
+    // Advance limit past /;?\s*/
+    int32_t idLimit = limit; // limit before separator
+    if (sawDelimiter) {
+        // assert(limit < ID.length() && ID.charAt(limit) == ID_DELIM);
+        ++limit;
+    }
+    while (limit < ID.length() && u_isspace(ID.charAt(limit))) {
+        ++limit;
+    }
+
+    if (!create) {
+        // TODO Improve performance by scanning the UnicodeSet pattern
+        // without actually constructing it, if create is FALSE.  That
+        // is, create a method like this one for UnicodeSet.
+        delete filter;
+        pos = limit;
+        return 0;
+    }
+
+    // 'id' is the ID with the filter pattern removed and with
+    // whitespace deleted.
+    UnicodeString id;
+    ID.extractBetween(pos, setStart, id);
+    ID.extractBetween(setLimit, idLimit, str);
+    id.append(str);
+
+    // Delete whitespace
+    int32_t i;
+    for (i=0; i<id.length(); ++i) {
+        if (Unicode::isWhitespace(id.charAt(i))) {
+            id.remove(i, 1);
+            --i;
+        }
+    }
+
+    // Fix the id, if necessary, by reversing it (A-B => B-A).
+    // Record the position of the separator.  Detect the special
+    // case of Null, whose inverse is itself.  Given an ID with no
+    // separator "Foo", an abbreviation for "Any-Foo", consider
+    // the inverse to be "Foo-Any".
+    int32_t sep = id.indexOf(ID_SEP);
+    if (sep < 0 && id.caseCompare(NullTransliterator::ID,
+                                  U_FOLD_CASE_DEFAULT) == 0) {
+        sep = id.length();
+    } else if (dir == UTRANS_REVERSE) {
+        if (sep >= 0) {
+            id.extractBetween(0, sep, str);
+            id.removeBetween(0, sep+1);
+        } else {
+            str = UnicodeString("Any", "");
+        }
+        sep = id.length();
+        id.append(ID_SEP).append(str);
+    } else if (sep < 0) {
+        str = UnicodeString("Any-", "");
+        sep = str.length();
+        id.insert(0, str);
+    }
+
+    // The 'alias' parameter is non-empty if _createInstance()
+    // finds that the given ID refers to an alias.  The reason
+    // _createInstance() doesn't call createInstance() (this
+    // method) directly is to avoid deadlock.  There are other
+    // ways to do this but this is one of the more efficient ways.
+    str.truncate(0);
+    t = _createInstance(id, str /*alias*/, parseError);
+
+    if (str.length() > 0) {
+        // assert(t==0);
+        t = createInstance(str, UTRANS_FORWARD, parseError);
+    }
+
+    if (t == 0) {
+        // Nothing was created, so nothing adopted the filter: free
+        // it here rather than leak it.  pos is left unchanged to
+        // signal the failure.
+        delete filter;
+    } else {
+        if (filter != 0) {
+            t->adoptFilter(filter);
+            // Put the filter pattern text back into the ID at the
+            // separator position
+            id.insert(sep, ID, setStart, setLimit-setStart);
+        }
+        t->setID(id);
+        pos = limit;
+    }
+
     return t;
 }
+                                        

 /**
 * Returns a transliterator object given its ID.  Unlike getInstance(),
@ -661,8 +917,6 @@ Transliterator* Transliterator::createInstance(const UnicodeString& ID,
 Transliterator* Transliterator::_createInstance(const UnicodeString& ID,
                                                UnicodeString& aliasReturn,
                                                UParseError* parseError) {
-    UErrorCode status = U_ZERO_ERROR;
-
    if (!cacheInitialized) {
        initializeCache();
    }
@ -672,46 +926,55 @@ Transliterator* Transliterator::_createInstance(const UnicodeString& ID,
     CacheEntry* entry = (CacheEntry*) cache->get(ID);
     if (entry == 0) {
         entry = (CacheEntry*) internalCache->get(ID);
+        if (entry == 0) {
+            return 0; // neither cache has an entry for this ID
+        }
     }

-    TransliterationRuleData* data = 0;
+    UErrorCode status = U_ZERO_ERROR;

-    if (entry == 0) {
-        return 0;
-    }
+    for (;;) {
+        if (entry->entryType == CacheEntry::RBT_DATA) {
+            return new RuleBasedTransliterator(ID, entry->u.data);
+        } else if (entry->entryType == CacheEntry::PROTOTYPE) {
+            return entry->u.prototype->clone();
+        } else if (entry->entryType == CacheEntry::ALIAS) {
+            // We can't call createInstance() here because of deadlock.
+            aliasReturn = entry->stringArg;
+            return 0;
+        } else if (entry->entryType == CacheEntry::FACTORY) {
+            return entry->u.factory();
+        } else if (entry->entryType == CacheEntry::COMPOUND_RBT) {
+            UnicodeString id("_", "");
+            Transliterator *t = new RuleBasedTransliterator(id, entry->u.data);
+            t = new CompoundTransliterator(ID, entry->stringArg,
+                                           entry->intArg, t, status);
+            if (U_FAILURE(status)) {
+                delete t;
+                t = 0;
+                _unregister(ID);
+            }
+            return t;
+        }

-    if (entry->entryType == CacheEntry::RBT_DATA) {
-        data = entry->u.data;
-        // Fall through to construct transliterator from cached Data object.
-    } else if (entry->entryType == CacheEntry::PROTOTYPE) {
-        return entry->u.prototype->clone();
-    } else if (entry->entryType == CacheEntry::ALIAS) {
-        // We can't call createInstance() here because of deadlock.
-        aliasReturn = entry->stringArg;
-        return 0;
-    } else if (entry->entryType == CacheEntry::FACTORY) {
-        return entry->u.factory();
-    } else {
-        // At this point entry type must be either RULES_FORWARD
-        // or RULES_REVERSE
+        // At this point entry type must be either RULES_FORWARD or
+        // RULES_REVERSE.  We process the rule data into a
+        // TransliteratorRuleData object, and possibly also into an
+        // ::id header and/or footer.  Then we modify the cache with
+        // the parsed data and retry.
        UBool isReverse = (entry->entryType == CacheEntry::RULES_REVERSE);
-        
+
        // We use the file name, taken from another resource bundle
        // 2-d array at static init time, as a locale language.  We're
        // just using the locale mechanism to map through to a file
        // name; this in no way represents an actual locale.

-        char *ch;
-        ch = new char[entry->stringArg.length() + 1];
+        char *ch = new char[entry->stringArg.length() + 1];
        ch[entry->stringArg.extract(0, 0x7fffffff, ch, "")] = 0;
        Locale fakeLocale(ch);
        delete [] ch;

-        ResourceBundle bundle((char *)0,
-                              fakeLocale, status);
-        
-        // Call RBT to parse the rules from the resource bundle
-
+        ResourceBundle bundle((char *)0, fakeLocale, status);
        UnicodeString rules = bundle.getStringEx(RB_RULE, status);

        // If the status indicates a failure, then we don't have any
@ -719,42 +982,54 @@ Transliterator* Transliterator::_createInstance(const UnicodeString& ID,
        // in the root locale should correspond to all the installed
        // transliterators; if it lists something that's not
        // installed, we'll get an error from ResourceBundle.
-        if (U_SUCCESS(status)) {

-            data = TransliterationRuleParser::parse(rules, isReverse
-                                    ? UTRANS_REVERSE
-                                    : UTRANS_FORWARD,
-                                    parseError);
+        TransliteratorParser::parse(rules, isReverse ?
+                                    UTRANS_REVERSE : UTRANS_FORWARD,
+                                    entry->u.data,
+                                    entry->stringArg,
+                                    entry->intArg,
+                                    parseError,
+                                    status);

-            // Double check to see if someone has modified the entry
-            // since we last looked at it.
-            if (entry->entryType != CacheEntry::RBT_DATA) {
-                entry->entryType = CacheEntry::RBT_DATA;
-                entry->u.data = data;
+        if (U_FAILURE(status)) {
+            // We have a failure of some kind.  Remove the ID from the
+            // cache so we don't keep trying.  NOTE: This will throw off
+            // anyone who is, at the moment, trying to iterate over the
+            // available IDs.  That's acceptable since we should never
+            // really get here except under installation, configuration,
+            // or unrecoverable run time memory failures.
+            _unregister(ID);
+            break;
+        }
+
+        // Reset entry->entryType to something that we process at the
+        // top of the loop, then loop back to the top.  As long as we
+        // do this, we only loop through twice at most.
+        // NOTE: The logic here matches that in createFromRules().
+        if (entry->stringArg.length() == 0) {
+            if (entry->u.data == 0) {
+                // No idBlock, no data -- this is just an
+                // alias for Null
+                entry->entryType = CacheEntry::ALIAS;
+                entry->stringArg = NullTransliterator::ID;
            } else {
-                // Oops!  Another thread has updated this cache entry
-                // already to point to a data object.  Discard the
-                // one we just created and use the one in the cache
-                // instead.
-                delete data;
-                data = entry->u.data;
+                // No idBlock, data != 0 -- this is an
+                // ordinary RBT_DATA
+                entry->entryType = CacheEntry::RBT_DATA;
+            }
+        } else {
+            if (entry->u.data == 0) {
+                // idBlock, no data -- this is an alias
+                entry->entryType = CacheEntry::ALIAS;
+            } else {
+                // idBlock and data -- this is a compound
+                // RBT
+                entry->entryType = CacheEntry::COMPOUND_RBT;
            }
        }
    }

-    if (data != 0) {
-        return new RuleBasedTransliterator(ID, data);
-    } else {
-        // We have a failure of some kind.  Remove the ID from the
-        // cache so we don't keep trying.  NOTE: This will throw off
-        // anyone who is, at the moment, trying to iterate over the
-        // available IDs.  That's acceptable since we should never
-        // really get here except under installation, configuration,
-        // or unrecoverable run time memory failures.
-        _unregister(ID);
-    }
-
-    return 0;
+    return 0; // failed
 }

 // For public consumption
@ -907,10 +1182,11 @@ UChar Transliterator::filteredCharAt(const Replaceable& text, int32_t i) const {
        (localFilter->contains(c = text.charAt(i)) ? c : (UChar)0xFFFE);
 }

-// TODO Move this into the class
-// NO This should remain a C function for os/390 and Solaris Workshop [grhoten]
 /**
 * Comparison function for UVector.
+ *
+ * Do not make this a class static: This should remain a C function
+ * for os/390 and Solaris Workshop [grhoten]
 */
 U_CDECL_BEGIN
 static UBool U_CALLCONV
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@ -17,6 +17,7 @@
 #include "unicode/hextouni.h"
 #include "unicode/unitohex.h"
 #include "unicode/unicode.h"
+#include "unicode/uniset.h"
 #include "unicode/ucnv.h"
 #include "unicode/ucnv_err.h"

@ -61,6 +62,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
        TESTCASE(26,TestLiberalizedID);
        TESTCASE(27,TestCreateInstance);
        TESTCASE(28,TestNormalizationTransliterator);
+        TESTCASE(29,TestCompoundRBT);
        default: name = ""; break;
    }
 }
@ -1053,93 +1055,6 @@ void TransliteratorTest::TestLiberalizedID(void) {
    }
 }

-//======================================================================
-// Support methods
-//======================================================================
-void TransliteratorTest::expect(const UnicodeString& rules,
-                                const UnicodeString& source,
-                                const UnicodeString& expectedResult) {
-    UErrorCode status = U_ZERO_ERROR;
-    Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status);
-    if (U_FAILURE(status)) {
-        errln("FAIL: Transliterator constructor failed");
-    } else {
-        expect(*t, source, expectedResult);
-    }
-    delete t;
-}
-
-void TransliteratorTest::expect(const Transliterator& t,
-                                const UnicodeString& source,
-                                const UnicodeString& expectedResult,
-                                const Transliterator& reverseTransliterator) {
-    expect(t, source, expectedResult);
-    expect(reverseTransliterator, expectedResult, source);
-}
-
-void TransliteratorTest::expect(const Transliterator& t,
-                                const UnicodeString& source,
-                                const UnicodeString& expectedResult) {
-    UnicodeString result(source);
-    t.transliterate(result);
-    expectAux(t.getID() + ":String", source, result, expectedResult);
-
-    UnicodeString rsource(source);
-    t.transliterate(rsource);
-    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
-
-    // Test keyboard (incremental) transliteration -- this result
-    // must be the same after we finalize (see below).
-    rsource.remove();
-    UTransPosition index={0, 0, 0, 0};
-    UnicodeString log;
-
-    for (int32_t i=0; i<source.length(); ++i) {
-        if (i != 0) {
-            log.append(" + ");
-        }
-        log.append(source.charAt(i)).append(" -> ");
-        UErrorCode status = U_ZERO_ERROR;
-        t.transliterate(rsource, index, source.charAt(i), status);
-        // Append the string buffer with a vertical bar '|' where
-        // the committed index is.
-        UnicodeString left, right;
-        rsource.extractBetween(0, index.start, left);
-        rsource.extractBetween(index.start, rsource.length(), right);
-        log.append(left).append((UChar)PIPE).append(right);
-    }
-    
-    // As a final step in keyboard transliteration, we must call
-    // transliterate to finish off any pending partial matches that
-    // were waiting for more input.
-    t.finishTransliteration(rsource, index);
-    log.append(" => ").append(rsource);
-
-    expectAux(t.getID() + ":Keyboard", log,
-              rsource == expectedResult,
-              expectedResult);
-}
-
-void TransliteratorTest::expectAux(const UnicodeString& tag,
-                                   const UnicodeString& source,
-                                   const UnicodeString& result,
-                                   const UnicodeString& expectedResult) {
-    expectAux(tag, source + " -> " + result,
-              result == expectedResult,
-              expectedResult);
-}
-
-void TransliteratorTest::expectAux(const UnicodeString& tag,
-                                   const UnicodeString& summary, UBool pass,
-                                   const UnicodeString& expectedResult) {
-    if (pass) {
-        logln(UnicodeString("(")+tag+") " + prettify(summary));
-    } else {
-        errln(UnicodeString("FAIL: (")+tag+") "
-              + prettify(summary)
-              + ", expected " + prettify(expectedResult));
-    }
-}
 /* test for Jitterbug 912 */
 void TransliteratorTest::TestCreateInstance(){
    UParseError *err = 0;
@ -1248,3 +1163,157 @@ void TransliteratorTest::TestNormalizationTransliterator() {
    delete NFKD;
    delete NFKC;
 }
+
+/**
+ * Test compound RBT rules (rule sets containing ::ID blocks) and round-tripping through toRules().
+ */
+void TransliteratorTest::TestCompoundRBT(void) {
+    // Careful with spacing and ';' here:  Phrase this exactly
+    // as toRules() is going to return it.  If toRules() changes
+    // with regard to spacing or ';', then adjust this string.
+    UnicodeString rule("::Hex-Unicode;\n"
+                       "::Any-Lower;\n"
+                       "a > '.A.';\n"
+                       "b > '.B.';\n"
+                       "::Any[^t]-Upper;", ""); // ::ID lines appear both before and after the two rules
+    Transliterator *t = Transliterator::createFromRules("Test", rule);
+    if (t == 0) {
+        errln("FAIL: createFromRules failed");
+        return; // nothing to delete: t is 0
+    }
+    expect(*t, "\\u0043at in the hat, bat on the mat",
+           "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); // 't' stays lower-case here, matching the [^t] filter on the last ID
+    UnicodeString r;
+    t->toRules(r, TRUE); // r receives the generated rule text
+    if (r == rule) { // exact string comparison, hence the spacing warning above
+        logln((UnicodeString)"OK: toRules() => " + r);
+    } else {
+        errln((UnicodeString)"FAIL: toRules() => " + r +
+              ", expected " + rule);
+    }
+    delete t;
+
+    // Now test toRules() on a transliterator built by createInstance() from a two-ID string
+    t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic");
+    if (t == 0) {
+        errln("FAIL: createInstance failed");
+        return;
+    }
+    UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;"); // each ID expected on its own "::ID;" line
+    t->toRules(r, TRUE);
+    if (r != exp) {
+        errln((UnicodeString)"FAIL: toRules() => " + r +
+              ", expected " + exp);
+    } else {
+        logln((UnicodeString)"OK: toRules() => " + r);
+    }
+    delete t;
+
+    // Round trip the result of toRules: feed r back into createFromRules()
+    t = Transliterator::createFromRules("Test", r);
+    if (t == 0) {
+        errln("FAIL: createFromRules #2 failed");
+        return;
+    } else {
+        logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
+    }
+
+    // Test toRules again: the round-tripped transliterator is expected to reproduce exp
+    t->toRules(r, TRUE);
+    if (r != exp) {
+        errln((UnicodeString)"FAIL: toRules() => " + r +
+              ", expected " + exp);
+    } else {
+        logln((UnicodeString)"OK: toRules() => " + r);
+    }
+
+    delete t;
+}
+
+//======================================================================
+// Support methods
+//======================================================================
+void TransliteratorTest::expect(const UnicodeString& rules, // rule text used to build a temporary RuleBasedTransliterator
+                                const UnicodeString& source, // input text to transliterate
+                                const UnicodeString& expectedResult) { // text the transliteration must produce
+    UErrorCode status = U_ZERO_ERROR;
+    Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status); // built only for this check; deleted below
+    if (U_FAILURE(status)) {
+        errln("FAIL: Transliterator constructor failed");
+    } else {
+        expect(*t, source, expectedResult); // NOTE(review): t itself is not null-checked before this dereference
+    }
+    delete t; // reached on both the success and the failure path
+}
+
+void TransliteratorTest::expect(const Transliterator& t, // transliterator for the source -> expectedResult check
+                                const UnicodeString& source,
+                                const UnicodeString& expectedResult,
+                                const Transliterator& reverseTransliterator) { // transliterator for the opposite check
+    expect(t, source, expectedResult); // forward: source must become expectedResult
+    expect(reverseTransliterator, expectedResult, source); // reverse: expectedResult must map back to source
+}
+
+void TransliteratorTest::expect(const Transliterator& t, // transliterator under test
+                                const UnicodeString& source, // input text
+                                const UnicodeString& expectedResult) { // text every mode below must produce
+    UnicodeString result(source); // work on a copy; source is reused by the later checks
+    t.transliterate(result);
+    expectAux(t.getID() + ":String", source, result, expectedResult);
+
+    UnicodeString rsource(source);
+    t.transliterate(rsource); // same transliterate() call as above, reported under the ":Replaceable" tag
+    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
+
+    // Test keyboard (incremental) transliteration -- this result
+    // must be the same after we finalize (see below).
+    rsource.remove(); // reuse rsource as the buffer that receives one character per step
+    UTransPosition index={0, 0, 0, 0}; // position state carried across all incremental calls
+    UnicodeString log; // trace of every step, reported through expectAux at the end
+
+    for (int32_t i=0; i<source.length(); ++i) {
+        if (i != 0) {
+            log.append(" + ");
+        }
+        log.append(source.charAt(i)).append(" -> ");
+        UErrorCode status = U_ZERO_ERROR; // NOTE(review): status is never inspected after the call
+        t.transliterate(rsource, index, source.charAt(i), status);
+        // Append the buffer contents to the log, with a vertical bar '|'
+        // inserted at index.start (the committed index).
+        UnicodeString left, right;
+        rsource.extractBetween(0, index.start, left);
+        rsource.extractBetween(index.start, rsource.length(), right);
+        log.append(left).append((UChar)PIPE).append(right);
+    }
+    
+    // As a final step in keyboard transliteration, we must call
+    // transliterate to finish off any pending partial matches that
+    // were waiting for more input.
+    t.finishTransliteration(rsource, index);
+    log.append(" => ").append(rsource);
+
+    expectAux(t.getID() + ":Keyboard", log, // the accumulated trace serves as the summary text
+              rsource == expectedResult,
+              expectedResult);
+}
+
+void TransliteratorTest::expectAux(const UnicodeString& tag, // label identifying the check in the output
+                                   const UnicodeString& source,
+                                   const UnicodeString& result,
+                                   const UnicodeString& expectedResult) {
+    expectAux(tag, source + " -> " + result, // summary text of the form "source -> result"
+              result == expectedResult, // pass flag handed to the reporting overload
+              expectedResult);
+}
+
+void TransliteratorTest::expectAux(const UnicodeString& tag, // label printed in parentheses before the summary
+                                   const UnicodeString& summary, UBool pass, // pass selects logln (true) or errln (false)
+                                   const UnicodeString& expectedResult) { // only printed when pass is false
+    if (pass) {
+        logln(UnicodeString("(")+tag+") " + prettify(summary));
+    } else {
+        errln(UnicodeString("FAIL: (")+tag+") " // failure message also carries the expected text
+              + prettify(summary)
+              + ", expected " + prettify(expectedResult));
+    }
+}
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@ -159,6 +159,8 @@ class TransliteratorTest : public IntlTest {

    void TestNormalizationTransliterator(void);

+    void TestCompoundRBT(void);
+
    //======================================================================
    // Support methods
    //======================================================================