ICU-4539 Added ability to put ::ID rules into the middle of Transliterator rule sets.

X-SVN-Rev: 17844
2005-06-09 17:30:48 +00:00 · 2005-06-09 17:30:48 +00:00 · fb164eba5a
commit fb164eba5a
parent 950b3b0bc3
18 changed files with 852 additions and 410 deletions
--- a/icu4c/source/data/translit/Latin_NumericPinyin.txt
+++ b/icu4c/source/data/translit/Latin_NumericPinyin.txt
@ -1,5 +1,5 @@
 #--------------------------------------------------------------------
-# Copyright (c) 1999-2004, International Business Machines
+# Copyright (c) 1999-2005, International Business Machines
 # Corporation and others. All Rights Reserved.
 #--------------------------------------------------------------------

@ -27,7 +27,7 @@ e {($tone) r} > r &tone-digit($1);

 # The following backs up until it finds the right vowel, then deposits the tone

-$vowel = [aAeEiIoOuUüÜ];
+$vowel = [aAeEiIoOuUüÜ {u\u0308} {U\u0308} ];
 $consonant = [[a-z A-Z] - [$vowel]];
 $digit = [1-5];
 $1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit);
--- a/icu4c/source/i18n/cpdtrans.cpp
+++ b/icu4c/source/i18n/cpdtrans.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2004, International Business Machines
+*   Copyright (C) 1999-2005, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -51,7 +51,7 @@ CompoundTransliterator::CompoundTransliterator(
                           int32_t transliteratorCount,
                           UnicodeFilter* adoptedFilter) :
    Transliterator(joinIDs(transliterators, transliteratorCount), adoptedFilter),
-    trans(0), count(0), compoundRBTIndex(-1)  {
+    trans(0), count(0), numAnonymousRBTs(0)  {
    setTransliterators(transliterators, transliteratorCount);
 }

@ -68,20 +68,36 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& id,
                              UParseError& /*parseError*/,
                              UErrorCode& status) :
    Transliterator(id, adoptedFilter),
-    trans(0), compoundRBTIndex(-1) {
+    trans(0), numAnonymousRBTs(0) {
    // TODO add code for parseError...currently unused, but
    // later may be used by parsing code...
-    init(id, direction, -1, 0, TRUE, status);
+    init(id, direction, TRUE, status);
 }

 CompoundTransliterator::CompoundTransliterator(const UnicodeString& id,
                              UParseError& /*parseError*/,
                              UErrorCode& status) :
    Transliterator(id, 0), // set filter to 0 here!
-    trans(0), compoundRBTIndex(-1) {
+    trans(0), numAnonymousRBTs(0) {
    // TODO add code for parseError...currently unused, but
    // later may be used by parsing code...
-    init(id, UTRANS_FORWARD, -1, 0, TRUE, status);
+    init(id, UTRANS_FORWARD, TRUE, status);
+}
+
+
+/**
+ * Private constructor for use of TransliteratorAlias
+ */
+CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
+                                              UVector& list,
+                                              UnicodeFilter* adoptedFilter,
+                                              int32_t anonymousRBTs,
+                                              UParseError& /*parseError*/,
+                                              UErrorCode& status) :
+    Transliterator(ID, adoptedFilter),
+    trans(0), numAnonymousRBTs(anonymousRBTs)
+{
+    init(list, UTRANS_FORWARD, FALSE, status);
 }

 /**
@ -93,7 +109,7 @@ CompoundTransliterator::CompoundTransliterator(UVector& list,
                                               UParseError& /*parseError*/,
                                               UErrorCode& status) :
    Transliterator(EMPTY, NULL),
-    trans(0), compoundRBTIndex(-1)
+    trans(0), numAnonymousRBTs(0)
 {
    // TODO add code for parseError...currently unused, but
    // later may be used by parsing code...
@ -101,20 +117,14 @@ CompoundTransliterator::CompoundTransliterator(UVector& list,
    // assume caller will fixup ID
 }

-/**
- * Private constructor for compound RBTs.  Construct a compound
- * transliterator using the given idBlock, with the adoptedTrans
- * inserted at the idSplitPoint.
- */
-CompoundTransliterator::CompoundTransliterator(const UnicodeString& newID,
-                                               const UnicodeString& idBlock,
-                                               int32_t idSplitPoint,
-                                               Transliterator *adoptedTrans,
+CompoundTransliterator::CompoundTransliterator(UVector& list,
+                                               int32_t anonymousRBTs,
+                                               UParseError& /*parseError*/,
                                               UErrorCode& status) :
-    Transliterator(newID, 0),
-    trans(0), compoundRBTIndex(-1)
+    Transliterator(EMPTY, NULL),
+    trans(0), numAnonymousRBTs(anonymousRBTs)
 {
-    init(idBlock, UTRANS_FORWARD, idSplitPoint, adoptedTrans, FALSE, status);
+    init(list, UTRANS_FORWARD, FALSE, status);
 }

 /**
@ -135,14 +145,11 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& newID,
 */
 void CompoundTransliterator::init(const UnicodeString& id,
                                  UTransDirection direction,
-                                  int32_t idSplitPoint,
-                                  Transliterator *adoptedSplitTrans,
                                  UBool fixReverseID,
                                  UErrorCode& status) {
    // assert(trans == 0);

    if (U_FAILURE(status)) {
-        delete adoptedSplitTrans;
        return;
    }

@ -152,12 +159,11 @@ void CompoundTransliterator::init(const UnicodeString& id,
    if (!TransliteratorIDParser::parseCompoundID(id, direction,
                                      regenID, list, compoundFilter)) {
        status = U_INVALID_ID;
-        delete adoptedSplitTrans;
        delete compoundFilter;
        return;
    }

-    compoundRBTIndex = TransliteratorIDParser::instantiateList(list, adoptedSplitTrans, idSplitPoint, status);
+    TransliteratorIDParser::instantiateList(list, status);

    init(list, direction, fixReverseID, status);

@ -209,11 +215,6 @@ void CompoundTransliterator::init(UVector& list,
        trans[i] = (Transliterator*) list.elementAt(j);
    }

-    // Fix compoundRBTIndex for REVERSE transliterators
-    if (compoundRBTIndex >= 0 && direction == UTRANS_REVERSE) {
-        compoundRBTIndex = count - 1 - compoundRBTIndex;
-    }
-
    // If the direction is UTRANS_REVERSE then we may need to fix the
    // ID.
    if (direction == UTRANS_REVERSE && fixReverseID) {
@ -251,7 +252,7 @@ UnicodeString CompoundTransliterator::joinIDs(Transliterator* const transliterat
 * Copy constructor.
 */
 CompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t) :
-    Transliterator(t), trans(0), count(0), compoundRBTIndex(-1) {
+    Transliterator(t), trans(0), count(0), numAnonymousRBTs(0) { // a count, so start at 0; operator= below copies t's value
     *this = t;
 }

@ -292,7 +293,7 @@ CompoundTransliterator& CompoundTransliterator::operator=(
    for (i=0; i<count; ++i) {
        trans[i] = t.trans[i]->clone();
    }
-    compoundRBTIndex = t.compoundRBTIndex;
+    numAnonymousRBTs = t.numAnonymousRBTs;
    return *this;
 }

@ -359,7 +360,7 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
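-    // compoundRBTIndex >= 0.  For the transliterator at compoundRBTIndex,
-    // we do call toRules() recursively.
+    // numAnonymousRBTs >= 1.  For each anonymous RBT (ID starting with
+    // "%Pass") and each nested compound, we do call toRules() recursively.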
    rulesSource.truncate(0);
-    if (compoundRBTIndex >= 0 && getFilter() != NULL) {
+    if (numAnonymousRBTs >= 1 && getFilter() != NULL) {
        // If we are a compound RBT and if we have a global
        // filter, then emit it at the top.
        UnicodeString pat;
@ -367,8 +368,24 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
    }
    for (int32_t i=0; i<count; ++i) {
        UnicodeString rule;
-        if (i == compoundRBTIndex) {
+
+        // Anonymous RuleBasedTransliterators (inline rules and
+        // ::BEGIN/::END blocks) are given IDs that begin with
+        // "%Pass": use toRules() to write all the rules to the output
+        // (and insert "::Null;" if we have two in a row)
+        if (trans[i]->getID().startsWith("%Pass")) {
            trans[i]->toRules(rule, escapeUnprintable);
+            if (numAnonymousRBTs > 1 && i > 0 && trans[i - 1]->getID().startsWith("%Pass"))
+                rule = "::Null;" + rule;
+
+        // we also use toRules() on CompoundTransliterators (which we
+        // check for by looking for a semicolon in the ID)-- this gets
+        // the list of their child transliterators output in the right
+        // format
+        } else if (trans[i]->getID().indexOf(';') >= 0) {
+            trans[i]->toRules(rule, escapeUnprintable);
+
+        // for everything else, use Transliterator::toRules()
        } else {
            trans[i]->Transliterator::toRules(rule, escapeUnprintable);
        }
--- a/icu4c/source/i18n/cpdtrans.h
+++ b/icu4c/source/i18n/cpdtrans.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2004, International Business Machines
+*   Copyright (C) 1999-2005, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -42,13 +42,7 @@ class U_I18N_API CompoundTransliterator : public Transliterator {

    int32_t count;

-    /**
-     * For compound RBTs (those with an ::id block before and/or after
-     * the main rule block) we record the index of the RBT here.
-     * Otherwise, this should have a value of -1.  We need this
-     * information to implement toRules().
-     */
-    int32_t compoundRBTIndex;
+    int32_t numAnonymousRBTs;

 public:

@ -202,28 +196,27 @@ private:
    friend class Transliterator;
    friend class TransliteratorAlias; // to access private ct

-    /**
-     * Private constructor for compound RBTs.  Construct a compound
-     * transliterator using the given idBlock, with the adoptedTrans
-     * inserted at the idSplitPoint.
-     */
-    CompoundTransliterator(const UnicodeString& ID,
-                           const UnicodeString& idBlock,
-                           int32_t idSplitPoint,
-                           Transliterator *adoptedTrans,
-                           UErrorCode& status);
-                           
    /**
     * Private constructor for Transliterator.
     */
+    CompoundTransliterator(const UnicodeString& ID,
+                           UVector& list,
+                           UnicodeFilter* adoptedFilter,
+                           int32_t numAnonymousRBTs,
+                           UParseError& parseError,
+                           UErrorCode& status);
+    
    CompoundTransliterator(UVector& list,
                           UParseError& parseError,
                           UErrorCode& status);

+    CompoundTransliterator(UVector& list,
+                           int32_t anonymousRBTs,
+                           UParseError& parseError,
+                           UErrorCode& status);
+
    void init(const UnicodeString& id,
              UTransDirection direction,
-              int32_t idSplitPoint,
-              Transliterator *adoptedRbt,
              UBool fixReverseID,
              UErrorCode& status);

--- a/icu4c/source/i18n/rbt.cpp
+++ b/icu4c/source/i18n/rbt.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2004, International Business Machines
+*   Copyright (C) 1999-2005, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -43,13 +43,13 @@ void RuleBasedTransliterator::_construct(const UnicodeString& rules,
        return;
    }

-    if (parser.idBlock.length() != 0 ||
+    if (parser.idBlockVector->size() != 0 ||
        parser.compoundFilter != NULL) {
        status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
        return;
    }

-    fData = parser.orphanData();
+    fData = (TransliterationRuleData*)parser.dataVector->orphanElementAt(0);
    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
 }

--- a/icu4c/source/i18n/rbt_data.cpp
+++ b/icu4c/source/i18n/rbt_data.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2004, International Business Machines
+*   Copyright (C) 1999-2005, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -23,7 +23,7 @@ U_NAMESPACE_BEGIN

 TransliterationRuleData::TransliterationRuleData(UErrorCode& status)
 : UMemory(), ruleSet(status),
-    variableNames(0), variables(0)
+    variableNames(0), variables(0), variablesAreOwned(TRUE)
 {
    if (U_FAILURE(status)) {
        return;
@ -44,7 +44,8 @@ TransliterationRuleData::TransliterationRuleData(UErrorCode& status)
 TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData& other) :
    UMemory(other), ruleSet(other.ruleSet),
    variablesBase(other.variablesBase),
-    variablesLength(other.variablesLength)
+    variablesLength(other.variablesLength),
+    variablesAreOwned(TRUE)
 {
    UErrorCode status = U_ZERO_ERROR;
    variableNames = new Hashtable(status);
@ -78,12 +79,12 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData&

 TransliterationRuleData::~TransliterationRuleData() {
    delete variableNames;
-    if (variables != 0) {
+    if (variablesAreOwned && variables != 0) {
        for (int32_t i=0; i<variablesLength; ++i) {
            delete variables[i];
        }
-        uprv_free(variables);
    }
+    uprv_free(variables);
 }

 UnicodeFunctor*
--- a/icu4c/source/i18n/rbt_data.h
+++ b/icu4c/source/i18n/rbt_data.h
@ -1,5 +1,5 @@
 /*
-* Copyright (C) 1999-2004, International Business Machines Corporation and others. All Rights Reserved.
+* Copyright (C) 1999-2005, International Business Machines Corporation and others. All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   11/17/99    aliu        Creation.
@ -74,6 +74,15 @@ public:
     */
    UnicodeFunctor** variables;

+    /**
+     * Flag that indicates whether this object owns the functors that
+     * variables[] points to.  When one rule set is parsed into several
+     * TransliterationRuleData objects, they all point at the same functors,
+     * so only the first one is marked as the owner; the destructor deletes
+     * the functors only when this flag is set.
+     */
+    UBool variablesAreOwned;
+
    /**
     * The character that represents variables[0].  Characters
     * variablesBase through variablesBase +
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@ -32,6 +32,7 @@
 #include "unicode/symtable.h"
 #include "tridpars.h"
 #include "uvector.h"
+#include "hash.h"
 #include "util.h"
 #include "cmemory.h"
 #include "uprops.h"
@ -108,6 +109,15 @@ static const UChar HALF_ENDERS[] = { // "=><;"
 static const int32_t ID_TOKEN_LEN = 2;
 static const UChar   ID_TOKEN[]   = { 0x3A, 0x3A }; // ':', ':'

+/*
+commented out until we do real ::BEGIN/::END functionality
+static const int32_t BEGIN_TOKEN_LEN = 5;
+static const UChar BEGIN_TOKEN[] = { 0x42, 0x45, 0x47, 0x49, 0x4e }; // 'BEGIN'
+
+static const int32_t END_TOKEN_LEN = 3;
+static const UChar END_TOKEN[] = { 0x45, 0x4e, 0x44 }; // 'END'
+*/
+
 U_NAMESPACE_BEGIN

 //----------------------------------------------------------------------
@ -126,8 +136,11 @@ public:

    const UVector* variablesVector; // alias

+    const Hashtable* variableNames; // alias
+
    ParseData(const TransliterationRuleData* data = 0,
-              const UVector* variablesVector = 0);
+              const UVector* variablesVector = 0,
+              const Hashtable* variableNames = 0);

    virtual const UnicodeString* lookup(const UnicodeString& s) const;

@ -153,14 +166,15 @@ private:
 };

 ParseData::ParseData(const TransliterationRuleData* d,
-                     const UVector* sets) :
-    data(d), variablesVector(sets) {}
+                     const UVector* sets,
+                     const Hashtable* vNames) :
+    data(d), variablesVector(sets), variableNames(vNames) {}

 /**
 * Implement SymbolTable API.
 */
 const UnicodeString* ParseData::lookup(const UnicodeString& name) const {
-    return (const UnicodeString*) data->variableNames->get(name);
+    return (const UnicodeString*) variableNames->get(name);
 }

 /**
@ -516,7 +530,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
                // standin for that matcher.
                StringMatcher* m =
                    new StringMatcher(buf, bufSegStart, buf.length(),
-                                      segmentNumber, *parser.data);
+                                      segmentNumber, *parser.curData);
                
                // Record and associate object and segment number
                parser.setSegmentObject(segmentNumber, m);
@ -554,7 +568,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
                UnicodeString output;
                buf.extractBetween(bufSegStart, buf.length(), output);
                FunctionReplacer *r =
-                    new FunctionReplacer(t, new StringReplacer(output, parser.data));
+                    new FunctionReplacer(t, new StringReplacer(output, parser.curData));
                
                // Replace the buffer contents with a stand-in
                buf.truncate(bufSegStart);
@ -645,7 +659,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
                }

                UnicodeFunctor *m =
-                    new StringMatcher(buf, qstart, qlimit, 0, *parser.data);
+                    new StringMatcher(buf, qstart, qlimit, 0, *parser.curData);
                int32_t min = 0;
                int32_t max = Quantifier::MAX;
                switch (c) {
@ -794,10 +808,13 @@ UBool RuleHalf::isValidInput(TransliteratorParser& transParser) {
 * Constructor.
 */
 TransliteratorParser::TransliteratorParser() {
-    data = NULL;
+    dataVector = NULL;
+    idBlockVector = NULL;
+    curData = NULL;
    compoundFilter = NULL;
    parseData = NULL;
    variablesVector = NULL;
+    variableNames = NULL;
    segmentObjects = NULL;
 }

@ -805,10 +822,16 @@ TransliteratorParser::TransliteratorParser() {
 * Destructor.
 */
 TransliteratorParser::~TransliteratorParser() {
-    delete data;
+    while (dataVector != NULL && !dataVector->isEmpty())
+        delete (TransliterationRuleData*)(dataVector->orphanElementAt(0));
+    delete dataVector;
+    delete idBlockVector;
    delete compoundFilter;
    delete parseData;
+    while (variablesVector != NULL && !variablesVector->isEmpty())
+        delete (UnicodeFunctor*)variablesVector->orphanElementAt(0);
    delete variablesVector;
+    delete variableNames;
    delete segmentObjects;
 }

@ -833,15 +856,6 @@ UnicodeSet* TransliteratorParser::orphanCompoundFilter() {
    return f;
 }

-/**
- * Return the data object parsed by parse().  Caller owns result.
- */
-TransliterationRuleData* TransliteratorParser::orphanData() {
-    TransliterationRuleData* d = data;
-    data = NULL;
-    return d;
-}
-
 //----------------------------------------------------------------------
 // Private implementation
 //----------------------------------------------------------------------
@ -861,12 +875,31 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
    parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
    status = U_ZERO_ERROR;

-    delete data;
-    data = new TransliterationRuleData(status);
+    UBool parsingIDs = TRUE;
+    UBool inBeginEndBlock = FALSE;
+    int32_t ruleCount = 0;
+    
+    if (dataVector == NULL)
+        dataVector = new UVector(status);
+    else {
+        while (!dataVector->isEmpty())
+            delete (TransliterationRuleData*)(dataVector->orphanElementAt(0));
+    }
    if (U_FAILURE(status)) {
        return;
    }

+    if (idBlockVector == NULL) {
+        idBlockVector = new UVector(status);
+        idBlockVector->setDeleter(uhash_deleteUnicodeString);
+    }
+    else
+        idBlockVector->removeAllElements();
+    if (U_FAILURE(status)) {
+        return;
+    }
+    curData = NULL;
+    
    direction = theDirection;
    ruleCount = 0;

@ -876,34 +909,27 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
    if (variablesVector == NULL) {
        variablesVector = new UVector(status);
    } else {
-        variablesVector->removeAllElements();
+        while (!variablesVector->isEmpty())
+            delete (UnicodeFunctor*)variablesVector->orphanElementAt(0);
    }
-    parseData = new ParseData(0, variablesVector);
+    if (variableNames == NULL) {
+        variableNames = new Hashtable(status);
+        variableNames->setValueDeleter(uhash_deleteUnicodeString);
+    } else {
+        variableNames->removeAll();
+    }
+    parseData = new ParseData(0, variablesVector, variableNames);
    if (parseData == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
-    parseData->data = data;

-    // By default, rules use part of the private use area
-    // E000..F8FF for variables and other stand-ins.  Currently
-    // the range F000..F8FF is typically sufficient.  The 'use
-    // variable range' pragma allows rule sets to modify this.
-    setVariableRange(0xF000, 0xF8FF);
-    
    dotStandIn = (UChar) -1;

    UnicodeString str; // scratch
-    idBlock.truncate(0);
-    idSplitPoint = -1;
+    UnicodeString idBlockResult;
    int32_t pos = 0;
    int32_t limit = rule.length();
-    // The mode marks whether we are in the header ::id block, the
-    // rule block, or the footer ::id block.
-    // mode == 0: start: rule->1, ::id->0
-    // mode == 1: in rules: rule->1, ::id->2
-    // mode == 2: in footer rule block: rule->ERROR, ::id->2
-    int32_t mode = 0;

    // The compound filter offset is an index into idBlockResult.
    // If it is 0, then the compound filter occurred at the start,
@ -913,9 +939,6 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
    compoundFilter = NULL;
    int32_t compoundFilterOffset = -1;

-    // The number of ::ID block entries we have parsed
-    int32_t idBlockCount = 0;
-
    while (pos < limit && U_SUCCESS(status)) {
        UChar c = rule.charAt(pos++);
        if (uprv_isRuleWhiteSpace(c)) {
@ -930,13 +953,21 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
            }
            continue; // Either fall out or restart with next line
        }
+
+        // skip empty rules
+        if (c == END_OF_RULE)
+            continue;
+
+        // keep track of how many rules we've seen
+        ++ruleCount;
+        
        // We've found the start of a rule or ID.  c is its first
        // character, and pos points past c.
        --pos;
        // Look for an ID token.  Must have at least ID_TOKEN_LEN + 1
        // chars left.
        if ((pos + ID_TOKEN_LEN + 1) <= limit &&
-            rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) {
+                rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) {
            pos += ID_TOKEN_LEN;
            c = rule.charAt(pos);
            while (uprv_isRuleWhiteSpace(c) && pos < limit) {
@ -944,33 +975,35 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
                c = rule.charAt(pos);
            }

-            if (mode == 1) {
-                // We have just entered the footer ::ID block
-                mode = 2;
-                // In the forward direction add elements at the end.
-                // In the reverse direction add elements at the start.
-                idSplitPoint = idBlockCount;
-            }
            int32_t p = pos;
            
+            if (!parsingIDs) {
+                if (curData != NULL) {
+                    if (direction == UTRANS_FORWARD)
+                        dataVector->addElement(curData, status);
+                    else
+                        dataVector->insertElementAt(curData, 0, status);
+                    curData = NULL;
+                }
+                parsingIDs = TRUE;
+            }
+
            TransliteratorIDParser::SingleID* id =
                TransliteratorIDParser::parseSingleID(rule, p, direction, status);
            if (p != pos && ICU_Utility::parseChar(rule, p, END_OF_RULE)) {
                // Successful ::ID parse.
-                
+
                if (direction == UTRANS_FORWARD) {
-                    idBlock.append(id->canonID).append(END_OF_RULE);
+                    idBlockResult.append(id->canonID).append(END_OF_RULE);
                } else {
-                    idBlock.insert(0, END_OF_RULE);
-                    idBlock.insert(0, id->canonID);
+                    idBlockResult.insert(0, END_OF_RULE);
+                    idBlockResult.insert(0, id->canonID);
                }
-                
-                ++idBlockCount;
-                
+
            } else {
                // Couldn't parse an ID.  Try to parse a global filter
                int32_t withParens = -1;
-                UnicodeSet* f = TransliteratorIDParser::parseGlobalFilter(rule, p, direction, withParens, &idBlock);
+                UnicodeSet* f = TransliteratorIDParser::parseGlobalFilter(rule, p, direction, withParens, NULL);
                if (f != NULL) {
                    if (ICU_Utility::parseChar(rule, p, END_OF_RULE)
                        && (direction == UTRANS_FORWARD) == (withParens == 0))
@ -981,7 +1014,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
                            delete f;
                        } else {
                            compoundFilter = f;
-                            compoundFilterOffset = idBlockCount;
+                            compoundFilterOffset = ruleCount;
                        }
                    } else {
                        delete f;
@ -993,78 +1026,93 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
                }
            }
            delete id;
-            
            pos = p;
-        } else if (resemblesPragma(rule, pos, limit)) {
-            int32_t ppp = parsePragma(rule, pos, limit);
-            if (ppp < 0) {
-                syntaxError(U_MALFORMED_PRAGMA, rule, pos);
-            }
-            pos = ppp;
        } else {
-            // Parse a rule
-            pos = parseRule(rule, pos, limit);
-            if (U_SUCCESS(status)) {
-                ++ruleCount;
-                if (mode == 2) {
-                    // ::id in illegal position (because a rule
-                    // occurred after the ::id footer block)
-                    syntaxError(U_ILLEGAL_ARGUMENT_ERROR,rule,pos);
-                }
-            }else{
-                syntaxError(status,rule,pos);
+            if (parsingIDs) {
+                if (direction == UTRANS_FORWARD)
+                    idBlockVector->addElement(new UnicodeString(idBlockResult), status);
+                else
+                    idBlockVector->insertElementAt(new UnicodeString(idBlockResult), 0, status);
+                idBlockResult.remove();
+                parsingIDs = FALSE;
+                curData = new TransliterationRuleData(status);
+                parseData->data = curData;
+
+                // By default, rules use part of the private use area
+                // E000..F8FF for variables and other stand-ins.  Currently
+                // the range F000..F8FF is typically sufficient.  The 'use
+                // variable range' pragma allows rule sets to modify this.
+                setVariableRange(0xF000, 0xF8FF);
+            }
+
+            if (resemblesPragma(rule, pos, limit)) {
+                int32_t ppp = parsePragma(rule, pos, limit);
+                if (ppp < 0) {
+                    syntaxError(U_MALFORMED_PRAGMA, rule, pos);
+                }
+                pos = ppp;
+            // Parse a rule
+            } else {
+                pos = parseRule(rule, pos, limit);
            }
-            mode = 1;
        }
    }
-    
-    if (idSplitPoint < 0) {
-        idSplitPoint = idBlockCount;
+
+    if (parsingIDs && idBlockResult.length() > 0) {
+        if (direction == UTRANS_FORWARD)
+            idBlockVector->addElement(new UnicodeString(idBlockResult), status);
+        else
+            idBlockVector->insertElementAt(new UnicodeString(idBlockResult), 0, status);
+    }
+    else if (!parsingIDs && curData != NULL) {
+        if (direction == UTRANS_FORWARD)
+            dataVector->addElement(curData, status);
+        else
+            dataVector->insertElementAt(curData, 0, status);
    }
    
-    if (direction == UTRANS_REVERSE) {
-        idSplitPoint = idBlockCount - idSplitPoint;
-    }
-
-    // Convert the set vector to an array
-    data->variablesLength = variablesVector->size();
-    if(data->variablesLength == 0) {
-        data->variables = 0;
-    } else {
-        data->variables = (UnicodeFunctor **)uprv_malloc(data->variablesLength * sizeof(UnicodeFunctor *));
-    }
-
-    // orphanElement removes the given element and shifts all other
-    // elements down.  For performance (and code clarity) we work from
-    // the end back to index 0.
-    int32_t i;
-    for (i=data->variablesLength; i>0; ) {
-        --i;
-        data->variables[i] =
-            (UnicodeSet*) variablesVector->orphanElementAt(i);
-    }
-
-    // Index the rules
    if (U_SUCCESS(status)) {
+        // Convert the set vector to an array
+        for (int32_t i = 0; i < dataVector->size(); i++) {
+            TransliterationRuleData* data = (TransliterationRuleData*)dataVector->elementAt(i);
+            data->variablesLength = variablesVector->size();
+            if (data->variablesLength == 0) {
+                data->variables = 0;
+            } else {
+                data->variables = (UnicodeFunctor**)uprv_malloc(data->variablesLength * sizeof(UnicodeFunctor*));
+                data->variablesAreOwned = (i == 0);
+            }
+
+            for (int32_t j = 0; j < data->variablesLength; j++) {
+                data->variables[j] =
+                    ((UnicodeSet*)variablesVector->elementAt(j));
+            }
+            
+            data->variableNames->removeAll();
+            int32_t pos = -1;
+            const UHashElement* he = variableNames->nextElement(pos);
+            while (he != NULL) {
+                data->variableNames->put(*((UnicodeString*)(he->key.pointer)),
+                    ((UnicodeString*)(he->value.pointer))->clone(), status);
+                he = variableNames->nextElement(pos);
+            }
+        }
+        variablesVector->removeAllElements();   // keeps them from getting deleted when we succeed
+
+        // Index the rules
        if (compoundFilter != NULL) {
-            if ((direction == UTRANS_FORWARD &&
-                 compoundFilterOffset != 0) ||
-                (direction == UTRANS_REVERSE &&
-                 compoundFilterOffset != idBlockCount)) {
+            if ((direction == UTRANS_FORWARD && compoundFilterOffset != 1) ||
+                (direction == UTRANS_REVERSE && compoundFilterOffset != ruleCount)) {
                status = U_MISPLACED_COMPOUND_FILTER;
            }
        }        

-        data->ruleSet.freeze(parseError,status);
-
-        if (idSplitPoint < 0) {
-            idSplitPoint = idBlock.length();
-        }
-
-        if (ruleCount == 0) {
-            delete data;
-            data = NULL;
+        for (int32_t i = 0; i < dataVector->size(); i++) {
+            TransliterationRuleData* data = (TransliterationRuleData*)dataVector->elementAt(i);
+            data->ruleSet.freeze(parseError, status);
        }
+        if (idBlockVector->size() == 1 && ((UnicodeString*)idBlockVector->elementAt(0))->isEmpty())
+            idBlockVector->removeElementAt(0);
    }
 }

@ -1077,8 +1125,11 @@ void TransliteratorParser::setVariableRange(int32_t start, int32_t end) {
        return;
    }
    
-    data->variablesBase = variableNext = (UChar) start; // first private use
-    variableLimit = (UChar) (end + 1);
+    curData->variablesBase = (UChar) start;
+    if (dataVector->size() == 0) {
+        variableNext = (UChar) start;
+        variableLimit = (UChar) (end + 1);
+    }
 }

 /**
@ -1087,7 +1138,7 @@ void TransliteratorParser::setVariableRange(int32_t start, int32_t end) {
 * variable range does not overlap characters used in a rule.
 */
 UBool TransliteratorParser::checkVariableRange(UChar32 ch) const {
-    return !(ch >= data->variablesBase && ch < variableLimit);
+    return !(ch >= curData->variablesBase && ch < variableLimit);
 }

 /**
@ -1276,7 +1327,7 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
        } 
        // We allow anything on the right, including an empty string.
        UnicodeString* value = new UnicodeString(right->text);
-        data->variableNames->put(undefinedVariableName, value, status);
+        variableNames->put(undefinedVariableName, value, status);
        ++variableLimit;
        return pos;
    }
@ -1363,13 +1414,13 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
        segmentObjects->toArray((void**) segmentsArray);
    }

-    data->ruleSet.addRule(new TransliterationRule(
+    curData->ruleSet.addRule(new TransliterationRule(
                                 left->text, left->ante, left->post,
                                 right->text, right->cursor, right->cursorOffset,
                                 segmentsArray,
                                 segmentObjects->size(),
                                 left->anchorStart, left->anchorEnd,
-                                 data,
+                                 curData,
                                 status), status);

    return pos;
@ -1434,7 +1485,7 @@ UChar TransliteratorParser::generateStandInFor(UnicodeFunctor* adopted) {
    // (typical n is 0, 1, or 2); linear search is optimal.
    for (int32_t i=0; i<variablesVector->size(); ++i) {
        if (variablesVector->elementAt(i) == adopted) { // [sic] pointer comparison
-            return (UChar) (data->variablesBase + i);
+            return (UChar) (curData->variablesBase + i);
        }
    }
    
@ -1452,7 +1503,7 @@ UChar TransliteratorParser::generateStandInFor(UnicodeFunctor* adopted) {
 */
 UChar TransliteratorParser::getSegmentStandin(int32_t seg) {
    // Special character used to indicate an empty spot
-    UChar empty = data->variablesBase - 1;
+    UChar empty = curData->variablesBase - 1;
    while (segmentStandins.length() < seg) {
        segmentStandins.append(empty);
    }
@ -1483,7 +1534,7 @@ void TransliteratorParser::setSegmentObject(int32_t seg, StringMatcher* adopted)
    if (segmentObjects->size() < seg) {
        segmentObjects->setSize(seg);
    }
-    int32_t index = getSegmentStandin(seg) - data->variablesBase;
+    int32_t index = getSegmentStandin(seg) - curData->variablesBase;
    if (segmentObjects->elementAt(seg-1) != NULL ||
        variablesVector->elementAt(index) != NULL) {
        // should never happen
@ -1511,7 +1562,7 @@ UChar TransliteratorParser::getDotStandIn() {
 */
 void TransliteratorParser::appendVariableDef(const UnicodeString& name,
                                                  UnicodeString& buf) {
-    const UnicodeString* s = (const UnicodeString*) data->variableNames->get(name);
+    const UnicodeString* s = (const UnicodeString*) variableNames->get(name);
    if (s == NULL) {
        // We allow one undefined variable so that variable definition
        // statements work.  For the first undefined variable we return
--- a/icu4c/source/i18n/rbt_pars.h
+++ b/icu4c/source/i18n/rbt_pars.h
@ -26,6 +26,7 @@ class ParseData;
 class RuleHalf;
 class ParsePosition;
 class UVector;
+class Hashtable;
 class StringMatcher;

 class TransliteratorParser : public UMemory {
@ -33,27 +34,16 @@ class TransliteratorParser : public UMemory {
 public:

    /**
-     * PUBLIC data member containing the parsed data object, or null if
-     * there were no rules.
+     * A Vector of TransliterationRuleData objects, one for each discrete group
+     * of rules in the rule set
     */
-    TransliterationRuleData* data;
+    UVector* dataVector;

    /**
     * PUBLIC data member.
-     * The block of ::IDs, both at the top and at the bottom.
-     * Inserted into these may be additional rules at the
-     * idSplitPoint.
+     * A Vector of UnicodeStrings containing all of the ID blocks in the rule set
     */
-    UnicodeString idBlock;
-
-    /**
-     * PUBLIC data member.
-     * In a compound RBT, the index at which the RBT rules are
-     * inserted into the ID block.  Index 0 means before any IDs
-     * in the block.  Index idBlock.length() means after all IDs
-     * in the block.  Index is a string index.
-     */
-    int32_t idSplitPoint;
+    UVector* idBlockVector;

    /**
     * PUBLIC data member containing the parsed compound filter, if any.
@ -62,10 +52,10 @@ class TransliteratorParser : public UMemory {

 private:

-    // The number of rules parsed.  This tells us if there were
-    // any actual transliterator rules, or if there were just ::ID
-    // block IDs.
-    int32_t ruleCount;
+    /**
+     * The current data object for which we are parsing rules
+     */
+    TransliterationRuleData* curData;

    UTransDirection direction;

@ -92,6 +82,12 @@ class TransliteratorParser : public UMemory {
     */
    UVector* variablesVector;

+    /**
+     * Temporary table of variable names.  When parsing is complete, this is
+     * copied into data.variableNames.
+     */
+    Hashtable* variableNames;    
+    
    /**
     * String of standins for segments.  Used during the parsing of a single
     * rule.  segmentStandins.charAt(0) is the standin for "$1" and corresponds
@ -177,12 +173,6 @@ public:
     */ 
    UnicodeSet* orphanCompoundFilter();

-    /**
-     * Return the data object parsed by parse().  Caller owns result.
-     * @return the data object parsed by parse().
-     */
-    TransliterationRuleData* orphanData();
-
 private:

    /**
--- a/icu4c/source/i18n/rbt_set.cpp
+++ b/icu4c/source/i18n/rbt_set.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 1999-2004, International Business Machines
+ *   Copyright (C) 1999-2005, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -120,7 +120,7 @@ inline void _debugOut(const char* msg, TransliterationRule* rule,
    UnicodeString esc;
    _escape(buf, esc);
    CharString cbuf(esc);
-    printf("%s\n", (char*) cbuf);
+    printf("%s\n", (const char*) cbuf);
 }

 #else
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -921,30 +921,35 @@ Transliterator::createInstance(const UnicodeString& ID,
        return NULL;
    }
    
-    TransliteratorIDParser::instantiateList(list, NULL, -1, status);
+    TransliteratorIDParser::instantiateList(list, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    
    U_ASSERT(list.size() > 0);
    Transliterator* t = NULL;
-    switch (list.size()) {
-    case 1:
-        t = (Transliterator*) list.elementAt(0);
-        break;
-    default:
+    
+    if (list.size() > 1 || canonID.indexOf(";") >= 0) {
+        // [NOTE: If it's a compoundID, we instantiate a CompoundTransliterator even if it only
+        // has one child transliterator.  This is so that toRules() will return the right thing
+        // (without any inactive ID), but our main ID still comes out correct.  That is, if we
+        // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;"
+        // even though the ID is "(Lower);Latin-Greek;".]
        t = new CompoundTransliterator(list, parseError, status);
        /* test for NULL */
        if (t == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        if (U_FAILURE(status)) {
            delete t;
            return NULL;
        }
-        break;
    }
+    else {
+        t = (Transliterator*)list.elementAt(0);
+    }
+    
    t->setID(canonID);
    if (globalFilter != NULL) {
        t->adoptFilter(globalFilter);
@ -1053,59 +1049,77 @@ Transliterator::createFromRules(const UnicodeString& ID,
    }

    // NOTE: The logic here matches that in TransliteratorRegistry.
-    if (parser.idBlock.length() == 0) {
-        if (parser.data == NULL) {
-            // No idBlock, no data -- this is just an
-            // alias for Null
-            t = new NullTransliterator();
-        } else {
-            // No idBlock, data != 0 -- this is an
-            // ordinary RBT_DATA.
-            t = new RuleBasedTransliterator(ID, parser.orphanData(), TRUE); // TRUE == adopt data object
+    if (parser.idBlockVector->size() == 0 && parser.dataVector->size() == 0) {
+        t = new NullTransliterator();
+    }
+    else if (parser.idBlockVector->size() == 0 && parser.dataVector->size() == 1) {
+        t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector->orphanElementAt(0), TRUE);
+    }
+    else if (parser.idBlockVector->size() == 1 && parser.dataVector->size() == 0) {
+        // idBlock, no data -- this is an alias.  The ID has
+        // been munged from reverse into forward mode, if
+        // necessary, so instantiate the ID in the forward
+        // direction.
+        if (parser.compoundFilter != NULL) {
+            UnicodeString filterPattern;
+            parser.compoundFilter->toPattern(filterPattern, FALSE);
+            t = createInstance(filterPattern + ";"
+                    + *((UnicodeString*)parser.idBlockVector->elementAt(0)), UTRANS_FORWARD, parseError, status);
        }
-        /* test for NULL */
-        if (t == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
-    } else {
-        if (parser.data == NULL) {
-            // idBlock, no data -- this is an alias.  The ID has
-            // been munged from reverse into forward mode, if
-            // necessary, so instantiate the ID in the forward
-            // direction.
-            t = createInstance(parser.idBlock, UTRANS_FORWARD, parseError, status);
-            if (t != NULL) {
-                t->setID(ID);
-            }
-        } else {
-            // idBlock and data -- this is a compound
-            // RBT
-            UnicodeString id((UChar)0x005F); // '_'
-            t = new RuleBasedTransliterator(id, parser.orphanData(), TRUE); // TRUE == adopt data object
-            /* test for NULL */
-            if (t == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
-            t = new CompoundTransliterator(ID, parser.idBlock, parser.idSplitPoint,
-                                           t, status);
-            /* test for NULL */
-            if (t == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
-            if (U_FAILURE(status)) {
-                delete t;
-                t = 0;
-            }
-            if (parser.compoundFilter != NULL) {
-                t->adoptFilter(parser.orphanCompoundFilter());
-            }
-            return t;
+        else
+            t = createInstance(*((UnicodeString*)parser.idBlockVector->elementAt(0)), UTRANS_FORWARD, parseError, status);
+
+
+        if (t != NULL) {
+            t->setID(ID);
        }
    }
+    else {
+        UVector transliterators(status);
+        int32_t passNumber = 1;

+        int32_t limit = parser.idBlockVector->size();
+        if (parser.dataVector->size() > limit)
+            limit = parser.dataVector->size();
+
+        for (int32_t i = 0; i < limit; i++) {
+            if (i < parser.idBlockVector->size()) {
+                UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector->elementAt(i);
+                if (!idBlock->isEmpty()) {
+                    Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status);
+                    if (temp != NULL && temp->getDynamicClassID() != NullTransliterator::getStaticClassID())
+                        transliterators.addElement(temp, status);
+                    else
+                        delete temp;
+                }
+            }
+            if (!parser.dataVector->isEmpty()) {
+                TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector->orphanElementAt(0);
+                Transliterator* rbt = new RuleBasedTransliterator((UnicodeString)"%Pass" + (passNumber++),
+                    data, TRUE);
+                if (rbt == NULL) {
+                    // Out of memory: nothing has adopted 'data' or the transliterators
+                    // collected so far, so release them here rather than storing a NULL pass.
+                    delete data;
+                    for (int32_t j = 0; j < transliterators.size(); j++)
+                        delete (Transliterator*)transliterators.elementAt(j);
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    return NULL;
+                }
+                transliterators.addElement(rbt, status);
+            }
+        }
+
+        t = new CompoundTransliterator(transliterators, passNumber - 1, parseError, status);
+        if (t != NULL) {
+            t->setID(ID);
+            t->adoptFilter(parser.orphanCompoundFilter());
+        }
+    }
+    // Any branch above that allocates with 'new' can leave t NULL without setting an error.
+    if (U_SUCCESS(status) && t == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
    return t;
 }

--- a/icu4c/source/i18n/transreg.cpp
+++ b/icu4c/source/i18n/transreg.cpp
@ -58,25 +58,25 @@ U_NAMESPACE_BEGIN
 // Alias
 //------------------------------------------------------------------

-TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID) :
+TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID,
+                                         const UnicodeSet* cpdFilter) :
    ID(),
-    aliasID(theAliasID),
-    trans(0),
-    compoundFilter(0),
-    idSplitPoint(-1),
+    aliasesOrRules(theAliasID),
+    transes(0),
+    compoundFilter(cpdFilter),
+    direction(UTRANS_FORWARD),
    type(TransliteratorAlias::SIMPLE) {
 }

 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
-                                         const UnicodeString& idBlock,
-                                         Transliterator* adopted,
-                                         int32_t theIDSplitPoint,
+                                         const UnicodeString& idBlocks,
+                                         UVector* adoptedTransliterators,
                                         const UnicodeSet* cpdFilter) :
    ID(theID),
-    aliasID(idBlock),
-    trans(adopted),
+    aliasesOrRules(idBlocks),
+    transes(adoptedTransliterators),
    compoundFilter(cpdFilter),
-    idSplitPoint(theIDSplitPoint),
+    direction(UTRANS_FORWARD),
    type(TransliteratorAlias::COMPOUND) {
 }

@ -84,15 +84,15 @@ TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
                                         const UnicodeString& rules,
                                         UTransDirection dir) :
    ID(theID),
-    aliasID(rules), // bad name -- rename aliasID!
-    trans(0),
+    aliasesOrRules(rules),
+    transes(0),
    compoundFilter(0),
-    idSplitPoint((int32_t) dir), // bad name -- rename idSplitPoint!
+    direction(dir),
    type(TransliteratorAlias::RULES) {
 }

 TransliteratorAlias::~TransliteratorAlias() {
-    delete trans;
+    delete transes;
 }


@ -104,23 +104,60 @@ Transliterator* TransliteratorAlias::create(UParseError& pe,
    Transliterator *t = NULL;
    switch (type) {
    case SIMPLE:
-        t = Transliterator::createInstance(aliasID, UTRANS_FORWARD, pe, ec);
+        t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
+        if (t != 0 && compoundFilter != 0)  // t is NULL when createInstance() fails
+            t->adoptFilter((UnicodeSet*)compoundFilter->clone());
        break;
    case COMPOUND:
-        t = new CompoundTransliterator(ID, aliasID, idSplitPoint,
-                                       trans, ec);
-        /* test for NULL */
-        if (t == 0) {
-            ec = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
-        trans = 0; // so we don't delete it later
-        if (compoundFilter) {
-            // TODO: Is this right? Are we leaking memory here?
-            // I'm suspicious because of the "trans = 0" line above;
-            // doesn't seem to fit the cloning here.  Don't have time
-            // to track this down right now. [alan 3.0]
-            t->adoptFilter((UnicodeSet*) compoundFilter->clone());
+        {
+            // the total number of transliterators in the compound is the total number of anonymous transliterators
+            // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
+            // block and that each pair of anonymous transliterators has an ID block between them.  Then we go back
+            // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
+            // marks the position where an anonymous transliterator goes) and adjust accordingly
+            int32_t anonymousRBTs = transes->size();
+            int32_t transCount = anonymousRBTs * 2 + 1;
+            if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff))
+                --transCount;
+            if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff))
+                --transCount;
+            UnicodeString noIDBlock((UChar)(0xffff));
+            noIDBlock += ((UChar)(0xffff));
+            int32_t pos = aliasesOrRules.indexOf(noIDBlock);
+            while (pos >= 0) {
+                --transCount;
+                pos = aliasesOrRules.indexOf(noIDBlock, pos + 1);
+            }
+
+            UVector transliterators(ec);
+            UnicodeString idBlock;
+            int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
+            while (blockSeparatorPos >= 0) {
+                aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
+                aliasesOrRules.remove(0, blockSeparatorPos + 1);
+                if (!idBlock.isEmpty())
+                    transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
+                if (!transes->isEmpty())
+                    transliterators.addElement(transes->orphanElementAt(0), ec);
+                blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
+            }
+            if (!aliasesOrRules.isEmpty())
+                transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
+            while (!transes->isEmpty())
+                transliterators.addElement(transes->orphanElementAt(0), ec);
+
+            if (U_SUCCESS(ec)) {
+                t = new CompoundTransliterator(ID, transliterators,
+                    (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
+                    anonymousRBTs, pe, ec);
+                if (t == 0) {
+                    ec = U_MEMORY_ALLOCATION_ERROR;
+                    return 0;
+                }
+            } else {
+                for (int32_t i = 0; i < transliterators.size(); i++)
+                    delete (Transliterator*)(transliterators.elementAt(i));
+            }
        }
        break;
    case RULES:
@ -141,9 +178,7 @@ void TransliteratorAlias::parse(TransliteratorParser& parser,
        return;
    }

-    // aliasID is really rules -- rename it!
-    // idSplitPoint is really UTransDirection -- rename it!
-    parser.parse(aliasID, (UTransDirection) idSplitPoint, pe, ec);
+    parser.parse(aliasesOrRules, direction, pe, ec);
 }

 //----------------------------------------------------------------------
@ -399,7 +434,8 @@ public:
    UnicodeSet* compoundFilter; // For COMPOUND_RBT
    union {
        Transliterator* prototype; // For PROTOTYPE
-        TransliterationRuleData* data; // For RBT_DATA, COMPOUND_RBT
+        TransliterationRuleData* data; // For RBT_DATA
+        UVector* dataVector;    // For COMPOUND_RBT
        struct {
            Transliterator::Factory function;
            Transliterator::Token   context;
@ -428,12 +464,16 @@ Entry::~Entry() {
    DEBUG_delEntry(this);
    if (entryType == PROTOTYPE) {
        delete u.prototype;
-    } else if (entryType == RBT_DATA || entryType == COMPOUND_RBT) {
+    } else if (entryType == RBT_DATA) {
        // The data object is shared between instances of RBT.  The
        // entry object owns it.  It should only be deleted when the
        // transliterator component is being cleaned up.  Doing so
        // invalidates any RBTs that the user has instantiated.
        delete u.data;
+    } else if (entryType == COMPOUND_RBT) {
+        while (u.dataVector != NULL && !u.dataVector->isEmpty())
+            delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0);
+        delete u.dataVector;
    }
    delete compoundFilter;
 }
@ -522,39 +562,41 @@ Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
        entry->entryType == Entry::RULES_REVERSE ||
        entry->entryType == Entry::LOCALE_RULES) {
        
-        entry->u.data = parser.orphanData();
-        entry->stringArg = parser.idBlock;
-        entry->intArg = parser.idSplitPoint;
-        entry->compoundFilter = parser.orphanCompoundFilter();
+        if (parser.idBlockVector->isEmpty() && parser.dataVector->isEmpty()) {
+            entry->u.data = 0;
+            entry->entryType = Entry::ALIAS;
+            entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
+        }
+        else if (parser.idBlockVector->isEmpty() && parser.dataVector->size() == 1) {
+            entry->u.data = (TransliterationRuleData*)parser.dataVector->orphanElementAt(0);
+            entry->entryType = Entry::RBT_DATA;
+        }
+        else if (parser.idBlockVector->size() == 1 && parser.dataVector->isEmpty()) {
+            entry->stringArg = *(UnicodeString*)(parser.idBlockVector->elementAt(0));
+            entry->compoundFilter = parser.orphanCompoundFilter();
+            entry->entryType = Entry::ALIAS;
+        }
+        else {
+            entry->entryType = Entry::COMPOUND_RBT;
+            entry->compoundFilter = parser.orphanCompoundFilter();
+            entry->u.dataVector = new UVector(status);
+            entry->stringArg.remove();

-        // Reset entry->entryType to encapsulate the parsed data.  The
-        // next time we instantiate this ID (including this very next
-        // time, at the end of this function) we won't have to parse
-        // again.
-        // NOTE: The logic here matches that in
-        // Transliterator::createFromRules().
-        if (entry->stringArg.length() == 0) {
-            if (entry->u.data == 0) {
-                // No idBlock, no data -- this is just an
-                // alias for Null
-                entry->entryType = Entry::ALIAS;
-                entry->stringArg = UNICODE_STRING_SIMPLE("Any-Null");
-            } else {
-                // No idBlock, data != 0 -- this is an
-                // ordinary RBT_DATA
-                entry->entryType = Entry::RBT_DATA;
-            }
-        } else {
-            if (entry->u.data == 0) {
-                // idBlock, no data -- this is an alias.  The ID has
-                // been munged from reverse into forward mode, if
-                // necessary, so instantiate the ID in the forward
-                // direction.
-                entry->entryType = Entry::ALIAS;
-            } else {
-                // idBlock and data -- this is a compound
-                // RBT
-                entry->entryType = Entry::COMPOUND_RBT;
+            int32_t limit = parser.idBlockVector->size();
+            if (parser.dataVector->size() > limit)
+                limit = parser.dataVector->size();
+
+            for (int32_t i = 0; i < limit; i++) {
+                if (i < parser.idBlockVector->size()) {
+                    UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector->elementAt(i);
+                    if (!idBlock->isEmpty())
+                        entry->stringArg += *idBlock;
+                }
+                if (!parser.dataVector->isEmpty()) {
+                    TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector->orphanElementAt(0);
+                    entry->u.dataVector->addElement(data, status);
+                    entry->stringArg += (UChar)0xffff;  // use U+FFFF to mark position of RBTs in ID block
+                }
            }
        }
    }
@ -1165,7 +1207,7 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
        }
        return t;
    case Entry::ALIAS:
-        aliasReturn = new TransliteratorAlias(entry->stringArg);
+        aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter);
        if (aliasReturn == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
@ -1178,13 +1220,29 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
        return t;
    case Entry::COMPOUND_RBT:
        {
-            UnicodeString id((UChar)0x005F);    /* "_" */
-            Transliterator *t = new RuleBasedTransliterator(id, entry->u.data);
-            if (t == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
+            // Build one anonymous RBT per rule-data block; the alias adopts the list.
+            UVector* rbts = new UVector(status);
+            if (rbts == 0) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return 0;
+            }
+            int32_t passNumber = 1;
+            for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
+                Transliterator* t = new RuleBasedTransliterator((UnicodeString)"%Pass" + (passNumber++),
+                    (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE);
+                if (t == 0)
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                else
+                    rbts->addElement(t, status);
            }
-            aliasReturn = new TransliteratorAlias(ID, entry->stringArg, t, entry->intArg, entry->compoundFilter);
+            if (U_FAILURE(status)) {
+                // Nothing adopts rbts on this path, so free it and any RBTs already created.
+                while (!rbts->isEmpty())
+                    delete (Transliterator*)rbts->orphanElementAt(0);
+                delete rbts;
+                return 0;
+            }
+            aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter);
        }
        if (aliasReturn == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
--- a/icu4c/source/i18n/transreg.h
+++ b/icu4c/source/i18n/transreg.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (c) 2001-2004, International Business Machines
+*   Copyright (c) 2001-2005, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -44,13 +44,13 @@ class TransliteratorAlias : public UMemory {
     * Construct a simple alias (type == SIMPLE)
     * @param aliasID the given id.
     */
-    TransliteratorAlias(const UnicodeString& aliasID);
+    TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);

    /**
     * Construct a compound RBT alias (type == COMPOUND)
     */
-    TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlock,
-                        Transliterator* adopted, int32_t idSplitPoint,
+    TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
+                        UVector* adoptedTransliterators,
                        const UnicodeSet* compoundFilter);

    /**
@ -108,10 +108,10 @@ class TransliteratorAlias : public UMemory {
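-    //    Here ID is the ID, aliasID is the rules string.
-    //    idSplitPoint is the UTransDirection.
+    //    Here ID is the ID, aliasesOrRules is the rules string.
+    //    direction is the UTransDirection.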
    UnicodeString ID;
-    UnicodeString aliasID; // rename! holds rules for RULES type
-    Transliterator* trans; // owned
+    UnicodeString aliasesOrRules;
+    UVector* transes; // owned
    const UnicodeSet* compoundFilter; // alias
-    int32_t idSplitPoint; // rename! holds UTransDirection for RULES type
+    UTransDirection direction;
    enum { SIMPLE, COMPOUND, RULES } type;

    TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
--- a/icu4c/source/i18n/tridpars.cpp
+++ b/icu4c/source/i18n/tridpars.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (c) 2002-2004, International Business Machines Corporation
+*   Copyright (c) 2002-2005, International Business Machines Corporation
 *   and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -437,22 +437,10 @@ UBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t d
 * the reverse.  THIS MAY RESULT IN AN EMPTY VECTOR.  Convert
 * SingleID entries to actual transliterators.
 *
- * Also, optionally, insert the given transliterator at the given
- * position.  This effectively happens before anything else.
- *
 * @param list vector of SingleID objects.  On exit, vector
 * of one or more Transliterators.
- * @param insert Transliterator to insert, or NULL if none.
- * Adopted.
- * @param insertIndex index from 0..list.size()-1, at which
- * to place 'insert', or -1 if none.
- * @return new value of insertIndex.  The index will shift if
- * there are empty items, like "(Lower)", with indices less than
- * insertIndex.
 */
-int32_t TransliteratorIDParser::instantiateList(UVector& list,
-                                                Transliterator* insert,
-                                                int32_t insertIndex,
+void TransliteratorIDParser::instantiateList(UVector& list,
                                                UErrorCode& ec) {
    UVector tlist(ec);
    if (U_FAILURE(ec)) {
@ -463,15 +454,6 @@ int32_t TransliteratorIDParser::instantiateList(UVector& list,
    Transliterator* t;
    int32_t i;
    for (i=0; i<=list.size(); ++i) { // [sic]: i<=list.size()
-        if (insertIndex == i) {
-            insertIndex = tlist.size();
-            tlist.addElement(insert, ec);
-            if (U_FAILURE(ec)) {
-                goto RETURN;
-            }
-            insert = NULL;
-        }
-
        // We run the loop too long by one, so we can
        // do an insert after the last element
        if (i==list.size()) {
@ -525,9 +507,7 @@ int32_t TransliteratorIDParser::instantiateList(UVector& list,
        }
    }

-    delete insert; // Clean up in case of failure
    list.setDeleter(save);
-    return insertIndex;
 }

 /**
--- a/icu4c/source/i18n/tridpars.h
+++ b/icu4c/source/i18n/tridpars.h
@ -1,6 +1,6 @@
 /*
 **************************************************************************
- *   Copyright (c) 2002-2004, International Business Machines Corporation *
+ *   Copyright (c) 2002-2005, International Business Machines Corporation *
 *   and others.  All Rights Reserved.                                    *
 **************************************************************************
 *   Date        Name        Description                                  *
@ -202,23 +202,12 @@ class TransliteratorIDParser /* not : public UObject because all methods are sta
     * the reverse.  THIS MAY RESULT IN AN EMPTY VECTOR.  Convert
     * SingleID entries to actual transliterators.
     *
-     * Also, optionally, insert the given transliterator at the given
-     * position.  This effectively happens before anything else.
-     *
     * @param list vector of SingleID objects.  On exit, vector
     * of one or more Transliterators.
-     * @param insert Transliterator to insert, or null if none.
-     * @param insertIndex index from 0..list.size()-1, at which
-     * to place 'insert', or -1 if none.
     * @param ec Output param to receive a success or an error code.
-     * @return new value of insertIndex.  The index will shift if
-     * there are empty items, like "(Lower)", with indices less than
-     * insertIndex.
     */
-    static int32_t instantiateList(UVector& list,
-                                   Transliterator* insert,
-                                   int32_t insertIndex,
-                                   UErrorCode& ec);
+    static void instantiateList(UVector& list,
+                                UErrorCode& ec);

    /**
     * Parse an ID into pieces.  Take IDs of the form T, T/V, S-T,
--- a/icu4c/source/i18n/unicode/translit.h
+++ b/icu4c/source/i18n/unicode/translit.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-* Copyright (C) 1999-2004, International Business Machines
+* Copyright (C) 1999-2005, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -343,6 +343,7 @@ protected:

    friend class TransliteratorParser; // for parseID()
    friend class TransliteratorIDParser; // for createBasicInstance()
+	friend class TransliteratorAlias; // for setID()

 public:

--- a/icu4c/source/test/intltest/transrt.cpp
+++ b/icu4c/source/test/intltest/transrt.cpp
@ -1101,19 +1101,19 @@ void TransliteratorRoundTripTest::TestHan() {
    pn->transliterate(target2);

    // verify that there are no marks
-    Transliterator *nfc = Transliterator::createInstance("nfc", UTRANS_FORWARD, status);
+    Transliterator *nfd = Transliterator::createInstance("nfd", UTRANS_FORWARD, status);
    ASSERT_SUCCESS(status);

-    UnicodeString nfced = target2;
-    nfc->transliterate(nfced);
-    UnicodeSet allMarks("[:mark:]", status);
+    UnicodeString nfded = target2;
+    nfd->transliterate(nfded);
+    UnicodeSet allMarks("[\\u0304\\u0301\\u030C\\u0300\\u0306]", status); // look only for Pinyin tone marks, not all marks (there are some others in there)
    ASSERT_SUCCESS(status);
-    assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfced));
+    assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfded));

    // verify roundtrip
    Transliterator *np = pn->createInverse(status);
    ASSERT_SUCCESS(status);
-    UnicodeString target3 = target;
+    UnicodeString target3 = target2;
    np->transliterate(target3);
    UBool roundtripOK = (target3.compare(target) == 0);
    assertTrue("NumericPinyin must roundtrip", roundtripOK);
@ -1125,13 +1125,15 @@ void TransliteratorRoundTripTest::TestHan() {
        writeStringInU8(out, target);
        fprintf(out, "\nPinyin-Numeric-Pinyin: ");
        writeStringInU8(out, target2);
+        fprintf(out, "\nNumeric-Pinyin-Pinyin: ");
+        writeStringInU8(out, target3);
        fprintf(out, "\n");
        fclose(out);
    }

    delete hanTL;
    delete pn;
-    delete nfc;
+    delete nfd;
    delete np;
    uset_close(USetExemplars);
 }
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@ -183,6 +183,8 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
        TESTCASE(75,TestAllCodepoints);
        TESTCASE(76,TestBoilerplate);
        TESTCASE(77,TestAlternateSyntax);
+        TESTCASE(78,TestBeginEnd);
+        TESTCASE(79,TestBeginEndToRules);
        default: name = ""; break;
    }
 }
@ -776,7 +778,7 @ void TransliteratorTest::TestJ277(void) {
    // Transliterate the Greek locale data
    Locale el("el");
    DateFormatSymbols syms(el, status);
-    if (U_FAILURE(status)) { errln("FAIL: DateFormatSymbols constructor failed. Error: " + UnicodeString(u_errorName(status))); return; }
+    if (U_FAILURE(status)) { errln(UnicodeString("FAIL: DateFormatSymbols constructor failed. Error: ") + u_errorName(status)); return; }
    int32_t i, count;
    const UnicodeString* data = syms.getMonths(count);
    for (i=0; i<count; ++i) {
@ -3972,6 +3974,332 @@ void TransliteratorTest::TestAlternateSyntax() {
           "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
 }

+static const char* BEGIN_END_RULES[] = { // rule sets for the tests below; referenced by index, so slots must not move
+    // [0] baseline: two rules with no ::ID between them
+    "abc > xy;"
+    "aba > z;",
+
+    // [1] (disabled) the rules of [0], each wrapped in its own ::BEGIN/::END
+/*
+    "::BEGIN;"
+    "abc > xy;"
+    "::END;"
+    "::BEGIN;"
+    "aba > z;"
+    "::END;",
+*/
+    "", // test case commented out below, this is here to keep from messing up the indexes
+
+    // [2] (disabled) the rules of [0], only the second wrapped in ::BEGIN/::END
+/*
+    "abc > xy;"
+    "::BEGIN;"
+    "aba > z;"
+    "::END;",
+*/
+    "", // test case commented out below, this is here to keep from messing up the indexes
+
+    // [3] (disabled) the rules of [0], only the first wrapped in ::BEGIN/::END
+/*
+    "::BEGIN;"
+    "abc > xy;"
+    "::END;"
+    "aba > z;",
+*/
+    "", // test case commented out below, this is here to keep from messing up the indexes
+
+    // [4] the rules of [0] with ::Null placed between them
+    "abc > xy;"
+    "::Null;"
+    "aba > z;",
+
+    // [5] ::Upper before, between and after two groups of rules
+    "::Upper;"
+    "ABC > xy;"
+    "AB > x;"
+    "C > z;"
+    "::Upper;"
+    "XYZ > p;"
+    "XY > q;"
+    "Z > r;"
+    "::Upper;",
+
+    // [6] variable definitions followed by rules, no ::ID at all
+    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+    "$delim = [\\-$ws];"
+    "$ws $delim* > ' ';"
+    "'-' $delim* > '-';",
+
+    // [7] same as [6] with ::Null in front
+    "::Null;"
+    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+    "$delim = [\\-$ws];"
+    "$ws $delim* > ' ';"
+    "'-' $delim* > '-';",
+
+    // [8] same as [6] with ::Null at the end
+    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+    "$delim = [\\-$ws];"
+    "$ws $delim* > ' ';"
+    "'-' $delim* > '-';"
+    "::Null;",
+
+    // [9] same as [6] with ::Null between the variable definitions and the rules
+    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+    "$delim = [\\-$ws];"
+    "::Null;"
+    "$ws $delim* > ' ';"
+    "'-' $delim* > '-';",
+
+    // [10] (disabled) same as [6] with the variable definitions inside ::BEGIN/::END
+/*
+    "::BEGIN;"
+    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+    "$delim = [\\-$ws];"
+    "::END;"
+    "$ws $delim* > ' ';"
+    "'-' $delim* > '-';",
+*/
+    "", // test case commented out below, this is here to keep from messing up the indexes
+
+    // [11] (disabled) same as [6] with the rules inside ::BEGIN/::END
+/*
+    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+    "$delim = [\\-$ws];"
+    "::BEGIN;"
+    "$ws $delim* > ' ';"
+    "'-' $delim* > '-';"
+    "::END;",
+*/
+    "", // test case commented out below, this is here to keep from messing up the indexes
+
+    // [12] (disabled) the ::BEGIN/::END form of [13]
+/*
+    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+    "$delim = [\\-$ws];"
+    "$ab = [ab];"
+    "::BEGIN;"
+    "$ws $delim* > ' ';"
+    "'-' $delim* > '-';"
+    "::END;"
+    "::BEGIN;"
+    "$ab { ' ' } $ab > '-';"
+    "c { ' ' > ;"
+    "::END;"
+    "::BEGIN;"
+    "'a-a' > a\\%|a;"
+    "::END;",
+*/
+    "", // test case commented out below, this is here to keep from messing up the indexes
+
+    // [13] variable definitions up front, then three groups of rules, each introduced by ::Null
+    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+    "$delim = [\\-$ws];"
+    "$ab = [ab];"
+    "::Null;"
+    "$ws $delim* > ' ';"
+    "'-' $delim* > '-';"
+    "::Null;"
+    "$ab { ' ' } $ab > '-';"
+    "c { ' ' > ;"
+    "::Null;"
+    "'a-a' > a\\%|a;",
+
+    // [14] (disabled) the ::BEGIN/::END form of [15]
+/*
+    "::[abc];"
+    "::BEGIN;"
+    "abc > xy;"
+    "::END;"
+    "::BEGIN;"
+    "aba > yz;"
+    "::END;"
+    "::Upper;",
+*/
+    "", // test case commented out below, this is here to keep from messing up the indexes
+
+    // [15] leading ::[abc], two rules separated by ::Null, trailing ::Upper
+    "::[abc];"
+    "abc > xy;"
+    "::Null;"
+    "aba > yz;"
+    "::Upper;",
+
+    // [16] (disabled) the ::BEGIN/::END form of [17]
+/*
+    "::[abc];"
+    "::BEGIN;"
+    "abc <> xy;"
+    "::END;"
+    "::BEGIN;"
+    "aba <> yz;"
+    "::END;"
+    "::Upper(Lower);"
+    "::([XYZ]);",
+*/
+    "", // test case commented out below, this is here to keep from messing up the indexes
+
+    // [17] like [15] but with <> rules, ::Upper(Lower) and ::([XYZ]); the tests also build this one in reverse
+    "::[abc];"
+    "abc <> xy;"
+    "::Null;"
+    "aba <> yz;"
+    "::Upper(Lower);"
+    "::([XYZ]);"
+};
+static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
+
+/*
+(This entire test is commented out below and will need some heavy revision when we re-add
+the ::BEGIN/::END stuff)
+static const char* BOGUS_BEGIN_END_RULES[] = {
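+    // [0] ::BEGIN nested inside another ::BEGIN
+    "::BEGIN;"
+    "abc > xy;"
+    "::BEGIN;"
+    "aba > z;"
+    "::END;"
+    "::END;",
+
+    // [1] ::END with no preceding ::BEGIN
+    "abc > xy;"
+    " aba > z;"
+    "::END;",
+
+    // [2] ::BEGIN/::END pair enclosing only an ::ID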
+    "::BEGIN;"
+    "::Upper;"
+    "::END;"
+};
+static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
+*/
+
+static const char* BEGIN_END_TEST_CASES[] = { // flat list of triples; the tests below walk it three entries at a time
+    // rules             input                   expected output
+    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
+//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
+//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
+//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
+    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
+    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
+
+    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
+    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
+    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
+    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
+//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
+//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
+//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
+//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
+//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
+    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
+    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
+    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
+
+//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
+};
+static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
+
+void TransliteratorTest::TestBeginEnd() {
+    // each test case is a (rules, input, expected output) triple in BEGIN_END_TEST_CASES
+    int32_t i;
+    for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
+        const UnicodeString caseName = UnicodeString("Test case #") + (i / 3);
+        const UnicodeString ruleText(BEGIN_END_TEST_CASES[i]);
+        const UnicodeString input(BEGIN_END_TEST_CASES[i + 1]);
+        expect(caseName, ruleText, input, UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
+    }
+
+    // rule set [17] is the one reversible rule set: build it in the reverse direction and check its output
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError parseError;
+    const UnicodeString reversibleRules(BEGIN_END_RULES[17]);
+    Transliterator* reversed = Transliterator::createFromRules("Reversed", reversibleRules,
+            UTRANS_REVERSE, parseError, status);
+    if (reversed != 0 && !U_FAILURE(status)) {
+        expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
+    } else {
+        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
+    }
+    delete reversed;
+
+    // last step (disabled): every syntactically ill-formed rule set listed above must be rejected
+/*
+(commented out until we have the real ::BEGIN/::END stuff in place)
+    for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
+        UParseError parseError;
+        UErrorCode status = U_ZERO_ERROR;
+        Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
+                UTRANS_FORWARD, parseError, status);
+        if (!U_FAILURE(status)) {
+            delete t;
+            errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
+        }
+    }
+*/
+}
+
+void TransliteratorTest::TestBeginEndToRules() {
+    // Round-trip check: for every test case used by TestBeginEnd(), build a Transliterator from the
+    // rules, call toRules() on it, build a second Transliterator from the generated rules, and verify
+    // that the second one produces the expected output, i.e. that the generated rule set does the
+    // same thing as the original one.  Creation results are null-checked before use, and both objects
+    // are released on every path (delete on a null pointer is a no-op).
+    for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
+        UParseError parseError;
+        UErrorCode status = U_ZERO_ERROR;
+        Transliterator* t2 = 0;
+        Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i]),
+                UTRANS_FORWARD, parseError, status);
+        if (t == 0 || U_FAILURE(status)) {
+            reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
+        } else {
+            UnicodeString rules;
+            t->toRules(rules, TRUE);
+            t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
+                    UTRANS_FORWARD, parseError, status);
+            if (t2 == 0 || U_FAILURE(status)) {
+                reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
+                        parseError, status);
+            } else {
+                expect(*t2,
+                       UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
+                       UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
+            }
+        }
+        delete t2;
+        delete t;
+    }
+
+    // do the same thing for the reversible test case
+    UParseError parseError;
+    UErrorCode status = U_ZERO_ERROR;
+    Transliterator* reversed2 = 0;
+    Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
+            UTRANS_REVERSE, parseError, status);
+    if (reversed == 0 || U_FAILURE(status)) {
+        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
+    } else {
+        UnicodeString rules;
+        reversed->toRules(rules, FALSE);
+        reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
+                parseError, status);
+        if (reversed2 == 0 || U_FAILURE(status)) {
+            reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
+                    parseError, status);
+        } else {
+            expect(*reversed2,
+                   UnicodeString("xy XY XYZ yz YZ"),
+                   UnicodeString("xy abc xaba yz aba"));
+        }
+    }
+    delete reversed2;
+    delete reversed;
+}
+
 //======================================================================
 // Support methods
 //======================================================================
@ -3990,14 +4318,35 @@ void TransliteratorTest::expectT(const UnicodeString& id,
    delete t;
 }

+void TransliteratorTest::reportParseError(const UnicodeString& message,
+                                          const UParseError& parseError,
+                                          const UErrorCode& status) {
+    // Log 'message' followed by the parse position, the text around it and the error name.
+    // NOTE: a ", parse error <code>" segment is not emitted.
+    UnicodeString position = UnicodeString(", line ") + parseError.line + ", offset " + parseError.offset;
+    UnicodeString context = UnicodeString(", pre-context ") + prettify(parseError.preContext, TRUE) +
+                            ", post-context " + prettify(parseError.postContext,TRUE);
+    UnicodeString failure = UnicodeString(", Error: ") + u_errorName(status);
+    errln(message + position + context + failure);
+}
+
 void TransliteratorTest::expect(const UnicodeString& rules,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                UTransPosition *pos) {
+    expect("<ID>", rules, source, expectedResult, pos); // delegate to the overload that takes an ID, using the placeholder "<ID>"
+}
+
+void TransliteratorTest::expect(const UnicodeString& id,
+                                const UnicodeString& rules,
+                                const UnicodeString& source,
+                                const UnicodeString& expectedResult,
+                                UTransPosition *pos) {
    UErrorCode status = U_ZERO_ERROR;
-    Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status);
+    UParseError parseError;
+    Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
    if (U_FAILURE(status)) {
-        errln("FAIL: Transliterator constructor failed");
+        reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
    } else {
        expect(*t, source, expectedResult, pos);
    }
@ -4021,7 +4370,6 @@ void TransliteratorTest::expect(const Transliterator& t,
        t.transliterate(result);
        expectAux(t.getID() + ":String", source, result, expectedResult);
    }
-
    UTransPosition index={0, 0, 0, 0};
    if (pos != 0) {
        index = *pos;
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2003, International Business Machines
+*   Copyright (C) 1999-2005, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -344,6 +344,10 @@ private:

    void TestAlternateSyntax(void);

+    void TestBeginEnd(void);
+
+    void TestBeginEndToRules(void);
+
    //======================================================================
    // Support methods
    //======================================================================
@ -357,6 +361,12 @@ private:
                const UnicodeString& expectedResult,
                UTransPosition *pos=0);

+    void expect(const UnicodeString& id,
+                const UnicodeString& rules,
+                const UnicodeString& source,
+                const UnicodeString& expectedResult,
+                UTransPosition *pos=0);
+
    void expect(const Transliterator& t,
                const UnicodeString& source,
                const UnicodeString& expectedResult,
@ -385,6 +395,8 @@ private:
    void CheckIncrementalAux(const Transliterator* t, 
                             const UnicodeString& input);

+    void reportParseError(const UnicodeString& message, const UParseError& parseError, const UErrorCode& status);
+

    const UnicodeString DESERET_DEE;
    const UnicodeString DESERET_dee;