ICU-13565 Reduce size of BreakIterator brk files

See #1100
2020-05-19 20:44:14 +00:00 · 2020-05-19 20:44:14 +00:00 · c5ebb80a73
commit c5ebb80a73
parent 566e0f8686
24 changed files with 943 additions and 240 deletions
--- a/icu4c/source/common/rbbi.cpp
+++ b/icu4c/source/common/rbbi.cpp
@ -746,18 +746,68 @@ struct LookAheadResults {
 };


+// Wrapper functions to select the appropriate handleNext() or handleSafePrevious()
+// instantiation, based on whether an 8 or 16 bit table is required.
+//
+// These Trie access functions will be inlined within the handleNext()/Previous() instantiations.
+static inline uint16_t TrieFunc8(const UCPTrie *trie, UChar32 c) {
+    return UCPTRIE_FAST_GET(trie, UCPTRIE_8, c);
+}
+
+static inline uint16_t TrieFunc16(const UCPTrie *trie, UChar32 c) {
+    return UCPTRIE_FAST_GET(trie, UCPTRIE_16, c);
+}
+
+int32_t RuleBasedBreakIterator::handleNext() {
+    const RBBIStateTable *statetable = fData->fForwardTable;
+    bool use8BitsTrie = ucptrie_getValueWidth(fData->fTrie) == UCPTRIE_VALUE_BITS_8;
+    if (statetable->fFlags & RBBI_8BITS_ROWS) {
+        if (use8BitsTrie) {
+            return handleNext<RBBIStateTableRow8, TrieFunc8, kDictBitFor8BitsTrie>();
+        } else {
+            return handleNext<RBBIStateTableRow8, TrieFunc16, kDictBit>();
+        }
+    } else {
+        if (use8BitsTrie) {
+            return handleNext<RBBIStateTableRow16, TrieFunc8, kDictBitFor8BitsTrie>();
+        } else {
+            return handleNext<RBBIStateTableRow16, TrieFunc16, kDictBit>();
+        }
+    }
+}
+
+int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
+    const RBBIStateTable *statetable = fData->fReverseTable;
+    bool use8BitsTrie = ucptrie_getValueWidth(fData->fTrie) == UCPTRIE_VALUE_BITS_8;
+    if (statetable->fFlags & RBBI_8BITS_ROWS) {
+        if (use8BitsTrie) {
+            return handleSafePrevious<RBBIStateTableRow8, TrieFunc8, kDictBitFor8BitsTrie>(fromPosition);
+        } else {
+            return handleSafePrevious<RBBIStateTableRow8, TrieFunc16, kDictBit>(fromPosition);
+        }
+    } else {
+        if (use8BitsTrie) {
+            return handleSafePrevious<RBBIStateTableRow16, TrieFunc8, kDictBitFor8BitsTrie>(fromPosition);
+        } else {
+            return handleSafePrevious<RBBIStateTableRow16, TrieFunc16, kDictBit>(fromPosition);
+        }
+    }
+}
+
+
 //-----------------------------------------------------------------------------------
 //
 //  handleNext()
 //     Run the state machine to find a boundary
 //
 //-----------------------------------------------------------------------------------
+template <typename RowType, RuleBasedBreakIterator::PTrieFunc trieFunc, uint16_t dictMask>
 int32_t RuleBasedBreakIterator::handleNext() {
    int32_t             state;
    uint16_t            category        = 0;
    RBBIRunMode         mode;

-    RBBIStateTableRow  *row;
+    RowType             *row;
    UChar32             c;
    LookAheadResults    lookAheadMatches;
    int32_t             result             = 0;
@ -789,7 +839,7 @@ int32_t RuleBasedBreakIterator::handleNext() {

    //  Set the initial state for the state machine
    state = START_STATE;
-    row = (RBBIStateTableRow *)
+    row = (RowType *)
            //(statetable->fTableData + (statetable->fRowLen * state));
            (tableData + tableRowLen * state);

@ -825,20 +875,17 @@ int32_t RuleBasedBreakIterator::handleNext() {
        if (mode == RBBI_RUN) {
            // look up the current character's character category, which tells us
            // which column in the state table to look at.
-            // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
-            //        not the size of the character going in, which is a UChar32.
-            //
-            category = UTRIE2_GET16(fData->fTrie, c);
+            category = trieFunc(fData->fTrie, c);

            // Check the dictionary bit in the character's category.
            //    Counter is only used by dictionary based iteration.
            //    Chars that need to be handled by a dictionary have a flag bit set
            //    in their category values.
            //
-            if ((category & 0x4000) != 0)  {
+            if ((category & dictMask) != 0)  {
                fDictionaryCharCount++;
                //  And off the dictionary flag bit.
-                category &= ~0x4000;
+                category &= ~dictMask;
            }
        }

@ -860,7 +907,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
        // fNextState is a variable-length array.
        U_ASSERT(category<fData->fHeader->fCatCount);
        state = row->fNextState[category];  /*Not accessing beyond memory*/
-        row = (RBBIStateTableRow *)
+        row = (RowType *)
            // (statetable->fTableData + (statetable->fRowLen * state));
            (tableData + tableRowLen * state);

@ -948,10 +995,12 @@ int32_t RuleBasedBreakIterator::handleNext() {
 //      because the safe table does not require as many options.
 //
 //-----------------------------------------------------------------------------------
+template <typename RowType, RuleBasedBreakIterator::PTrieFunc trieFunc, uint16_t dictMask>
 int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
+
    int32_t             state;
    uint16_t            category        = 0;
-    RBBIStateTableRow  *row;
+    RowType            *row;
    UChar32             c;
    int32_t             result          = 0;

@ -971,7 +1020,7 @@ int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
    //  Set the initial state for the state machine
    c = UTEXT_PREVIOUS32(&fText);
    state = START_STATE;
-    row = (RBBIStateTableRow *)
+    row = (RowType *)
            (stateTable->fTableData + (stateTable->fRowLen * state));

    // loop until we reach the start of the text or transition to state 0
@ -980,12 +1029,10 @@ int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {

        // look up the current character's character category, which tells us
        // which column in the state table to look at.
-        // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
-        //        not the size of the character going in, which is a UChar32.
        //
-        //  And off the dictionary flag bit. For reverse iteration it is not used.
-        category = UTRIE2_GET16(fData->fTrie, c);
-        category &= ~0x4000;
+        //  Mask off the dictionary flag bit. For reverse iteration it is not used.
+        category = trieFunc(fData->fTrie, c);
+        category &= ~dictMask;

        #ifdef RBBI_DEBUG
            if (gTrace) {
@ -1004,7 +1051,7 @@ int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
        // fNextState is a variable-length array.
        U_ASSERT(category<fData->fHeader->fCatCount);
        state = row->fNextState[category];  /*Not accessing beyond memory*/
-        row = (RBBIStateTableRow *)
+        row = (RowType *)
            (stateTable->fTableData + (stateTable->fRowLen * state));

        if (state == STOP_STATE) {
@ -1024,6 +1071,7 @@ int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
    return result;
 }

+
 //-------------------------------------------------------------------------------
 //
 //   getRuleStatus()   Return the break rule tag associated with the current
--- a/icu4c/source/common/rbbi_cache.cpp
+++ b/icu4c/source/common/rbbi_cache.cpp
@ -119,6 +119,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_

 void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
                                       int32_t firstRuleStatus, int32_t otherRuleStatus) {
+    uint32_t dictMask = ucptrie_getValueWidth(fBI->fData->fTrie) == UCPTRIE_VALUE_BITS_8 ?
+        kDictBitFor8BitsTrie : kDictBit;
    if ((endPos - startPos) <= 1) {
        return;
    }
@ -142,13 +144,13 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo

    utext_setNativeIndex(text, rangeStart);
    UChar32     c = utext_current32(text);
-    category = UTRIE2_GET16(fBI->fData->fTrie, c);
+    category = ucptrie_get(fBI->fData->fTrie, c);

    while(U_SUCCESS(status)) {
-        while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
+        while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & dictMask) == 0) {
            utext_next32(text);           // TODO: cleaner loop structure.
            c = utext_current32(text);
-            category = UTRIE2_GET16(fBI->fData->fTrie, c);
+            category = ucptrie_get(fBI->fData->fTrie, c);
        }
        if (current >= rangeEnd) {
            break;
@ -166,7 +168,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo

        // Reload the loop variables for the next go-round
        c = utext_current32(text);
-        category = UTRIE2_GET16(fBI->fData->fTrie, c);
+        category = ucptrie_get(fBI->fData->fTrie, c);
    }

    // If we found breaks, ensure that the first and last entries are
--- a/icu4c/source/common/rbbidata.cpp
+++ b/icu4c/source/common/rbbidata.cpp
@ -11,10 +11,10 @@

 #if !UCONFIG_NO_BREAK_ITERATION

+#include "unicode/ucptrie.h"
 #include "unicode/utypes.h"
 #include "rbbidata.h"
 #include "rbbirb.h"
-#include "utrie2.h"
 #include "udatamem.h"
 #include "cmemory.h"
 #include "cstring.h"
@ -110,17 +110,24 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
        fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
    }

-    fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
-                                      (uint8_t *)data + fHeader->fTrie,
-                                      fHeader->fTrieLen,
-                                      NULL,           // *actual length
-                                      &status);
+    fTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST,
+                                   UCPTRIE_VALUE_BITS_ANY,
+                                   (uint8_t *)data + fHeader->fTrie,
+                                   fHeader->fTrieLen,
+                                   nullptr,           // *actual length
+                                   &status);
    if (U_FAILURE(status)) {
        return;
    }

-    fRuleSource   = (UChar *)((char *)data + fHeader->fRuleSource);
-    fRuleString.setTo(TRUE, fRuleSource, -1);
+    UCPTrieValueWidth width = ucptrie_getValueWidth(fTrie);
+    if (!(width == UCPTRIE_VALUE_BITS_8 || width == UCPTRIE_VALUE_BITS_16)) {
+        status = U_INVALID_FORMAT_ERROR;
+        return;
+    }
+
+    fRuleSource   = ((char *)data + fHeader->fRuleSource);
+    fRuleString = UnicodeString::fromUTF8(StringPiece(fRuleSource, fHeader->fRuleSourceLen));
    U_ASSERT(data->fRuleSourceLen > 0);

    fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
@ -142,8 +149,8 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
 //-----------------------------------------------------------------------------
 RBBIDataWrapper::~RBBIDataWrapper() {
    U_ASSERT(fRefCount == 0);
-    utrie2_close(fTrie);
-    fTrie = NULL;
+    ucptrie_close(fTrie);
+    fTrie = nullptr;
    if (fUDataMem) {
        udata_close(fUDataMem);
    } else if (!fDontFreeData) {
@ -225,6 +232,11 @@ void  RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab

    RBBIDebugPrintf("   %s\n", heading);

+    RBBIDebugPrintf("Flags: %4x RBBI_LOOKAHEAD_HARD_BREAK=%s RBBI_BOF_REQUIRED=%s  RBBI_8BITS_ROWS=%s\n",
+                    table->fFlags,
+                    table->fFlags & RBBI_LOOKAHEAD_HARD_BREAK ? "T" : "F",
+                    table->fFlags & RBBI_BOF_REQUIRED ? "T" : "F",
+                    table->fFlags & RBBI_8BITS_ROWS ? "T" : "F");
    RBBIDebugPrintf("State |  Acc  LA TagIx");
    for (c=0; c<fHeader->fCatCount; c++) {RBBIDebugPrintf("%3d ", c);}
    RBBIDebugPrintf("\n------|---------------"); for (c=0;c<fHeader->fCatCount; c++) {
@ -236,12 +248,20 @@ void  RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
        RBBIDebugPrintf("         N U L L   T A B L E\n\n");
        return;
    }
+    UBool use8Bits = table->fFlags & RBBI_8BITS_ROWS;
    for (s=0; s<table->fNumStates; s++) {
        RBBIStateTableRow *row = (RBBIStateTableRow *)
                                  (table->fTableData + (table->fRowLen * s));
-        RBBIDebugPrintf("%4d  |  %3d %3d %3d ", s, row->fAccepting, row->fLookAhead, row->fTagIdx);
-        for (c=0; c<fHeader->fCatCount; c++)  {
-            RBBIDebugPrintf("%3d ", row->fNextState[c]);
+        if (use8Bits) {
+            RBBIDebugPrintf("%4d  |  %3d %3d %3d ", s, row->r8.fAccepting, row->r8.fLookAhead, row->r8.fTagIdx);
+            for (c=0; c<fHeader->fCatCount; c++)  {
+                RBBIDebugPrintf("%3d ", row->r8.fNextState[c]);
+            }
+        } else {
+            RBBIDebugPrintf("%4d  |  %3d %3d %3d ", s, row->r16.fAccepting, row->r16.fLookAhead, row->r16.fTagIdx);
+            for (c=0; c<fHeader->fCatCount; c++)  {
+                RBBIDebugPrintf("%3d ", row->r16.fNextState[c]);
+            }
        }
        RBBIDebugPrintf("\n");
    }
@ -377,35 +397,64 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
    //
    int32_t         topSize = offsetof(RBBIStateTable, fTableData);

-    // Forward state table.  
+    // Forward state table.
    tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
    tableLength      = ds->readUInt32(rbbiDH->fFTableLen);

    if (tableLength > 0) {
-        ds->swapArray32(ds, inBytes+tableStartOffset, topSize, 
+        RBBIStateTable *rbbiST = (RBBIStateTable *)(inBytes+tableStartOffset);
+        UBool use8Bits = ds->readUInt32(rbbiST->fFlags) & RBBI_8BITS_ROWS;
+
+        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
-        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
-                            outBytes+tableStartOffset+topSize, status);
+
+        // Byte-swap the state table rows only if they are 16 bits wide; 8-bit rows are copied as-is.
+        if (use8Bits) {
+            if (outBytes != inBytes) {
+                uprv_memmove(outBytes+tableStartOffset+topSize,
+                             inBytes+tableStartOffset+topSize,
+                             tableLength-topSize);
+            }
+        } else {
+            ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
+                                outBytes+tableStartOffset+topSize, status);
+        }
    }
-    
+
    // Reverse state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
    tableLength      = ds->readUInt32(rbbiDH->fRTableLen);

    if (tableLength > 0) {
-        ds->swapArray32(ds, inBytes+tableStartOffset, topSize, 
+        RBBIStateTable *rbbiST = (RBBIStateTable *)(inBytes+tableStartOffset);
+        UBool use8Bits = ds->readUInt32(rbbiST->fFlags) & RBBI_8BITS_ROWS;
+
+        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
-        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
-                            outBytes+tableStartOffset+topSize, status);
+
+        // Byte-swap the state table rows only if they are 16 bits wide; 8-bit rows are copied as-is.
+        if (use8Bits) {
+            if (outBytes != inBytes) {
+                uprv_memmove(outBytes+tableStartOffset+topSize,
+                             inBytes+tableStartOffset+topSize,
+                             tableLength-topSize);
+            }
+        } else {
+            ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
+                                outBytes+tableStartOffset+topSize, status);
+        }
    }

    // Trie table for character categories
-    utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
-                    outBytes+ds->readUInt32(rbbiDH->fTrie), status);
+    ucptrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
+                     outBytes+ds->readUInt32(rbbiDH->fTrie), status);

-    // Source Rules Text.  It's UChar data
-    ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
-                        outBytes+ds->readUInt32(rbbiDH->fRuleSource), status);
+    // Source Rules Text.  It's UTF-8 data, so it is copied without byte swapping.
+    if (outBytes != inBytes) {
+        uprv_memmove(outBytes+ds->readUInt32(rbbiDH->fRuleSource),
+                     inBytes+ds->readUInt32(rbbiDH->fRuleSource),
+                     ds->readUInt32(rbbiDH->fRuleSourceLen));
+    }

    // Table of rule status values.  It's all int_32 values
    ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
--- a/icu4c/source/common/rbbidata.h
+++ b/icu4c/source/common/rbbidata.h
@ -49,16 +49,17 @@ ubrk_swap(const UDataSwapper *ds,

 #ifdef __cplusplus

+#include "unicode/ucptrie.h"
 #include "unicode/uobject.h"
 #include "unicode/unistr.h"
 #include "unicode/uversion.h"
 #include "umutex.h"
-#include "utrie2.h"
+

 U_NAMESPACE_BEGIN

 // The current RBBI data format version.
-static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {5, 0, 0, 0};
+static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {6, 0, 0, 0};

 /*  
 *   The following structs map exactly onto the raw data from ICU common data file. 
@ -94,25 +95,25 @@ struct RBBIDataHeader {



-struct  RBBIStateTableRow {
-    int16_t          fAccepting;    /*  Non-zero if this row is for an accepting state.   */
+template <typename ST, typename UT>
+struct RBBIStateTableRowT {
+    ST               fAccepting;    /*  Non-zero if this row is for an accepting state.   */
                                    /*  Value 0: not an accepting state.                  */
                                    /*       -1: Unconditional Accepting state.           */
                                    /*    positive:  Look-ahead match has completed.      */
                                    /*           Actual boundary position happened earlier */
                                    /*           Value here == fLookAhead in earlier      */
                                    /*              state, at actual boundary pos.        */
-    int16_t          fLookAhead;    /*  Non-zero if this row is for a state that          */
+    ST               fLookAhead;    /*  Non-zero if this row is for a state that          */
                                    /*    corresponds to a '/' in the rule source.        */
                                    /*    Value is the same as the fAccepting             */
                                    /*      value for the rule (which will appear         */
                                    /*      in a different state.                         */
-    int16_t          fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
+    ST               fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
                                    /*     from a rule.  Value is the index in the        */
                                    /*     StatusTable of the set of matching             */
                                    /*     tags (rule status values)                      */
-    int16_t          fReserved;
-    uint16_t         fNextState[1]; /*  Next State, indexed by char category.             */
+    UT               fNextState[1]; /*  Next State, indexed by char category.             */
                                    /*    Variable-length array declared with length 1    */
                                    /*    to disable bounds checkers.                     */
                                    /*    Array Size is actually fData->fHeader->fCatCount*/
@ -120,12 +121,18 @@ struct  RBBIStateTableRow {
                                    /*              before changing anything here.        */
 };

+typedef RBBIStateTableRowT<int8_t, uint8_t> RBBIStateTableRow8;
+typedef RBBIStateTableRowT<int16_t, uint16_t> RBBIStateTableRow16;
+
+union RBBIStateTableRow {
+  RBBIStateTableRow16 r16;
+  RBBIStateTableRow8 r8;
+};

 struct RBBIStateTable {
    uint32_t         fNumStates;    /*  Number of states.                                 */
    uint32_t         fRowLen;       /*  Length of a state table row, in bytes.            */
    uint32_t         fFlags;        /*  Option Flags for this state table                 */
-    uint32_t         fReserved;     /*  reserved                                          */
    char             fTableData[1]; /*  First RBBIStateTableRow begins here.              */
                                    /*    Variable-length array declared with length 1    */
                                    /*    to disable bounds checkers.                     */
@ -133,10 +140,9 @@ struct RBBIStateTable {
                                    /*     arithmetic for indexing variable length rows.) */
 };

-typedef enum {
-    RBBI_LOOKAHEAD_HARD_BREAK = 1,
-    RBBI_BOF_REQUIRED = 2
-} RBBIStateTableFlags;
+constexpr uint32_t RBBI_LOOKAHEAD_HARD_BREAK = 1;
+constexpr uint32_t RBBI_BOF_REQUIRED = 2;
+constexpr uint32_t RBBI_8BITS_ROWS = 4;


 /*                                        */
@ -170,13 +176,13 @@ public:
    const RBBIDataHeader     *fHeader;
    const RBBIStateTable     *fForwardTable;
    const RBBIStateTable     *fReverseTable;
-    const UChar              *fRuleSource;
+    const char               *fRuleSource;
    const int32_t            *fRuleStatusTable; 

    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
    int32_t             fStatusMaxIdx;

-    UTrie2             *fTrie;
+    UCPTrie             *fTrie;

 private:
    u_atomic_int32_t    fRefCount;
--- a/icu4c/source/common/rbbirb.cpp
+++ b/icu4c/source/common/rbbirb.cpp
@ -22,6 +22,7 @@
 #include "unicode/uniset.h"
 #include "unicode/uchar.h"
 #include "unicode/uchriter.h"
+#include "unicode/ustring.h"
 #include "unicode/parsepos.h"
 #include "unicode/parseerr.h"

@ -154,7 +155,14 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    int32_t reverseTableSize  = align8(fForwardTable->getSafeTableSize());
    int32_t trieSize          = align8(fSetBuilder->getTrieSize());
    int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
-    int32_t rulesSize         = align8((fStrippedRules.length()+1) * sizeof(UChar));
+
+    int32_t rulesLengthInUTF8 = 0;
+    u_strToUTF8WithSub(0, 0, &rulesLengthInUTF8,
+                       fStrippedRules.getBuffer(), fStrippedRules.length(),
+                       0xfffd, nullptr, fStatus);
+    *fStatus = U_ZERO_ERROR;
+
+    int32_t rulesSize         = align8((rulesLengthInUTF8+1));

    int32_t         totalSize = headerSize
                                + forwardTableSize
@ -197,11 +205,11 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    data->fRTableLen     = reverseTableSize;

    data->fTrie          = data->fRTable + data->fRTableLen;
-    data->fTrieLen       = fSetBuilder->getTrieSize();
-    data->fStatusTable   = data->fTrie    + trieSize;
+    data->fTrieLen       = trieSize;
+    data->fStatusTable   = data->fTrie    + data->fTrieLen;
    data->fStatusTableLen= statusTableSize;
    data->fRuleSource    = data->fStatusTable + statusTableSize;
-    data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);
+    data->fRuleSourceLen = rulesLengthInUTF8;

    uprv_memset(data->fReserved, 0, sizeof(data->fReserved));

@ -214,7 +222,12 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
        ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
    }

-    fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
+    u_strToUTF8WithSub((char *)data+data->fRuleSource, rulesSize, &rulesLengthInUTF8,
+                       fStrippedRules.getBuffer(), fStrippedRules.length(),
+                       0xfffd, nullptr, fStatus);
+    if (U_FAILURE(*fStatus)) {
+        return NULL;
+    }

    return data;
 }
--- a/icu4c/source/common/rbbiscan.cpp
+++ b/icu4c/source/common/rbbiscan.cpp
@ -829,16 +829,14 @@ static const UChar      chRParen    = 0x29;
 UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
    UnicodeString strippedRules;
    int32_t rulesLength = rules.length();
-    bool skippingSpaces = false;

    for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) {
        UChar32 cp = rules.char32At(idx);
        bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
-        if (skippingSpaces && whiteSpace) {
+        if (whiteSpace) {
            continue;
        }
        strippedRules.append(cp);
-        skippingSpaces = whiteSpace;
    }
    return strippedRules;
 }
--- a/icu4c/source/common/rbbisetb.cpp
+++ b/icu4c/source/common/rbbisetb.cpp
@ -35,7 +35,6 @@
 #if !UCONFIG_NO_BREAK_ITERATION

 #include "unicode/uniset.h"
-#include "utrie2.h"
 #include "uvector.h"
 #include "uassert.h"
 #include "cmemory.h"
@ -46,6 +45,7 @@

 U_NAMESPACE_BEGIN

+const int32_t kMaxCharCategoriesFor8BitsTrie = 127;
 //------------------------------------------------------------------------
 //
 //   Constructor
@ -56,7 +56,8 @@ RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
    fRB             = rb;
    fStatus         = rb->fStatus;
    fRangeList      = 0;
-    fTrie           = 0;
+    fMutableTrie    = nullptr;
+    fTrie           = nullptr;
    fTrieSize       = 0;
    fGroupCount     = 0;
    fSawBOF         = FALSE;
@ -79,7 +80,8 @@ RBBISetBuilder::~RBBISetBuilder()
        delete r;
    }

-    utrie2_close(fTrie);
+    ucptrie_close(fTrie);
+    umutablecptrie_close(fMutableTrie);
 }


@ -255,17 +257,23 @@ void RBBISetBuilder::buildRanges() {
 void RBBISetBuilder::buildTrie() {
    RangeDescriptor *rlRange;

-    fTrie = utrie2_open(0,       //  Initial value for all code points.
+    fMutableTrie = umutablecptrie_open(
+                        0,       //  Initial value for all code points.
                        0,       //  Error value for out-of-range input.
                        fStatus);

+    bool use8Bits = getNumCharCategories() <= kMaxCharCategoriesFor8BitsTrie;
    for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
-        utrie2_setRange32(fTrie,
-                          rlRange->fStartChar,     // Range start
-                          rlRange->fEndChar,       // Range end (inclusive)
-                          rlRange->fNum,           // value for range
-                          TRUE,                    // Overwrite previously written values
-                          fStatus);
+        uint32_t value = rlRange->fNum;
+        if (use8Bits && ((value & RuleBasedBreakIterator::kDictBit) != 0)) {
+            U_ASSERT((value & RuleBasedBreakIterator::kDictBitFor8BitsTrie) == 0);
+            value = RuleBasedBreakIterator::kDictBitFor8BitsTrie | (value & ~RuleBasedBreakIterator::kDictBit);
+        }
+        umutablecptrie_setRange(fMutableTrie,
+                                rlRange->fStartChar,     // Range start
+                                rlRange->fEndChar,       // Range end (inclusive)
+                                value,           // value for range
+                                fStatus);
    }
 }

@ -274,8 +282,8 @@ void RBBISetBuilder::mergeCategories(IntPair categories) {
    U_ASSERT(categories.first >= 1);
    U_ASSERT(categories.second > categories.first);
    for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
-        int32_t rangeNum = rd->fNum & ~DICT_BIT;
-        int32_t rangeDict = rd->fNum & DICT_BIT;
+        int32_t rangeNum = rd->fNum & ~RuleBasedBreakIterator::kDictBit;
+        int32_t rangeDict = rd->fNum & RuleBasedBreakIterator::kDictBit;
        if (rangeNum == categories.second) {
            rd->fNum = categories.first | rangeDict;
        } else if (rangeNum > categories.second) {
@ -295,15 +303,18 @@ int32_t RBBISetBuilder::getTrieSize()  {
    if (U_FAILURE(*fStatus)) {
        return 0;
    }
-    utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
-    fTrieSize  = utrie2_serialize(fTrie,
-                                  NULL,                // Buffer
-                                  0,                   // Capacity
-                                  fStatus);
-    if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
-        *fStatus = U_ZERO_ERROR;
+    if (fTrie == nullptr) {
+        bool use8Bits = getNumCharCategories() <= kMaxCharCategoriesFor8BitsTrie;
+        fTrie = umutablecptrie_buildImmutable(
+            fMutableTrie,
+            UCPTRIE_TYPE_FAST,
+            use8Bits ? UCPTRIE_VALUE_BITS_8 : UCPTRIE_VALUE_BITS_16,
+            fStatus);
+        fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, fStatus);
+        if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
+            *fStatus = U_ZERO_ERROR;
+        }
    }
-    // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
    return fTrieSize;
 }

@ -316,9 +327,9 @@ int32_t RBBISetBuilder::getTrieSize()  {
 //
 //-----------------------------------------------------------------------------------
 void RBBISetBuilder::serializeTrie(uint8_t *where) {
-    utrie2_serialize(fTrie,
-                     where,                   // Buffer
-                     fTrieSize,               // Capacity
+    ucptrie_toBinary(fTrie,
+                     where,                // Buffer
+                     fTrieSize,            // Capacity
                     fStatus);
 }

@ -467,7 +478,7 @@ void RBBISetBuilder::printRangeGroups() {
            lastPrintedGroupNum = groupNum;
            RBBIDebugPrintf("%2i  ", groupNum);

-            if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
+            if (rlRange->fNum & RuleBasedBreakIterator::kDictBit) { RBBIDebugPrintf(" <DICT> ");}

            for (i=0; i<rlRange->fIncludesSets->size(); i++) {
                RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
@ -669,7 +680,7 @@ void RangeDescriptor::setDictionaryFlag() {
            if (varRef && varRef->fType == RBBINode::varRef) {
                const UnicodeString *setName = &varRef->fText;
                if (setName->compare(dictionary, -1) == 0) {
-                    fNum |= RBBISetBuilder::DICT_BIT;
+                    fNum |= RuleBasedBreakIterator::kDictBit;
                    break;
                }
            }
--- a/icu4c/source/common/rbbisetb.h
+++ b/icu4c/source/common/rbbisetb.h
@ -16,9 +16,10 @@

 #if !UCONFIG_NO_BREAK_ITERATION

+#include "unicode/ucptrie.h"
+#include "unicode/umutablecptrie.h"
 #include "unicode/uobject.h"
 #include "rbbirb.h"
-#include "utrie2.h"
 #include "uvector.h"

 U_NAMESPACE_BEGIN
@ -101,8 +102,6 @@ public:
     */
    void     mergeCategories(IntPair categories);

-    static constexpr int32_t DICT_BIT = 0x4000;
-
 #ifdef RBBI_DEBUG
    void     printSets();
    void     printRanges();
@ -121,8 +120,9 @@ private:

    RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors

-    UTrie2                *fTrie;           // The mapping TRIE that is the end result of processing
-    uint32_t               fTrieSize;       //  the Unicode Sets.
+    UMutableCPTrie        *fMutableTrie;    // The mapping TRIE that is the end result of processing
+    UCPTrie               *fTrie;           //  the Unicode Sets.
+    uint32_t               fTrieSize;

    // Groups correspond to character categories -
    //       groups of ranges that are in the same original UnicodeSets.
--- a/icu4c/source/common/rbbitblb.cpp
+++ b/icu4c/source/common/rbbitblb.cpp
@ -28,6 +28,8 @@

 U_NAMESPACE_BEGIN

+const int32_t kMaxStateFor8BitsTable = 255;
+
 RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
        fRB(rb),
        fTree(*rootNode),
@ -1335,11 +1337,18 @@ int32_t  RBBITableBuilder::getTableSize() const {
    numRows = fDStates->size();
    numCols = fRB->fSetBuilder->getNumCharCategories();

-    rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
+    if (use8BitsForTable()) {
+        rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
+    } else {
+        rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
+    }
    size   += numRows * rowSize;
    return size;
 }

+bool RBBITableBuilder::use8BitsForTable() const {
+    return fDStates->size() <= kMaxStateFor8BitsTable;
+}

 //-----------------------------------------------------------------------------
 //
@ -1364,27 +1373,44 @@ void RBBITableBuilder::exportTable(void *where) {
        return;
    }

-    table->fRowLen    = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
    table->fNumStates = fDStates->size();
    table->fFlags     = 0;
+    if (use8BitsForTable()) {
+        table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
+        table->fFlags  |= RBBI_8BITS_ROWS;
+    } else {
+        table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
+    }
    if (fRB->fLookAheadHardBreak) {
        table->fFlags  |= RBBI_LOOKAHEAD_HARD_BREAK;
    }
    if (fRB->fSetBuilder->sawBOF()) {
        table->fFlags  |= RBBI_BOF_REQUIRED;
    }
-    table->fReserved  = 0;

    for (state=0; state<table->fNumStates; state++) {
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
        RBBIStateTableRow   *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
-        U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767);
-        U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767);
-        row->fAccepting = (int16_t)sd->fAccepting;
-        row->fLookAhead = (int16_t)sd->fLookAhead;
-        row->fTagIdx    = (int16_t)sd->fTagsIdx;
-        for (col=0; col<catCount; col++) {
-            row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col);
+        if (use8BitsForTable()) {
+            U_ASSERT (-128 < sd->fAccepting && sd->fAccepting <= 127);
+            U_ASSERT (-128 < sd->fLookAhead && sd->fLookAhead <= 127);
+            U_ASSERT (-128 < sd->fTagsIdx && sd->fTagsIdx <= 127);
+            row->r8.fAccepting = (int8_t)sd->fAccepting;
+            row->r8.fLookAhead = (int8_t)sd->fLookAhead;
+            row->r8.fTagIdx    = (int8_t)sd->fTagsIdx;
+            for (col=0; col<catCount; col++) {
+                U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable);
+                row->r8.fNextState[col] = sd->fDtran->elementAti(col);
+            }
+        } else {
+            U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767);
+            U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767);
+            row->r16.fAccepting = (int16_t)sd->fAccepting;
+            row->r16.fLookAhead = (int16_t)sd->fLookAhead;
+            row->r16.fTagIdx    = (int16_t)sd->fTagsIdx;
+            for (col=0; col<catCount; col++) {
+                row->r16.fNextState[col] = sd->fDtran->elementAti(col);
+            }
        }
    }
 }
@ -1520,11 +1546,18 @@ int32_t  RBBITableBuilder::getSafeTableSize() const {
    numRows = fSafeTable->size();
    numCols = fRB->fSetBuilder->getNumCharCategories();

-    rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
+    if (use8BitsForSafeTable()) {
+        rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
+    } else {
+        rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
+    }
    size   += numRows * rowSize;
    return size;
 }

+bool RBBITableBuilder::use8BitsForSafeTable() const {
+    return fSafeTable->size() <= kMaxStateFor8BitsTable;
+}

 //-----------------------------------------------------------------------------
 //
@ -1549,20 +1582,33 @@ void RBBITableBuilder::exportSafeTable(void *where) {
        return;
    }

-    table->fRowLen    = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
    table->fNumStates = fSafeTable->size();
    table->fFlags     = 0;
-    table->fReserved  = 0;
+    if (use8BitsForSafeTable()) {
+        table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
+        table->fFlags  |= RBBI_8BITS_ROWS;
+    } else {
+        table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
+    }

    for (state=0; state<table->fNumStates; state++) {
        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state);
        RBBIStateTableRow   *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
-        row->fAccepting = 0;
-        row->fLookAhead = 0;
-        row->fTagIdx    = 0;
-        row->fReserved  = 0;
-        for (col=0; col<catCount; col++) {
-            row->fNextState[col] = rowString->charAt(col);
+        if (use8BitsForSafeTable()) {
+            row->r8.fAccepting = 0;
+            row->r8.fLookAhead = 0;
+            row->r8.fTagIdx    = 0;
+            for (col=0; col<catCount; col++) {
+                U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable);
+                row->r8.fNextState[col] = rowString->charAt(col);
+            }
+        } else {
+            row->r16.fAccepting = 0;
+            row->r16.fLookAhead = 0;
+            row->r16.fTagIdx    = 0;
+            for (col=0; col<catCount; col++) {
+                row->r16.fNextState[col] = rowString->charAt(col);
+            }
        }
    }
 }
--- a/icu4c/source/common/rbbitblb.h
+++ b/icu4c/source/common/rbbitblb.h
@ -53,6 +53,9 @@ public:
     */
    void     exportTable(void *where);

+    /** Use 8 bits to encode the forward table */
+    bool     use8BitsForTable() const;
+
    /**
     *  Find duplicate (redundant) character classes. Begin looking with categories.first.
     *  Duplicate, if found are returned in the categories parameter.
@ -85,6 +88,8 @@ public:
     */
    void     exportSafeTable(void *where);

+    /** Use 8 bits to encode the safe reverse table */
+    bool     use8BitsForSafeTable() const;

 private:
    void     calcNullable(RBBINode *n);
--- a/icu4c/source/common/unicode/rbbi.h
+++ b/icu4c/source/common/unicode/rbbi.h
@ -32,6 +32,8 @@
 #include "unicode/parseerr.h"
 #include "unicode/schriter.h"

+struct UCPTrie;
+
 U_NAMESPACE_BEGIN

 /** @internal */
@ -659,6 +661,28 @@ private:
     */
    int32_t handleNext();

+    /*
+     * Templatized versions of handleNext() and handleSafePrevious().
+     *
+     * There will be exactly four instantiations of each, one for every combination
+     * of an 8 or 16 bit state table with an 8 or 16 bit trie.
+     * Having separate instantiations for the table types keeps conditional tests of
+     * the table type out of the inner loops, at the expense of replicated code.
+     *
+     * The template parameter for the Trie access function is a value, not a type.
+     * Doing it this way, the compiler will inline the Trie function in the
+     * expanded functions. (Both the 8 and 16 bit access functions have the same type
+     * signature)
+     */
+
+    typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32);
+
+    template<typename RowType, PTrieFunc trieFunc, uint16_t dictMask>
+    int32_t handleSafePrevious(int32_t fromPosition);
+
+    template<typename RowType, PTrieFunc trieFunc, uint16_t dictMask>
+    int32_t handleNext();
+

    /**
     * This function returns the appropriate LanguageBreakEngine for a
@ -682,6 +706,16 @@ private:
     */
    void dumpTables();

+    /**
+     * Bit for dictionary based category
+     */
+    static constexpr int32_t kDictBit = 0x4000;
+
+    /**
+     * Bit for dictionary based category in 8bits trie
+     */
+    static constexpr int32_t kDictBitFor8BitsTrie = 0x0080;
+
 #endif  /* U_HIDE_INTERNAL_API */
 };

--- a/icu4c/source/test/intltest/rbbiapts.cpp
+++ b/icu4c/source/test/intltest/rbbiapts.cpp
@ -1030,7 +1030,7 @@ void RBBIAPITest::RoundtripRule(const char *dataFile) {
    parseError.offset = 0;
    LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
    uint32_t length;
-    const UChar *builtSource;
+    const char *builtSource;
    const uint8_t *rbbiRules;
    const uint8_t *builtRules;

@ -1040,7 +1040,7 @@ void RBBIAPITest::RoundtripRule(const char *dataFile) {
    }

    builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
-    builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
+    builtSource = (const char *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
    LocalPointer<RuleBasedBreakIterator> brkItr (new RuleBasedBreakIterator(builtSource, parseError, status));
    if (U_FAILURE(status)) {
        errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@ -128,6 +128,11 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
    TESTCASE_AUTO(TestReverse);
    TESTCASE_AUTO(TestBug13692);
    TESTCASE_AUTO(TestDebugRules);
+    TESTCASE_AUTO(Test8BitsTrieWith8BitStateTable);
+    TESTCASE_AUTO(Test8BitsTrieWith16BitStateTable);
+    TESTCASE_AUTO(Test16BitsTrieWith8BitStateTable);
+    TESTCASE_AUTO(Test16BitsTrieWith16BitStateTable);
+    TESTCASE_AUTO(TestTable_8_16_Bits);

 #if U_ENABLE_TRACING
    TESTCASE_AUTO(TestTraceCreateCharacter);
@ -4621,7 +4626,7 @@ void RBBITest::TestBug12677() {
    RuleBasedBreakIterator bi(rules, pe, status);
    assertSuccess(WHERE, status);
    UnicodeString rtRules = bi.getRules();
-    assertEquals(WHERE, UnicodeString(u"!!forward; $x = [ab#]; '#' '?'; "),  rtRules);
+    assertEquals(WHERE, UnicodeString(u"!!forward;$x=[ab#];'#''?';"),  rtRules);
 }


@ -4635,6 +4640,7 @@ void RBBITest::TestTableRedundancies() {

    RBBIDataWrapper *dw = bi->fData;
    const RBBIStateTable *fwtbl = dw->fForwardTable;
+    UBool in8Bits = fwtbl->fFlags & RBBI_8BITS_ROWS;
    int32_t numCharClasses = dw->fHeader->fCatCount;
    // printf("Char Classes: %d     states: %d\n", numCharClasses, fwtbl->fNumStates);

@ -4645,7 +4651,7 @@ void RBBITest::TestTableRedundancies() {
        UnicodeString s;
        for (int32_t r = 1; r < (int32_t)fwtbl->fNumStates; r++) {
            RBBIStateTableRow  *row = (RBBIStateTableRow *) (fwtbl->fTableData + (fwtbl->fRowLen * r));
-            s.append(row->fNextState[column]);
+            s.append(in8Bits ? row->r8.fNextState[column] : row->r16.fNextState[column]);
        }
        columns.push_back(s);
    }
@ -4665,12 +4671,22 @@ void RBBITest::TestTableRedundancies() {
    for (int32_t r=0; r < (int32_t)fwtbl->fNumStates; r++) {
        UnicodeString s;
        RBBIStateTableRow  *row = (RBBIStateTableRow *) (fwtbl->fTableData + (fwtbl->fRowLen * r));
-        assertTrue(WHERE, row->fAccepting >= -1);
-        s.append(row->fAccepting + 1);   // values of -1 are expected.
-        s.append(row->fLookAhead);
-        s.append(row->fTagIdx);
-        for (int32_t column = 0; column < numCharClasses; column++) {
-            s.append(row->fNextState[column]);
+        if (in8Bits) {
+            assertTrue(WHERE, row->r8.fAccepting >= -1);
+            s.append(row->r8.fAccepting + 1);   // values of -1 are expected.
+            s.append(row->r8.fLookAhead);
+            s.append(row->r8.fTagIdx);
+            for (int32_t column = 0; column < numCharClasses; column++) {
+                s.append(row->r8.fNextState[column]);
+            }
+        } else {
+            assertTrue(WHERE, row->r16.fAccepting >= -1);
+            s.append(row->r16.fAccepting + 1);   // values of -1 are expected.
+            s.append(row->r16.fLookAhead);
+            s.append(row->r16.fTagIdx);
+            for (int32_t column = 0; column < numCharClasses; column++) {
+                s.append(row->r16.fNextState[column]);
+            }
        }
        rows.push_back(s);
    }
@ -4743,12 +4759,14 @@ void RBBITest::TestReverse(std::unique_ptr<RuleBasedBreakIterator>bi) {

    RBBIDataWrapper *data = bi->fData;
    int32_t categoryCount = data->fHeader->fCatCount;
-    UTrie2  *trie = data->fTrie;
+    UCPTrie *trie = data->fTrie;
+    bool use8BitsTrie = ucptrie_getValueWidth(trie) == UCPTRIE_VALUE_BITS_8;
+    uint32_t dictBit = use8BitsTrie ? 0x0080 : 0x4000;

    std::vector<UnicodeString> strings(categoryCount, UnicodeString());
    for (int cp=0; cp<0x1fff0; ++cp) {
-        int cat = utrie2_get32(trie, cp);
-        cat &= ~0x4000;    // And off the dictionary bit from the category.
+        int cat = ucptrie_get(trie, cp);
+        cat &= ~dictBit;    // And off the dictionary bit from the category.
        assertTrue(WHERE, cat < categoryCount && cat >= 0);
        if (cat < 0 || cat >= categoryCount) return;
        strings[cat].append(cp);
@ -4886,6 +4904,182 @@ void RBBITest::TestDebugRules() {
 #endif
 }

+void RBBITest::testTrieStateTable(int32_t numChar, bool expectedTrieWidthIn8Bits, bool expectedStateRowIn8Bits) {
+    UCPTrieValueWidth expectedTrieWidth = expectedTrieWidthIn8Bits ? UCPTRIE_VALUE_BITS_8 : UCPTRIE_VALUE_BITS_16;
+    int32_t expectedStateRowBits = expectedStateRowIn8Bits ? RBBI_8BITS_ROWS : 0;
+    // The text is every character from U+4E00 to U+4FFF, each one doubled.
+    UnicodeString text;
+    for (UChar c = 0x4e00; c < 0x5000; c++) {
+        text.append(c).append(c);
+    }
+    // Generate rules (one doubled-character literal per character, plus '.') that
+    // result in numChar+4 character classes and numChar+3 states.
+    UnicodeString rules(u"!!quoted_literals_only;");
+    for (UChar c = 0x4e00; c < 0x4e00 + numChar; c++) {
+        rules.append(u'\'').append(c).append(c).append(u"';");
+    }
+    rules.append(u".;");
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError parseError;
+    RuleBasedBreakIterator bi(rules, parseError, status);
+
+    assertEquals(WHERE, numChar + 4, bi.fData->fHeader->fCatCount);
+    assertEquals(WHERE, numChar + 3, bi.fData->fForwardTable->fNumStates);
+    assertEquals(WHERE, expectedTrieWidth, ucptrie_getValueWidth(bi.fData->fTrie));
+    assertEquals(WHERE, expectedStateRowBits, bi.fData->fForwardTable->fFlags & RBBI_8BITS_ROWS);
+    assertEquals(WHERE, expectedStateRowBits, bi.fData->fReverseTable->fFlags & RBBI_8BITS_ROWS);
+
+    bi.setText(text);
+
+    int32_t pos;
+    int32_t i = 0;
+    while ((pos = bi.next()) > 0) {
+        // The first numChar boundaries must not split a doubled character pair.
+        if (i++ < numChar) {
+            assertEquals(WHERE, i * 2, pos);
+        } else {
+            // After the first numChar next() calls, there is a break after each character.
+            assertEquals(WHERE, i + numChar, pos);
+        }
+    }
+    while ((pos = bi.previous()) > 0) {
+        // Within the first numChar pairs, boundaries must not split a pair.
+        if (--i < numChar) {
+            assertEquals(WHERE, i * 2, pos);
+        } else {
+            // Beyond the first numChar pairs, there is a break after each character.
+            assertEquals(WHERE, i + numChar, pos);
+        }
+    }
+}
+
+void RBBITest::Test8BitsTrieWith8BitStateTable() {
+    testTrieStateTable(123, true /* expectedTrieWidthIn8Bits */, true /* expectedStateRowIn8Bits */);
+}
+
+void RBBITest::Test16BitsTrieWith8BitStateTable() {
+    testTrieStateTable(124, false /* expectedTrieWidthIn8Bits */, true /* expectedStateRowIn8Bits */);
+}
+
+void RBBITest::Test16BitsTrieWith16BitStateTable() {
+    testTrieStateTable(255, false /* expectedTrieWidthIn8Bits */, false /* expectedStateRowIn8Bits */);
+}
+
+void RBBITest::Test8BitsTrieWith16BitStateTable() {
+    // Test a UCPTRIE_VALUE_BITS_8 trie together with 16 bit state table rows. The rule is
+    // one long literal of a single repeated character, unlike testTrieStateTable().
+
+    // The text is 510 'a' characters.
+    UnicodeString text;
+    for (int32_t i = 0; i < 510; i++) {
+        text.append(u'a');
+    }
+
+    UnicodeString rules(u"!!quoted_literals_only;'");
+    // A literal of 254 'a' characters in the rule results in 256 states.
+    for (int32_t i = 0; i < 254; i++) {
+        rules.append(u'a');
+    }
+    rules.append(u"';.;");
+
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError parseError;
+    LocalPointer<RuleBasedBreakIterator> bi(new RuleBasedBreakIterator(rules, parseError, status));
+
+    assertEquals(WHERE, 256, bi->fData->fForwardTable->fNumStates);
+    assertEquals(WHERE, UCPTRIE_VALUE_BITS_8, ucptrie_getValueWidth(bi->fData->fTrie));
+    assertEquals(WHERE,
+                 false, RBBI_8BITS_ROWS == (bi->fData->fForwardTable->fFlags & RBBI_8BITS_ROWS));
+    bi->setText(text);
+
+    // Expected break positions:
+    // 254, 508, 509, 510
+    assertEquals("next()", 254, bi->next());
+    int32_t i = 0;
+    int32_t pos;
+    while ((pos = bi->next()) > 0) {
+        assertEquals(WHERE, 508 + i , pos);
+        i++;
+    }
+    i = 0;
+    while ((pos = bi->previous()) > 0) {
+        i++;
+        if (pos >= 508) {
+            assertEquals(WHERE, 510 - i , pos);
+        } else {
+            assertEquals(WHERE, 254 , pos);
+        }
+    }
+}
+
+// Test that both compact (8 bit) and full sized (16 bit) rbbi tables work, and
+// that there are no problems with rules at the size that transitions between the two.
+//
+// A rule that matches a literal string, like 'abcdefghij', will require one state and
+// one character class per character in the string. So we can make a rule to tickle the
+// boundaries by using literal strings of various lengths.
+//
+// The eight bit trie has only 7 bits available for the character class (the remaining bit
+// flags dictionary characters), allowing for 128 values, a few of which are reserved. The
+// eight bit state table is used for up to 255 states. This test runs the rule string over
+// the lengths 120 - 260, which spans both transitions with some margin for changes to the
+// number of values reserved by the rule builder.
+
+void RBBITest::TestTable_8_16_Bits() {
+
+    // testStr serves as both the source of the rule string (truncated to the desired length)
+    // and as test data to check matching behavior. A break rule consisting of the first 120
+    // characters of testStr will match the first 120 chars of the full-length testStr.
+    UnicodeString testStr;
+    for (UChar c=0x3000; c<0x3200; ++c) {
+        testStr.append(c);
+    }
+
+    const int32_t startLength = 120;   // The shortest rule string to test.
+    const int32_t endLength = 260;     // The longest rule string to test.
+    const int32_t increment = this->quick ? endLength - startLength : 1;
+
+    for (int32_t ruleLen=startLength; ruleLen <= endLength; ruleLen += increment) {
+        UParseError parseError;
+        UErrorCode status = U_ZERO_ERROR;
+
+        UnicodeString ruleString{u"!!quoted_literals_only; '#';"};
+        ruleString.findAndReplace(UnicodeString(u"#"), UnicodeString(testStr, 0, ruleLen));
+        RuleBasedBreakIterator bi(ruleString, parseError, status);
+        if (!assertSuccess(WHERE, status)) {
+            errln(ruleString);
+            break;
+        }
+        // bi.dumpTables();
+
+        // Verify that the break iterator is functioning - that the first boundary found
+        // in testStr is at the length of the rule string.
+        bi.setText(testStr);
+        assertEquals(WHERE, ruleLen, bi.next());
+
+        // Reverse iteration. Do a setText() first, to flush the break iterator's internal cache
+        // of previously detected boundaries, thus forcing the engine to run the safe reverse rules.
+        bi.setText(testStr);
+        int32_t result = bi.preceding(ruleLen);
+        assertEquals(WHERE, 0, result);
+
+        // Verify that the range of rule lengths being tested covers the transitions
+        // from 8 to 16 bit data.
+        bool has8BitRowData = bi.fData->fForwardTable->fFlags & RBBI_8BITS_ROWS;
+        bool has8BitsTrie = ucptrie_getValueWidth(bi.fData->fTrie) == UCPTRIE_VALUE_BITS_8;
+
+        if (ruleLen == startLength) {
+            assertEquals(WHERE, true, has8BitRowData);
+            assertEquals(WHERE, true, has8BitsTrie);
+        }
+        if (ruleLen == endLength) {
+            assertEquals(WHERE, false, has8BitRowData);
+            assertEquals(WHERE, false, has8BitsTrie);
+        }
+    }
+}
+
+
 #if U_ENABLE_TRACING
 static std::vector<std::string> gData;
 static std::vector<int32_t> gEntryFn;
--- a/icu4c/source/test/intltest/rbbitst.h
+++ b/icu4c/source/test/intltest/rbbitst.h
@ -86,6 +86,11 @@ public:

    void TestDebug();
    void TestProperties();
+    void Test8BitsTrieWith8BitStateTable();
+    void Test8BitsTrieWith16BitStateTable();
+    void Test16BitsTrieWith8BitStateTable();
+    void Test16BitsTrieWith16BitStateTable();
+    void TestTable_8_16_Bits();

 #if U_ENABLE_TRACING
    void TestTraceCreateCharacter();
@ -133,6 +138,9 @@ private:
    // Test parameters, from the test framework and test invocation.
    const char* fTestParams;

+    // Helper functions to test different trie bit sizes and state table bit sizes.
+    void testTrieStateTable(int32_t numChar, bool expectedTrieWidthIn8Bits, bool expectedStateRowIn8Bits);
+
 #if U_ENABLE_TRACING
    void assertTestTraceResult(int32_t fnNumber, const char* expectedData);
 #endif
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/RBBIDataWrapper.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/RBBIDataWrapper.java
@ -12,10 +12,12 @@ package com.ibm.icu.impl;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;

 import com.ibm.icu.impl.ICUBinary.Authenticate;
 import com.ibm.icu.text.RuleBasedBreakIterator;
+import com.ibm.icu.util.CodePointTrie;

 /**
 * <p>Internal class used for Rule Based Break Iterators.</p>
@ -43,14 +45,10 @@ public final class RBBIDataWrapper {
         * Option Flags for this state table.
         */
        public int     fFlags;
-        /**
-         * Option Flags for this state table.
-         */
-        public int     fReserved;
        /**
         * Linear array of next state values, accessed as short[state, char_class]
         */
-        public short[] fTable;
+        public char[] fTable;

        public RBBIStateTable() {
        }
@ -59,16 +57,29 @@ public final class RBBIDataWrapper {
            if (length == 0) {
                return null;
            }
-            if (length < 16) {
+            if (length < 12) {
                throw new IOException("Invalid RBBI state table length.");
            }
            RBBIStateTable This = new RBBIStateTable();
            This.fNumStates = bytes.getInt();
            This.fRowLen    = bytes.getInt();
            This.fFlags     = bytes.getInt();
-            This.fReserved  = bytes.getInt();
-            int lengthOfShorts = length - 16;   // length in bytes.
-            This.fTable     = ICUBinary.getShorts(bytes, lengthOfShorts / 2, lengthOfShorts & 1);
+            int lengthOfTable = length - 12;   // length in bytes.
+            boolean use8Bits = (This.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) == RBBIDataWrapper.RBBI_8BITS_ROWS;
+            if (use8Bits) {
+                This.fTable = new char[lengthOfTable];
+                for (int i = 0; i < lengthOfTable; i++) {
+                    byte b = bytes.get();
+                    if (i % This.fRowLen < NEXTSTATES) {
+                        This.fTable[i] = (char) b; // Treat b as signed.
+                    } else {
+                        This.fTable[i] = (char)(0xff & b); // Treat b as unsigned.
+                    }
+                }
+                ICUBinary.skipBytes(bytes, lengthOfTable & 1);
+            } else {
+                This.fTable    = ICUBinary.getChars(bytes, lengthOfTable / 2, lengthOfTable & 1);
+            }
            return This;
        }

@ -76,13 +87,20 @@ public final class RBBIDataWrapper {
            bytes.writeInt(fNumStates);
            bytes.writeInt(fRowLen);
            bytes.writeInt(fFlags);
-            bytes.writeInt(fReserved);
-            int tableLen = fRowLen * fNumStates / 2;  // fRowLen is bytes.
-            for (int i = 0; i < tableLen; i++) {
-                bytes.writeShort(fTable[i]);
+            if ((fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) == RBBIDataWrapper.RBBI_8BITS_ROWS) {
+                int tableLen = fRowLen * fNumStates;  // fRowLen is bytes.
+                for (int i = 0; i < tableLen; i++) {
+                    byte b = (byte)(fTable[i] & 0x00ff);
+                    bytes.writeByte(b);
+                }
+            } else {
+                int tableLen = fRowLen * fNumStates / 2;  // fRowLen is bytes.
+                for (int i = 0; i < tableLen; i++) {
+                    bytes.writeChar(fTable[i]);
+                }
            }
-            int bytesWritten = 16 + fRowLen * fNumStates;   // total bytes written,
-                                                            // including 16 for the header.
+            int bytesWritten = 12 + fRowLen * fNumStates;   // total bytes written,
+                                                            // including 12 for the header.
            while (bytesWritten % 8 != 0) {
                bytes.writeByte(0);
                ++bytesWritten;
@ -105,7 +123,6 @@ public final class RBBIDataWrapper {
            if (fNumStates != otherST.fNumStates) return false;
            if (fRowLen    != otherST.fRowLen)    return false;
            if (fFlags     != otherST.fFlags)     return false;
-            if (fReserved  != otherST.fReserved)  return false;
            return Arrays.equals(fTable, otherST.fTable);
        }
    }
@ -134,12 +151,12 @@ public final class RBBIDataWrapper {

    public RBBIStateTable   fRTable;

-    public Trie2   fTrie;
+    public CodePointTrie    fTrie;
    public String  fRuleSource;
    public int     fStatusTable[];

    public static final int DATA_FORMAT = 0x42726b20;     // "Brk "
-    public static final int FORMAT_VERSION = 0x05000000;  // 4.0.0.0
+    public static final int FORMAT_VERSION = 0x06000000;  // 6.0.0.0

    private static final class IsAcceptable implements Authenticate {
        @Override
@ -186,20 +203,20 @@ public final class RBBIDataWrapper {
     * offset to the "tagIndex" field in a state table row.
     */
    public final static int      TAGIDX     = 2;
-    /**
-     * offset to the reserved field in a state table row.
-     */
-    public final static int      RESERVED   = 3;
    /**
     * offset to the start of the next states array in a state table row.
     */
-    public final static int      NEXTSTATES = 4;
+    public final static int      NEXTSTATES = 3;

    //  Bit selectors for the "FLAGS" field of the state table header
    //     enum RBBIStateTableFlags in the C version.
    //
    public final static int      RBBI_LOOKAHEAD_HARD_BREAK = 1;
    public final static int      RBBI_BOF_REQUIRED         = 2;
+    public final static int      RBBI_8BITS_ROWS           = 4;
+
+    public final static int      DICT_BIT                  = 0x4000;
+    public final static int      DICT_BIT_FOR_8BITS_TRIE   = 0x0080;

    /**
     * Data Header.  A struct-like class with the fields from the RBBI data file header.
@ -243,7 +260,7 @@ public final class RBBIDataWrapper {
     * array index of the start of the state table row for that state.
     */
    public int getRowIndex(int state){
-        return state * (fHeader.fCatCount + 4);
+        return state * (fHeader.fCatCount + NEXTSTATES);
    }

    RBBIDataWrapper() {
@ -330,7 +347,10 @@ public final class RBBIDataWrapper {
                                                //  as we don't go more than 100 bytes past the
                                                //  past the end of the TRIE.

-        This.fTrie = Trie2.createFromSerialized(bytes);  // Deserialize the TRIE, leaving buffer
+        This.fTrie = CodePointTrie.fromBinary(
+            CodePointTrie.Type.FAST,
+            null,
+            bytes);  // Deserialize the TRIE, leaving buffer
                                                //  at an unknown position, preceding the
                                                //  padding between TRIE and following section.

@ -359,8 +379,8 @@ public final class RBBIDataWrapper {
        }
        ICUBinary.skipBytes(bytes, This.fHeader.fRuleSource - pos);
        pos = This.fHeader.fRuleSource;
-        This.fRuleSource = ICUBinary.getString(
-                bytes, This.fHeader.fRuleSourceLen / 2, This.fHeader.fRuleSourceLen & 1);
+        This.fRuleSource = new String(
+            ICUBinary.getBytes(bytes, This.fHeader.fRuleSourceLen, 0), StandardCharsets.UTF_8);

        if (RuleBasedBreakIterator.fDebugEnv!=null && RuleBasedBreakIterator.fDebugEnv.indexOf("data")>=0) {
            This.dump(System.out);
@ -396,6 +416,15 @@ public final class RBBIDataWrapper {
        return dest.toString();
    }

+    static public String charToString(char n, int width) {
+        StringBuilder  dest = new StringBuilder(width);
+        dest.append(n);
+        while (dest.length() < width) {
+           dest.insert(0, ' ');
+        }
+        return dest.toString();
+    }
+
    /** Fixed width int-to-string conversion. */
    static public String intToHexString(int n, int width) {
        StringBuilder  dest = new StringBuilder(width);
@ -408,11 +437,11 @@ public final class RBBIDataWrapper {

    /** Dump a state table.  (A full set of RBBI rules has 4 state tables.)  */
    private void dumpTable(java.io.PrintStream out, RBBIStateTable table) {
-        if (table == null || table.fTable.length == 0)   {
+        if (table == null || (table.fTable.length == 0)) {
            out.println("  -- null -- ");
        } else {
-            int n;
-            int state;
+            char n;
+            char state;
            StringBuilder header = new StringBuilder(" Row  Acc Look  Tag");
            for (n=0; n<fHeader.fCatCount; n++) {
                header.append(intToString(n, 5));
@ -434,18 +463,18 @@ public final class RBBIDataWrapper {
     * @param table
     * @param state
     */
-    private void dumpRow(java.io.PrintStream out, RBBIStateTable table, int   state) {
+    private void dumpRow(java.io.PrintStream out, RBBIStateTable table, char   state) {
        StringBuilder dest = new StringBuilder(fHeader.fCatCount*5 + 20);
        dest.append(intToString(state, 4));
        int row = getRowIndex(state);
        if (table.fTable[row+ACCEPTING] != 0) {
-           dest.append(intToString(table.fTable[row+ACCEPTING], 5));
-        }else {
+            dest.append(intToString(table.fTable[row+ACCEPTING], 5));
+        } else {
            dest.append("     ");
        }
        if (table.fTable[row+LOOKAHEAD] != 0) {
            dest.append(intToString(table.fTable[row+LOOKAHEAD], 5));
-        }else {
+        } else {
            dest.append("     ");
        }
        dest.append(intToString(table.fTable[row+TAGIDX], 5));
@ -466,6 +495,7 @@ public final class RBBIDataWrapper {
        int      char32;
        int      category;
        int      lastNewline[] = new int[n+1];
+        int      dictMask = fTrie.getValueWidth() ==  CodePointTrie.ValueWidth.BITS_8 ? DICT_BIT_FOR_8BITS_TRIE : DICT_BIT;

        for (category = 0; category <= fHeader.fCatCount; category ++) {
            catStrings[category] = "";
@ -474,7 +504,7 @@ public final class RBBIDataWrapper {
        out.println("--------------------");
        for (char32 = 0; char32<=0x10ffff; char32++) {
            category = fTrie.get(char32);
-            category &= ~0x4000;            // Mask off dictionary bit.
+            category &= ~dictMask;            // Mask off dictionary bit.
            if (category < 0 || category > fHeader.fCatCount) {
                out.println("Error, bad category " + Integer.toHexString(category) +
                        " for char " + Integer.toHexString(char32));
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java
@ -11,6 +11,7 @@ package com.ibm.icu.text;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@ -182,7 +183,9 @@ class RBBIRuleBuilder {
        int reverseTableSize = align8(fForwardTable.getSafeTableSize());
        int trieSize         = align8(fSetBuilder.getTrieSize());
        int statusTableSize  = align8(fRuleStatusVals.size() * 4);
-        int rulesSize        = align8((strippedRules.length()) * 2);
+
+        byte[] strippedRulesUTF8 = strippedRules.getBytes(StandardCharsets.UTF_8);
+        int rulesSize        = align8(strippedRulesUTF8.length + 1);

        int totalSize = headerSize
                + forwardTableSize
@ -202,7 +205,7 @@ class RBBIRuleBuilder {
        header[RBBIDataWrapper.DH_MAGIC]         = 0xb1a0;
        header[RBBIDataWrapper.DH_FORMATVERSION] = RBBIDataWrapper.FORMAT_VERSION;
        header[RBBIDataWrapper.DH_LENGTH]        = totalSize;            // fLength, the total size of all rule sections.
-        header[RBBIDataWrapper.DH_CATCOUNT]      = fSetBuilder.getNumCharCategories(); // fCatCount.
+        header[RBBIDataWrapper.DH_CATCOUNT]      = fSetBuilder.getNumCharCategories();

        header[RBBIDataWrapper.DH_FTABLE]        = headerSize;           // fFTable
        header[RBBIDataWrapper.DH_FTABLELEN]     = forwardTableSize;     // fTableLen
@ -214,11 +217,11 @@ class RBBIRuleBuilder {
                                                     + header[RBBIDataWrapper.DH_RTABLELEN]; // fTrie
        header[RBBIDataWrapper.DH_TRIELEN]       = fSetBuilder.getTrieSize(); // fTrieLen
        header[RBBIDataWrapper.DH_STATUSTABLE]   = header[RBBIDataWrapper.DH_TRIE]
-                                                     + header[RBBIDataWrapper.DH_TRIELEN];
+                                                     + trieSize;
        header[RBBIDataWrapper.DH_STATUSTABLELEN] = statusTableSize; // fStatusTableLen
        header[RBBIDataWrapper.DH_RULESOURCE]    = header[RBBIDataWrapper.DH_STATUSTABLE]
                                                     + statusTableSize;
-        header[RBBIDataWrapper.DH_RULESOURCELEN] = strippedRules.length() * 2;
+        header[RBBIDataWrapper.DH_RULESOURCELEN] = strippedRulesUTF8.length;
        for (i = 0; i < header.length; i++) {
            dos.writeInt(header[i]);
            outputPos += 4;
@ -257,8 +260,9 @@ class RBBIRuleBuilder {
        // Write out the stripped rules (rules with extra spaces removed).
        //   These go last in the data area, even though they are not last in the header.
        Assert.assrt(outputPos == header[RBBIDataWrapper.DH_RULESOURCE]);
-        dos.writeChars(strippedRules);
-        outputPos += strippedRules.length() * 2;
+        dos.write(strippedRulesUTF8, 0, strippedRulesUTF8.length);
+        dos.write(0);  // Null termination
+        outputPos += strippedRulesUTF8.length + 1;
        while (outputPos % 8 != 0) { // pad to an 8 byte boundary
            dos.write(0);
            outputPos += 1;
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java
@ -697,16 +697,14 @@ class RBBIRuleScanner {
    static String stripRules(String rules) {  // builds a copy of rules with Pattern_White_Space code points stripped
        StringBuilder strippedRules = new StringBuilder();
        int rulesLength = rules.length();
-        boolean skippingSpaces = false;

        for (int idx = 0; idx < rulesLength; idx = rules.offsetByCodePoints(idx, 1)) {  // step by code point, not by char
            int cp = rules.codePointAt(idx);
            boolean whiteSpace = UCharacter.hasBinaryProperty(cp, UProperty.PATTERN_WHITE_SPACE);
-            if (skippingSpaces && whiteSpace) {
+            if (whiteSpace) {  // every white space code point is now dropped, not only one that follows another
                continue;
            }
            strippedRules.appendCodePoint(cp);
-            skippingSpaces = whiteSpace;
        }
        return strippedRules.toString();
    }
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java
@ -8,15 +8,16 @@
 */
 package com.ibm.icu.text;

+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;

 import com.ibm.icu.impl.Assert;
-import com.ibm.icu.impl.Trie2Writable;
-import com.ibm.icu.impl.Trie2_16;
 import com.ibm.icu.text.RBBIRuleBuilder.IntPair;
+import com.ibm.icu.util.CodePointTrie;
+import com.ibm.icu.util.MutableCodePointTrie;

 //
 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules
@ -125,9 +126,9 @@ class RBBISetBuilder {
    RBBIRuleBuilder       fRB;             // The RBBI Rule Compiler that owns us.
    RangeDescriptor       fRangeList;      // Head of the linked list of RangeDescriptors

-    Trie2Writable         fTrie;           // The mapping TRIE that is the end result of processing
+    MutableCodePointTrie  fTrie;           // The mapping TRIE that is the end result of processing
                                           //  the Unicode Sets.
-    Trie2_16              fFrozenTrie;
+    CodePointTrie         fFrozenTrie;

    // Groups correspond to character categories -
    //       groups of ranges that are in the same original UnicodeSets.
@ -140,6 +141,7 @@ class RBBISetBuilder {
    boolean             fSawBOF;

    static final int    DICT_BIT = 0x4000;
+    static final int    DICT_BIT_FOR_8BITS_TRIE  = 0x0080;


    //------------------------------------------------------------------------
@ -286,22 +288,30 @@ class RBBISetBuilder {
    }


+    private static final int MAX_CHAR_CATEGORIES_FOR_8BITS_TRIE = 127;
+
    /**
     * Build the Trie table for mapping UChar32 values to the corresponding
     * range group number. When the category count allows an 8-bit trie, the dictionary flag is stored as DICT_BIT_FOR_8BITS_TRIE instead of DICT_BIT.
     */
    void buildTrie() {
+        boolean use8Bits = getNumCharCategories() <= MAX_CHAR_CATEGORIES_FOR_8BITS_TRIE;  // same test as freezeTrieIfNotYet()
        RangeDescriptor rlRange;

-        fTrie = new Trie2Writable(0,       //   Initial value for all code points.
-                                  0);      //   Error value for out-of-range input.
+        fTrie = new MutableCodePointTrie(0,       //   Initial value for all code points.
+                                         0);      //   Error value for out-of-range input.

        for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
+            int value = rlRange.fNum;
+            if (use8Bits && ((value & DICT_BIT) != 0)) {
+                assert((value & DICT_BIT_FOR_8BITS_TRIE) == 0);  // the 8-bit flag position must be free in the category number
+                // Move the dictionary flag from DICT_BIT (0x4000) to DICT_BIT_FOR_8BITS_TRIE (0x0080) so the value fits in 8 bits.
+                value = DICT_BIT_FOR_8BITS_TRIE | (value & ~DICT_BIT);
+            }
            fTrie.setRange(
                    rlRange.fStartChar,     // Range start
                    rlRange.fEndChar,       // Range end (inclusive)
-                    rlRange.fNum,           // value for range
-                    true                    // Overwrite previously written values
+                    value                  // value for range
                    );
        }
    }
@ -326,17 +336,31 @@ class RBBISetBuilder {
        --fGroupCount;
    }

+    //-----------------------------------------------------------------------------------
+    //
+    //          freezeTrieIfNotYet()    Build the immutable trie on first use. Code shared by
+    //                                  getTrieSize and serializeTrie.
+    //
+    //-----------------------------------------------------------------------------------
+    void freezeTrieIfNotYet()  {
+        if (fFrozenTrie == null) {  // only the first call builds the immutable trie
+            boolean use8Bits = getNumCharCategories() <= MAX_CHAR_CATEGORIES_FOR_8BITS_TRIE;  // same test as buildTrie()
+            fFrozenTrie = fTrie.buildImmutable(CodePointTrie.Type.FAST,
+                                               use8Bits ?
+                                               CodePointTrie.ValueWidth.BITS_8 :
+                                               CodePointTrie.ValueWidth.BITS_16);
+            fTrie = null;  // drop the mutable trie once the immutable one exists
+        }
+    }
+
    //-----------------------------------------------------------------------------------
    //
    //          getTrieSize()    Return the size that will be required to serialize the Trie.
    //          NOTE(review): the new code serializes the trie into a scratch buffer on every call; consider caching the size.
    //-----------------------------------------------------------------------------------
    int getTrieSize()  {
-        if (fFrozenTrie == null) {
-            fFrozenTrie = fTrie.toTrie2_16();
-            fTrie = null;
-        }
-        return fFrozenTrie.getSerializedLength();
+        freezeTrieIfNotYet();
+        return fFrozenTrie.toBinary(new ByteArrayOutputStream());  // the value returned by toBinary is used as the size
    }


@ -346,11 +370,8 @@ class RBBISetBuilder {
    //
    //-----------------------------------------------------------------------------------
    void serializeTrie(OutputStream os) throws IOException {
-        if (fFrozenTrie == null) {
-            fFrozenTrie = fTrie.toTrie2_16();
-            fTrie = null;
-        }
-        fFrozenTrie.serialize(os);
+        freezeTrieIfNotYet();  // make sure fFrozenTrie exists before it is written
+        fFrozenTrie.toBinary(os);  // write the frozen trie to the caller's stream
   }

    //------------------------------------------------------------------------
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java
@ -74,6 +74,8 @@ class RBBITableBuilder {
    /** Synthesized safe table, a List of row arrays.  */
    private List<short[]>    fSafeTable;

+    private static final int MAX_STATE_FOR_8BITS_TABLE = 255;
+
    /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
    int[] fLookAheadRuleMap;

@ -1097,10 +1099,11 @@ class RBBITableBuilder {
           if (fRB.fTreeRoots[fRootIx] == null) {
               return 0;
           }
-           int size    = 16;    // The header of 4 ints, with no rows to the table.
+           int size    = 12;    // The header of 3 ints (12 bytes), with no rows to the table.
           int numRows = fDStates.size();
           int numCols = fRB.fSetBuilder.getNumCharCategories();
-           int rowSize = 8 + 2*numCols;
+           boolean use8Bits = numRows <= MAX_STATE_FOR_8BITS_TABLE;
+           int rowSize = (use8Bits ? 1 : 2 ) * (RBBIDataWrapper.NEXTSTATES + numCols);
           size   += numRows * rowSize;
           size = (size + 7) & ~7;   // round up to a multiple of 8 bytes
           return size;
@ -1125,13 +1128,20 @@ class RBBITableBuilder {
           Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
               fDStates.size() < 0x7fff);
           table.fNumStates = fDStates.size();
+           boolean use8Bits = table.fNumStates <= MAX_STATE_FOR_8BITS_TABLE;

           // Row length and table size; entries are bytes for 8-bit tables, shorts otherwise.
-           //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
-           int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories();   // Row Length in shorts.
-           int tableSize = (getTableSize() - 16) / 2;       // fTable length in shorts.
-           table.fTable = new short[tableSize];
-           table.fRowLen = rowLen * 2;                      // Row length in bytes.
+           int rowLen = RBBIDataWrapper.NEXTSTATES + fRB.fSetBuilder.getNumCharCategories();   // Row length in table entries.
+           int tableSize;
+           if (use8Bits) {
+               tableSize = (getTableSize() - 12);       // fTable length in bytes.
+               table.fTable = new char[tableSize];
+               table.fRowLen = rowLen;                          // Row length in bytes.
+           } else {
+               tableSize = (getTableSize() - 12) / 2;   // fTable length in shorts.
+               table.fTable = new char[tableSize];
+               table.fRowLen = rowLen * 2;                      // Row length in bytes.
+           }

           if (fRB.fLookAheadHardBreak) {
               table.fFlags  |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
@ -1139,18 +1149,29 @@ class RBBITableBuilder {
           if (fRB.fSetBuilder.sawBOF()) {
               table.fFlags  |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
           }
+           if (use8Bits) {
+               table.fFlags  |= RBBIDataWrapper.RBBI_8BITS_ROWS;
+           }

           int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
           for (state=0; state<table.fNumStates; state++) {
               RBBIStateDescriptor sd = fDStates.get(state);
               int row = state*rowLen;
-               Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
-               Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
-               table.fTable[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting;
-               table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead;
-               table.fTable[row + RBBIDataWrapper.TAGIDX]    = (short)sd.fTagsIdx;
+               if (use8Bits) {
+                   Assert.assrt (-128 < sd.fAccepting && sd.fAccepting <= MAX_STATE_FOR_8BITS_TABLE);
+                   Assert.assrt (-128 < sd.fLookAhead && sd.fLookAhead <= MAX_STATE_FOR_8BITS_TABLE);
+               } else {
+                   Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
+                   Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
+               }
+               table.fTable[row + RBBIDataWrapper.ACCEPTING] = (char)sd.fAccepting;
+               table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (char)sd.fLookAhead;
+               table.fTable[row + RBBIDataWrapper.TAGIDX]    = (char)sd.fTagsIdx;
               for (col=0; col<numCharCategories; col++) {
-                   table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col];
+                   if (use8Bits) {
+                       Assert.assrt (sd.fDtran[col] <= MAX_STATE_FOR_8BITS_TABLE);
+                   }
+                   table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (char)sd.fDtran[col];
               }
           }
           return table;
@ -1250,10 +1271,12 @@ class RBBITableBuilder {
           if (fSafeTable == null) {
               return 0;
           }
-           int size    = 16;    // The header of 4 ints, with no rows to the table.
+           int size    = 12;    // The header of 3 ints (12 bytes), with no rows to the table.
           int numRows = fSafeTable.size();
           int numCols = fSafeTable.get(0).length;
-           int rowSize = 8 + 2*numCols;
+           boolean use8Bits = numRows <= MAX_STATE_FOR_8BITS_TABLE;
+
+           int rowSize = (use8Bits ? 1 : 2 ) * (RBBIDataWrapper.NEXTSTATES + numCols);
           size += numRows * rowSize;
           // TODO: there are redundant round-up. Figure out best place, get rid of the rest.
           size = (size + 7) & ~7;   // round up to a multiple of 8 bytes
@ -1269,23 +1292,33 @@ class RBBITableBuilder {
       RBBIDataWrapper.RBBIStateTable exportSafeTable() {
           RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
           table.fNumStates = fSafeTable.size();
+           boolean use8Bits = table.fNumStates <= MAX_STATE_FOR_8BITS_TABLE;
           int numCharCategories = fSafeTable.get(0).length;

           // Row length and table size; entries are bytes for 8-bit tables, shorts otherwise.
-           //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
-           int rowLen = 4 + numCharCategories;
+           int rowLen = RBBIDataWrapper.NEXTSTATES + numCharCategories;
           // TODO: tableSize is basically numStates * numCharCategories,
           //       except for alignment padding. Clean up here, and in main exportTable().
-           int tableSize = (getSafeTableSize() - 16) / 2;   // fTable length in shorts.
-           table.fTable = new short[tableSize];
-           table.fRowLen = rowLen * 2;                      // Row length in bytes.
+           int tableSize = (getSafeTableSize() - 12);           // fTable length in bytes.
+           if (use8Bits) {
+               table.fFlags  |= RBBIDataWrapper.RBBI_8BITS_ROWS;
+               table.fTable = new char[tableSize];
+               table.fRowLen = rowLen;                          // Row length in bytes.
+           } else {
+               tableSize /= 2;   // fTable length in shorts.
+               table.fTable = new char[tableSize];
+               table.fRowLen = rowLen * 2;                      // Row length in bytes.
+           }

           for (int state=0; state<table.fNumStates; state++) {
               short[] rowArray = fSafeTable.get(state);
               int row = state * rowLen;

               for (int col=0; col<numCharCategories; col++) {
-                   table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = rowArray[col];
+                   if (use8Bits) {
+                       Assert.assrt (rowArray[col] <= MAX_STATE_FOR_8BITS_TABLE);
+                   }
+                   table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (char)rowArray[col];
               }
           }
           return table;
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
@ -26,10 +26,10 @@ import com.ibm.icu.impl.CharacterIteration;
 import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.ICUDebug;
 import com.ibm.icu.impl.RBBIDataWrapper;
-import com.ibm.icu.impl.Trie2;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
+import com.ibm.icu.util.CodePointTrie;

 /**
 * Rule Based Break Iterator
@ -821,9 +821,9 @@ public class RuleBasedBreakIterator extends BreakIterator {

        // caches for quicker access
        CharacterIterator text = fText;
-        Trie2 trie = fRData.fTrie;
+        CodePointTrie trie = fRData.fTrie;

-        short[] stateTable  = fRData.fFTable.fTable;
+        char[] stateTable  = fRData.fFTable.fTable;
        int initialPosition = fPosition;
        text.setIndex(initialPosition);
        int result          = initialPosition;
@ -844,6 +844,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
        short category      = 3;
        int flagsState      = fRData.fFTable.fFlags;
        int mode            = RBBI_RUN;
+        int dictMask = fRData.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8 ?
+            RBBIDataWrapper.DICT_BIT_FOR_8BITS_TRIE : RBBIDataWrapper.DICT_BIT;
        if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
            category = 2;
            mode     = RBBI_START;
@ -885,10 +887,10 @@ public class RuleBasedBreakIterator extends BreakIterator {
                //    Chars that need to be handled by a dictionary have a flag bit set
                //    in their category values.
                //
-                if ((category & 0x4000) != 0)  {
+                if ((category & dictMask) != 0)  {
                    fDictionaryCharCount++;
                    //  And off the dictionary flag bit.
-                    category &= ~0x4000;
+                    category &= ~dictMask;
                }

                if (TRACE) {
@ -912,8 +914,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
            // look up a state transition in the state table
            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
            row   = fRData.getRowIndex(state);
-
-            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
+            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == 0xffff) {
                // Match found, common case
                result = text.getIndex();
                if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
@ -927,7 +928,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
            }

            int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING];
-            if (completedRule > 0) {
+            if (completedRule > 0 && completedRule != 0xffff) {
                // Lookahead match is completed
                int lookaheadResult = fLookAheadMatches.getPosition(completedRule);
                if (lookaheadResult >= 0) {
@ -937,13 +938,14 @@ public class RuleBasedBreakIterator extends BreakIterator {
                }
            }

+
            // If we are at the position of the '/' in a look-ahead (hard break) rule;
            // record the current position, to be returned later, if the full rule matches.
            // TODO: Move this check before the previous check of fAccepting.
            //       This would enable hard-break rules with no following context.
            //       But there are line break test failures when trying this. Investigate.
            //       Issue ICU-20837
-            int rule =  stateTable[row + RBBIDataWrapper.LOOKAHEAD];
+            int rule = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
            if (rule != 0) {
                int  pos = text.getIndex();
                if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
@ -996,14 +998,17 @@ public class RuleBasedBreakIterator extends BreakIterator {
     * @internal
     */
    private int handleSafePrevious(int fromPosition) {
-        int             state;
+        char            state;
        short           category = 0;
        int             result = 0;

        // caches for quicker access
        CharacterIterator text = fText;
-        Trie2 trie = fRData.fTrie;
-        short[] stateTable  = fRData.fRTable.fTable;
+        CodePointTrie trie = fRData.fTrie;
+        char[] stateTable  = fRData.fRTable.fTable;
+        int flagsState      = fRData.fRTable.fFlags;
+        int dictMask = fRData.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8 ?
+            RBBIDataWrapper.DICT_BIT_FOR_8BITS_TRIE : RBBIDataWrapper.DICT_BIT;

        CISetIndex32(text, fromPosition);
        if (TRACE) {
@ -1029,7 +1034,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
            //
            //  And off the dictionary flag bit. For reverse iteration it is not used.
            category = (short) trie.get(c);
-            category &= ~0x4000;
+            category &= ~dictMask;
            if (TRACE) {
                System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
                System.out.print(RBBIDataWrapper.intToHexString(c, 10));
@ -1209,6 +1214,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
            int         category;
            int         current;
            int         foundBreakCount = 0;
+            int dictMask = fRData.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8 ?
+                RBBIDataWrapper.DICT_BIT_FOR_8BITS_TRIE : RBBIDataWrapper.DICT_BIT;

            // Loop through the text, looking for ranges of dictionary characters.
            // For each span, find the appropriate break engine, and ask it to find
@ -1219,7 +1226,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
            category = (short)fRData.fTrie.get(c);

            while(true) {
-                while((current = fText.getIndex()) < rangeEnd && (category & 0x4000) == 0) {
+                while((current = fText.getIndex()) < rangeEnd && (category & dictMask) == 0) {
                    c = CharacterIteration.next32(fText);    // pre-increment
                    category = (short)fRData.fTrie.get(c);
                }
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f8b27a3e77ffbe4468ede22ae8a4ca85df993472d6bdea432505a148ff33f23
-size 13149606
+oid sha256:8ed7db50765b06c8a35f48048543c5c9a2c2e19993f752bd71a15e6ac89aa3b3
+size 13141781
--- a/icu4j/main/shared/data/icutzdata.jar
+++ b/icu4j/main/shared/data/icutzdata.jar
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58c1ec5386cba3b6660c3bf8c22ce74c343d3101354f157533d13ec1099e1379
-size 94524
+oid sha256:6d2882ccb44134313ff0365eb24776d4e859fa9dd223f10d608d65fdfd7f23d9
+size 94529
--- a/icu4j/main/shared/data/testdata.jar
+++ b/icu4j/main/shared/data/testdata.jar
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c39bb717c3e95c47c14f49507e8a7866e89bdb3588f021693c3909dda64f4dcb
-size 723466
+oid sha256:e032f823e0ba2fd99f784fe400675049c126e091158a285955c71aa5e2c6036b
+size 723481
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java
@ -29,8 +29,10 @@ import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.impl.RBBIDataWrapper;
 import com.ibm.icu.text.BreakIterator;
 import com.ibm.icu.text.RuleBasedBreakIterator;
+import com.ibm.icu.util.CodePointTrie;
 import com.ibm.icu.util.ULocale;

+
@RunWith(JUnit4.class)
 public class RBBITest extends TestFmwk {
    public RBBITest() {
@ -562,7 +564,7 @@ public class RBBITest extends TestFmwk {

        RuleBasedBreakIterator bi  = new RuleBasedBreakIterator(rules);
        String rtRules = bi.toString();        // getRules() in C++
-        assertEquals("Break Iterator rule stripping test", "!!forward; $x = [ab#]; '#' '?'; ",  rtRules);
+        assertEquals("Break Iterator rule stripping test", "!!forward;$x=[ab#];'#''?';",  rtRules);
    }

    @Test
@ -582,7 +584,7 @@ public class RBBITest extends TestFmwk {
            StringBuilder s = new StringBuilder();
            for (int r = 1; r < fwtbl.fNumStates; r++) {
                int row = dw.getRowIndex(r);
-                short tableVal = fwtbl.fTable[row + RBBIDataWrapper.NEXTSTATES + column];
+                char tableVal = fwtbl.fTable[row + RBBIDataWrapper.NEXTSTATES + column];
                s.append((char)tableVal);
            }
            columns.add(s.toString());
@ -602,13 +604,12 @@ public class RBBITest extends TestFmwk {
        for (int r=0; r<fwtbl.fNumStates; r++) {
            StringBuilder s = new StringBuilder();
            int row = dw.getRowIndex(r);
-            assertTrue("Accepting < -1", fwtbl.fTable[row + RBBIDataWrapper.ACCEPTING] >= -1);
            s.append(fwtbl.fTable[row + RBBIDataWrapper.ACCEPTING]);
            s.append(fwtbl.fTable[row + RBBIDataWrapper.LOOKAHEAD]);
            s.append(fwtbl.fTable[row + RBBIDataWrapper.TAGIDX]);
            for (int column=0; column<numCharClasses; column++) {
-                short tableVal = fwtbl.fTable[row + RBBIDataWrapper.NEXTSTATES + column];
-                s.append((char)tableVal);
+                char tableVal = fwtbl.fTable[row + RBBIDataWrapper.NEXTSTATES + column];
+                s.append(tableVal);
            }
            rows.add(s.toString());
        }
@ -655,4 +656,199 @@ public class RBBITest extends TestFmwk {
            assertTrue("Reverse Table",      RBBIDataWrapper.equals(bi.fRData.fRTable, bi2.fRData.fRTable));
        }
    }
+
+    /**
+     * Helper to test a combination of 8/16 bit trie values and 8/16 bit state table rows.
+     *
+     * Builds a quoted-literals-only rule set with one doubled-character literal for each of
+     * the first numChar characters starting at U+4E00, followed by a "." rule. It then checks
+     * the sizes and widths of the compiled data, and iterates forward and backward over a
+     * text made of the doubled characters U+4E00 to U+4FFF.
+     *
+     * @param numChar the number of doubled-character literals to put into the rules.
+     * @param expectUCPTrieValueWidthIn8Bits true if the compiled trie is expected to have
+     *                                       8 bit values.
+     * @param expectStateRowIn8Bits true if both the forward and the reverse state table are
+     *                              expected to have the RBBI_8BITS_ROWS flag set.
+     */
+    private void testTrieStateTable(int numChar, boolean expectUCPTrieValueWidthIn8Bits,
+        boolean expectStateRowIn8Bits) {
+        // The text is each character from U+4E00 to U+4FFF, doubled.
+        StringBuilder builder = new StringBuilder(2 * (0x5000 - 0x4e00));
+        for (char c = 0x4e00; c < 0x5000; c++) {
+            builder.append(c).append(c);
+        }
+        String text = builder.toString();
+
+        // Generate rules that are expected to produce numChar+4 character classes and
+        // numChar+3 states. Both counts are asserted below.
+
+        builder = new StringBuilder(100 + 6 * numChar);
+        builder.append("!!quoted_literals_only;");
+        for (char c = 0x4e00; c < 0x4e00 + numChar; c++) {
+            builder.append('\'').append(c).append(c).append("';");
+        }
+        builder.append(".;");
+        String rules = builder.toString();
+
+        RuleBasedBreakIterator bi = new RuleBasedBreakIterator(rules);
+
+        RBBIDataWrapper dw = bi.fRData;
+        RBBIDataWrapper.RBBIStateTable fwtbl = dw.fFTable;
+        RBBIDataWrapper.RBBIStateTable rvtbl = dw.fRTable;
+
+        boolean has8BitRowDataForwardTable = (fwtbl.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) != 0;
+        boolean has8BitRowDataReverseTable = (rvtbl.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) != 0;
+        boolean has8BitsTrie = dw.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8;
+
+        assertEquals("Number of char classes mismatch numChar=" + numChar, numChar + 4, dw.fHeader.fCatCount);
+        assertEquals("Number of states in Forward Table mismatch numChar=" + numChar, numChar + 3, fwtbl.fNumStates);
+        assertEquals("Number of states in Reverse Table mismatch numChar=" + numChar, numChar + 3, rvtbl.fNumStates);
+        assertEquals("Trie width mismatch numChar=" + numChar, expectUCPTrieValueWidthIn8Bits, has8BitsTrie);
+        assertEquals("Bits of Forward State table mismatch numChar=" + numChar,
+                     expectStateRowIn8Bits, has8BitRowDataForwardTable);
+        assertEquals("Bits of Reverse State table mismatch numChar=" + numChar,
+                     expectStateRowIn8Bits, has8BitRowDataReverseTable);
+
+        bi.setText(text);
+
+        int pos;
+        int i = 0;
+        while ((pos = bi.next()) > 0) {
+            // The first numChar pairs should not break between the two characters of the pair.
+            if (i++ < numChar) {
+                assertEquals("next() mismatch numChar=" + numChar, i * 2, pos);
+            } else {
+                // After the first numChar calls to next(), break on each character.
+                assertEquals("next() mismatch numChar=" + numChar, i + numChar, pos);
+            }
+        }
+        // Guard against the loop ending early: every boundary up to the end of the text
+        // must have been visited. Each paired literal contributes one boundary per pair,
+        // every remaining character contributes one boundary of its own.
+        int pairedBreaks = Math.min(numChar, text.length() / 2);
+        assertEquals("next() boundary count mismatch numChar=" + numChar,
+                     text.length() - pairedBreaks, i);
+
+        while ((pos = bi.previous()) > 0) {
+            // The first numChar pairs should not break between the two characters of the pair.
+            if (--i < numChar) {
+                assertEquals("previous() mismatch numChar=" + numChar, i * 2, pos);
+            } else {
+                // Past the first numChar pairs, break on each character.
+                assertEquals("previous() mismatch numChar=" + numChar, i + numChar, pos);
+            }
+        }
+        // The backward loop stops when offset 0 is reached, so the last boundary it
+        // checked must be the first boundary found by the forward pass.
+        assertEquals("previous() boundary count mismatch numChar=" + numChar, 1, i);
+    }
+
+    @Test
+    public void Test8BitsTrieWith8BitStateTable() {
+        // With 123 literals both the trie values and the state table rows
+        // are expected to be 8 bits wide.
+        final boolean expectTrieIn8Bits = true;
+        final boolean expectRowsIn8Bits = true;
+        testTrieStateTable(123, expectTrieIn8Bits, expectRowsIn8Bits);
+    }
+
+    @Test
+    public void Test16BitsTrieWith8BitStateTable() {
+        // With 124 literals the trie values are expected to need 16 bits,
+        // while the state table rows are still expected to be 8 bits wide.
+        final boolean expectTrieIn8Bits = false;
+        final boolean expectRowsIn8Bits = true;
+        testTrieStateTable(124, expectTrieIn8Bits, expectRowsIn8Bits);
+    }
+
+    @Test
+    public void Test16BitsTrieWith16BitStateTable() {
+        // With 255 literals both the trie values and the state table rows
+        // are expected to need 16 bits.
+        final boolean expectTrieIn8Bits = false;
+        final boolean expectRowsIn8Bits = false;
+        testTrieStateTable(255, expectTrieIn8Bits, expectRowsIn8Bits);
+    }
+
+    /**
+     * Test 8 bit trie values (CodePointTrie.ValueWidth.BITS_8) combined with 16 bit state
+     * table rows. This uses a different approach from testTrieStateTable() to get a 16 bit
+     * state table: a single long literal of one repeated character, which needs many states
+     * but only few character classes.
+     */
+    @Test
+    public void Test8BitsTrieWith16BitStateTable() {
+        // Generate 510 'a' as text
+        StringBuilder builder = new StringBuilder(510);
+        for (int i = 0; i < 510; i++) {
+            builder.append('a');
+        }
+        String text = builder.toString();
+
+        builder = new StringBuilder(550);
+        builder.append("!!quoted_literals_only;'");
+        // 254 'a' in the rule are intended to produce 256 states, so that the rows
+        // no longer fit in 8 bits. The resulting row width is asserted below.
+        for (int i = 0; i < 254; i++) {
+            builder.append('a');
+        }
+        builder.append("';.;");
+        String rules = builder.toString();
+
+        RuleBasedBreakIterator bi = new RuleBasedBreakIterator(rules);
+
+        RBBIDataWrapper dw = bi.fRData;
+        RBBIDataWrapper.RBBIStateTable fwtbl = dw.fFTable;
+
+        boolean has8BitRowData = (fwtbl.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) != 0;
+        boolean has8BitsTrie = dw.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8;
+        assertFalse("State table should be in 16 bits", has8BitRowData);
+        assertTrue("Trie should be in 8 bits", has8BitsTrie);
+
+        bi.setText(text);
+
+        // Expected break positions: the 254 character literal matches twice (254, 508),
+        // then the remaining two characters are matched one at a time by the "." rule.
+        final int[] expectedBoundaries = {254, 508, 509, 510};
+
+        // Forward: every expected boundary must be produced, in order, and nothing after.
+        // Checking the full sequence keeps the test from passing if iteration stops early.
+        for (int expected : expectedBoundaries) {
+            assertEquals("next()", expected, bi.next());
+        }
+        assertEquals("next() at end of text", RuleBasedBreakIterator.DONE, bi.next());
+
+        // Backward: the iterator is positioned on the last boundary, so previous() must walk
+        // the remaining boundaries in reverse and finally arrive at the start of the text.
+        for (int k = expectedBoundaries.length - 2; k >= 0; k--) {
+            assertEquals("previous()", expectedBoundaries[k], bi.previous());
+        }
+        assertEquals("previous() at start of text", 0, bi.previous());
+    }
+
+    /**
+     * Test that both compact (8 bit) and full sized (16 bit) rbbi tables work, and
+     * that there are no problems with rules at the size that transitions between the two.
+     *
+     * A rule that matches a literal string, like 'abcdefghij', will require one state and
+     * one character class per character in the string. So we can make a rule to tickle the
+     * boundaries by using literal strings of various lengths.
+     *
+     * For both the number of states and the number of character classes, the eight bit format
+     * only has 7 bits available, allowing for 128 values. For both, a few values are reserved,
+     * leaving 120 something available. This test runs the rule string length over the range
+     * of 120 - 260. The table widths are only checked at the two end points of that range,
+     * which allows some margin for changes to the number of values reserved by the rule
+     * builder without breaking the test.
+     */
+    @Test
+    public void TestTable_8_16_Bits() {
+        // testStr serves as both the source of the rule string (truncated to the desired length)
+        // and as test data to check matching behavior. A break rule consisting of the first 120
+        // characters of testStr will match the first 120 chars of the full-length testStr.
+        StringBuilder builder = new StringBuilder(0x200);
+        for (char c=0x3000; c<0x3200; ++c) {
+            builder.append(c);
+        }
+        String testStr = builder.toString();
+
+        int startLength = 120;   // The shortest rule string to test.
+        int endLength = 260;     // The longest rule string to test
+        int increment = 1;
+        for (int ruleLen=startLength; ruleLen <= endLength; ruleLen += increment) {
+            // A single quoted literal made of the first ruleLen characters of testStr.
+            String ruleString = "!!quoted_literals_only; '" + testStr.substring(0, ruleLen) + "';";
+            RuleBasedBreakIterator bi = new RuleBasedBreakIterator(ruleString);
+
+            // Verify that the break iterator is functioning - that the first boundary found
+            // in testStr is at the length of the rule string.
+            bi.setText(testStr);
+            assertEquals("The first boundary found in testStr should be at the length of the rule string",
+                ruleLen, bi.next());
+
+            // Reverse iteration. Do a setText() first, to flush the break iterator's internal cache
+            // of previously detected boundaries, thus forcing the engine to run the safe reverse rules.
+            bi.setText(testStr);
+            int result = bi.preceding(ruleLen);
+            assertEquals("Reverse iteration should find the boundary at 0", 0, result);
+
+            // Verify that the range of rule lengths being tested covers the transitions
+            // from 8 to 16 bit data: 8 bit at the shortest length, 16 bit at the longest.
+            RBBIDataWrapper dw = bi.fRData;
+            RBBIDataWrapper.RBBIStateTable fwtbl = dw.fFTable;
+
+            boolean has8BitRowData = (fwtbl.fFlags & RBBIDataWrapper.RBBI_8BITS_ROWS) != 0;
+            boolean has8BitsTrie = dw.fTrie.getValueWidth() == CodePointTrie.ValueWidth.BITS_8;
+            if (ruleLen == startLength) {
+                assertTrue("State table should be in 8 bits", has8BitRowData);
+                assertTrue("Trie should be in 8 bits", has8BitsTrie);
+            }
+            if (ruleLen == endLength) {
+                assertFalse("State table should be in 16 bits", has8BitRowData);
+                assertFalse("Trie should be in 16 bits", has8BitsTrie);
+            }
+        }
+    }
 }