From 295dc24d64b5e92ed7d277e406f16739ce127d74 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 25 Oct 2007 17:05:36 +0000
Subject: [PATCH] ICU-5987 merge small-conversion-file feature into trunk, from
 svn merge -r 22780:22805 .../branches/markus/smallcnv

X-SVN-Rev: 22852
---
 icu4c/source/common/ucnv_bld.c           |  48 ++-
 icu4c/source/common/ucnv_cnv.h           |  13 +
 icu4c/source/common/ucnvmbcs.c           | 472 ++++++++++++++++++++---
 icu4c/source/common/ucnvmbcs.h           | 108 ++++--
 icu4c/source/test/testdata/Makefile.in   |   2 +-
 icu4c/source/test/testdata/testdata.mak  |  10 +-
 icu4c/source/tools/makeconv/gencnvex.c   |   4 +-
 icu4c/source/tools/makeconv/genmbcs.c    | 212 +++++++---
 icu4c/source/tools/makeconv/genmbcs.h    |  13 +-
 icu4c/source/tools/makeconv/makeconv.c   |  59 ++-
 icu4c/source/tools/makeconv/makeconv.h   |   3 +-
 icu4c/source/tools/toolutil/pkgitems.cpp |  16 +-
 12 files changed, 784 insertions(+), 176 deletions(-)

diff --git a/icu4c/source/common/ucnv_bld.c b/icu4c/source/common/ucnv_bld.c
index f627d6b823..943a1ca507 100644
--- a/icu4c/source/common/ucnv_bld.c
+++ b/icu4c/source/common/ucnv_bld.c
@@ -1261,6 +1261,9 @@ ucnv_swap(const UDataSwapper *ds,
     const _MBCSHeader *inMBCSHeader;
     _MBCSHeader *outMBCSHeader;
     _MBCSHeader mbcsHeader;
+    uint32_t mbcsHeaderLength;
+    UBool noFromU=FALSE;
+
     uint8_t outputType;
 
     int32_t maxFastUChar, mbcsIndexLength;
@@ -1350,7 +1353,15 @@ ucnv_swap(const UDataSwapper *ds,
             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
             return 0;
         }
-        if(!(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1)) {
+        if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
+            mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
+        } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
+                  ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))&
+                   MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
+        ) {
+            mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK;
+            noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0);
+        } else {
             udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n",
                              inMBCSHeader->version[0], inMBCSHeader->version[1]);
             *pErrorCode=U_UNSUPPORTED_ERROR;
@@ -1365,9 +1376,15 @@ ucnv_swap(const UDataSwapper *ds,
         mbcsHeader.offsetFromUBytes=    ds->readUInt32(inMBCSHeader->offsetFromUBytes);
         mbcsHeader.flags=               ds->readUInt32(inMBCSHeader->flags);
         mbcsHeader.fromUBytesLength=    ds->readUInt32(inMBCSHeader->fromUBytesLength);
+        /* mbcsHeader.options have been read above */
 
         extOffset=(int32_t)(mbcsHeader.flags>>8);
         outputType=(uint8_t)mbcsHeader.flags;
+        if(noFromU && outputType==MBCS_OUTPUT_1) {
+            udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n");
+            *pErrorCode=U_UNSUPPORTED_ERROR;
+            return 0;
+        }
 
         /* make sure that the output type is known */
         switch(outputType) {
@@ -1406,7 +1423,10 @@ ucnv_swap(const UDataSwapper *ds,
         }
 
         if(extOffset==0) {
-            size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsHeader.fromUBytesLength+mbcsIndexLength);
+            size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength);
+            if(!noFromU) {
+                size+=(int32_t)mbcsHeader.fromUBytesLength;
+            }
 
             /* avoid compiler warnings - not otherwise necessary, and the value does not matter */
             inExtIndexes=NULL;
@@ -1436,8 +1456,9 @@ ucnv_swap(const UDataSwapper *ds,
                 uprv_memcpy(outBytes, inBytes, size);
             }
 
-            /* swap the MBCSHeader */
-            ds->swapArray32(ds, &inMBCSHeader->countStates, 7*4,
+            /* swap the MBCSHeader, except for the version field */
+            count=mbcsHeaderLength*4;
+            ds->swapArray32(ds, &inMBCSHeader->countStates, count-4,
                                &outMBCSHeader->countStates, pErrorCode);
 
             if(outputType==MBCS_OUTPUT_EXT_ONLY) {
@@ -1447,18 +1468,23 @@ ucnv_swap(const UDataSwapper *ds,
                  */
 
                 /* swap the base name, between the header and the extension data */
-                ds->swapInvChars(ds, inMBCSHeader+1, (int32_t)uprv_strlen((const char *)(inMBCSHeader+1)),
-                                    outMBCSHeader+1, pErrorCode);
+                const char *inBaseName=(const char *)inBytes+count;
+                char *outBaseName=(char *)outBytes+count;
+                ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName),
+                                    outBaseName, pErrorCode);
             } else {
                 /* normal file with base table data */
 
                 /* swap the state table, 1kB per state */
-                ds->swapArray32(ds, inMBCSHeader+1, (int32_t)(mbcsHeader.countStates*1024),
-                                   outMBCSHeader+1, pErrorCode);
+                offset=count;
+                count=mbcsHeader.countStates*1024;
+                ds->swapArray32(ds, inBytes+offset, (int32_t)count,
+                                   outBytes+offset, pErrorCode);
 
                 /* swap the toUFallbacks[] */
-                offset=sizeof(_MBCSHeader)+mbcsHeader.countStates*1024;
-                ds->swapArray32(ds, inBytes+offset, (int32_t)(mbcsHeader.countToUFallbacks*8),
+                offset+=count;
+                count=mbcsHeader.countToUFallbacks*8;
+                ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                    outBytes+offset, pErrorCode);
 
                 /* swap the unicodeCodeUnits[] */
@@ -1495,7 +1521,7 @@ ucnv_swap(const UDataSwapper *ds,
 
                     /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */
                     offset=mbcsHeader.offsetFromUBytes;
-                    count=mbcsHeader.fromUBytesLength;
+                    count= noFromU ? 0 : mbcsHeader.fromUBytesLength;
                     switch(outputType) {
                     case MBCS_OUTPUT_2:
                     case MBCS_OUTPUT_3_EUC:
diff --git a/icu4c/source/common/ucnv_cnv.h b/icu4c/source/common/ucnv_cnv.h
index cf612a754d..a51faaf26a 100644
--- a/icu4c/source/common/ucnv_cnv.h
+++ b/icu4c/source/common/ucnv_cnv.h
@@ -175,6 +175,19 @@ typedef UConverter * (*UConverterSafeClone) (const UConverter   *cnv,
                                              int32_t            *pBufferSize, 
                                              UErrorCode         *status);
 
+/**
+ * Filters for some ucnv_getUnicodeSet() implementation code.
+ */
+typedef enum UConverterSetFilter {
+    UCNV_SET_FILTER_NONE,
+    UCNV_SET_FILTER_DBCS_ONLY,
+    UCNV_SET_FILTER_2022_CN,
+    UCNV_SET_FILTER_SJIS,
+    UCNV_SET_FILTER_GR94DBCS,
+    UCNV_SET_FILTER_HZ,
+    UCNV_SET_FILTER_COUNT
+} UConverterSetFilter;
+
 /**
  * Fills the set of Unicode code points that can be converted by an ICU converter.
  * The API function ucnv_getUnicodeSet() clears the USet before calling
diff --git a/icu4c/source/common/ucnvmbcs.c b/icu4c/source/common/ucnvmbcs.c
index 10dbe74daa..06f2644a1c 100644
--- a/icu4c/source/common/ucnvmbcs.c
+++ b/icu4c/source/common/ucnvmbcs.c
@@ -61,9 +61,47 @@
 #define MBCS_UNROLL_SINGLE_FROM_BMP 0
 
 /*
- * _MBCSHeader version 4.3
+ * _MBCSHeader versions 5.3 & 4.3
  * (Note that the _MBCSHeader version is in addition to the converter formatVersion.)
  *
+ * This version is optional. Version 5 is used for incompatible data format changes.
+ * makeconv will continue to generate version 4 files if possible.
+ *
+ * Changes from version 4:
+ *
+ * The main difference is an additional _MBCSHeader field with
+ * - the length (number of uint32_t) of the _MBCSHeader
+ * - flags for further incompatible data format changes
+ * - flags for further, backward compatible data format changes
+ *
+ * The MBCS_OPT_NO_FROM_U flag indicates that most of the fromUnicode data is omitted from
+ * the file and needs to be reconstituted at load time.
+ * This requires a utf8Friendly format with an additional mbcsIndex table for fast
+ * (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar.
+ * (For details about these structures see below, and see ucnvmbcs.h.)
+ *
+ *   utf8Friendly also implies that the fromUnicode mappings are stored in ascending order
+ *   of the Unicode code points. (This requires that the .ucm file has the |0 etc.
+ *   precision markers for all mappings.)
+ *
+ *   All fallbacks have been moved to the extension table, leaving only roundtrips in the
+ *   omitted data that can be reconstituted from the toUnicode data.
+ *
+ *   Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted.
+ *   With only roundtrip mappings in the base fromUnicode data, this part is fully
+ *   redundant with the mbcsIndex and will be reconstituted from that (also using the
+ *   stage 1 table which contains the information about how stage 2 was compacted).
+ *
+ *   The rest of the stage 2 table, the part for code points above maxFastUChar,
+ *   is stored in the file and will be appended to the reconstituted part.
+ *
+ *   The entire fromUBytes array is omitted from the file and will be reconstituted.
+ *   This is done by enumerating all toUnicode roundtrip mappings, performing
+ *   each mapping (using the stage 1 and reconstituted stage 2 tables) and
+ *   writing instead of reading the byte values.
+ *
+ * _MBCSHeader version 4.3
+ *
  * Change from version 4.2:
  * - Optional utf8Friendly data structures, with 64-entry stage 3 block
  *   allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS
@@ -362,101 +400,240 @@ gb18030Ranges[13][4]={
 
 /* Miscellaneous ------------------------------------------------------------ */
 
-#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
+/**
+ * Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from
+ * consecutive sequences of bytes, starting from the one encoded in value,
+ * to Unicode code points. (Multiple mappings to reduce per-function call overhead.)
+ * Does not currently support m:n mappings or reverse fallbacks.
+ * This function will not be called for sequences of bytes with leading zeros.
+ *
+ * @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode()
+ * @param value contains 1..4 bytes of the first byte sequence, right-aligned
+ * @param codePoints resulting Unicode code points, or negative if a byte sequence does
+ *        not map to anything
+ * @return TRUE to continue enumeration, FALSE to stop
+ */
+typedef UBool U_CALLCONV
+UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]);
 
 /* similar to ucnv_MBCSGetNextUChar() but recursive */
-static void
-_getUnicodeSetForBytes(const UConverterSharedData *sharedData,
-                       const int32_t (*stateTable)[256], const uint16_t *unicodeCodeUnits,
-                       const USetAdder *sa,
-                       UConverterUnicodeSet which,
-                       uint8_t state, uint32_t offset, int32_t lowByte, int32_t highByte,
-                      
-                       UErrorCode *pErrorCode) {
-    int32_t b, entry;
+static UBool
+enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[],
+        int32_t state, uint32_t offset,
+        uint32_t value,
+        UConverterEnumToUCallback *callback, const void *context,
+        UErrorCode *pErrorCode) {
+    UChar32 codePoints[32];
+    const int32_t *row;
+    const uint16_t *unicodeCodeUnits;
+    UChar32 anyCodePoints;
+    int32_t b, limit;
 
-    for(b=lowByte; b<=highByte; ++b) {
-        entry=stateTable[state][b];
+    row=mbcsTable->stateTable[state];
+    unicodeCodeUnits=mbcsTable->unicodeCodeUnits;
+
+    value<<=8;
+    anyCodePoints=-1;  /* becomes non-negative if there is a mapping */
+
+    b=(stateProps[state]&0x38)<<2;
+    if(b==0 && stateProps[state]>=0x40) {
+        /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
+        codePoints[0]=U_SENTINEL;
+        b=1;
+    }
+    limit=((stateProps[state]&7)+1)<<5;
+    while(b<limit) {
+        int32_t entry=row[b];
         if(MBCS_ENTRY_IS_TRANSITION(entry)) {
-            _getUnicodeSetForBytes(
-                sharedData, stateTable, unicodeCodeUnits,
-                sa, which,
-                (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry),
-                offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
-                0, 0xff,
-                pErrorCode);
+            int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry);
+            if(stateProps[nextState]>=0) {
+                /* recurse to a state with non-ignorable actions */
+                if(!enumToU(
+                        mbcsTable, stateProps, nextState,
+                        offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
+                        value|(uint32_t)b,
+                        callback, context,
+                        pErrorCode)) {
+                    return FALSE;
+                }
+            }
+            codePoints[b&0x1f]=U_SENTINEL;
         } else {
             UChar32 c;
-            int32_t rowOffset=offset;
-            uint8_t action;
-
-            c=U_SENTINEL;
+            int32_t action;
 
             /*
              * An if-else-if chain provides more reliable performance for
              * the most common cases compared to a switch.
              */
-            action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
+            action=MBCS_ENTRY_FINAL_ACTION(entry);
             if(action==MBCS_STATE_VALID_DIRECT_16) {
                 /* output BMP code point */
                 c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
             } else if(action==MBCS_STATE_VALID_16) {
-                offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
-                c=unicodeCodeUnits[offset];
+                int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+                c=unicodeCodeUnits[finalOffset];
                 if(c<0xfffe) {
                     /* output BMP code point */
                 } else {
                     c=U_SENTINEL;
                 }
             } else if(action==MBCS_STATE_VALID_16_PAIR) {
-                offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
-                c=unicodeCodeUnits[offset++];
+                int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+                c=unicodeCodeUnits[finalOffset++];
                 if(c<0xd800) {
                     /* output BMP code point below 0xd800 */
                 } else if(c<=0xdbff) {
                     /* output roundtrip or fallback supplementary code point */
-                    c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00);
+                    c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
                 } else if(c==0xe000) {
                     /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
-                    c=unicodeCodeUnits[offset];
+                    c=unicodeCodeUnits[finalOffset];
                 } else {
                     c=U_SENTINEL;
                 }
             } else if(action==MBCS_STATE_VALID_DIRECT_20) {
                 /* output supplementary code point */
                 c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
+            } else {
+                c=U_SENTINEL;
             }
 
-            if(c>=0) {
-                sa->add(sa->set, c);
+            codePoints[b&0x1f]=c;
+            anyCodePoints&=c;
+        }
+        if(((++b)&0x1f)==0) {
+            if(anyCodePoints>=0) {
+                if(!callback(context, value|(uint32_t)(b-0x20), codePoints)) {
+                    return FALSE;
+                }
+                anyCodePoints=-1;
             }
-            offset=rowOffset;
         }
     }
+    return TRUE;
 }
 
 /*
- * Internal function returning a UnicodeSet for toUnicode() conversion.
- * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
- * In the future, if we add support for reverse-fallback sets, this function
- * needs to be updated, and called for each initial state.
- * Does not currently handle extensions.
- * Does not empty the set first.
+ * Only called if stateProps[state]==-1.
+ * A recursive call may do stateProps[state]|=0x40 if this state is the target of an
+ * MBCS_STATE_CHANGE_ONLY.
  */
-U_CFUNC void
-ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
-                           const USetAdder *sa,
-                           UConverterUnicodeSet which,
-                           uint8_t state, int32_t lowByte, int32_t highByte,
-                           UErrorCode *pErrorCode) {
-    _getUnicodeSetForBytes(
-        sharedData, sharedData->mbcs.stateTable, sharedData->mbcs.unicodeCodeUnits,
-        sa, which,
-        state, 0, lowByte, highByte,
-        pErrorCode);
+static int8_t
+getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) {
+    const int32_t *row;
+    int32_t min, max, entry, nextState;
+
+    row=stateTable[state];
+    stateProps[state]=0;
+
+    /* find first non-ignorable state */
+    for(min=0;; ++min) {
+        entry=row[min];
+        nextState=MBCS_ENTRY_STATE(entry);
+        if(stateProps[nextState]==-1) {
+            getStateProp(stateTable, stateProps, nextState);
+        }
+        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+            if(stateProps[nextState]>=0) {
+                break;
+            }
+        } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
+            break;
+        }
+        if(min==0xff) {
+            stateProps[state]=-0x40;  /* (int8_t)0xc0 */
+            return stateProps[state];
+        }
+    }
+    stateProps[state]|=(int8_t)((min>>5)<<3);
+
+    /* find last non-ignorable state */
+    for(max=0xff; min<max; --max) {
+        entry=row[max];
+        nextState=MBCS_ENTRY_STATE(entry);
+        if(stateProps[nextState]==-1) {
+            getStateProp(stateTable, stateProps, nextState);
+        }
+        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+            if(stateProps[nextState]>=0) {
+                break;
+            }
+        } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
+            break;
+        }
+    }
+    stateProps[state]|=(int8_t)(max>>5);
+
+    /* recurse further and collect direct-state information */
+    while(min<=max) {
+        entry=row[min];
+        nextState=MBCS_ENTRY_STATE(entry);
+        if(stateProps[nextState]==-1) {
+            getStateProp(stateTable, stateProps, nextState);
+        }
+        if(MBCS_ENTRY_IS_FINAL(entry)) {
+            stateProps[nextState]|=0x40;
+            if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) {
+                stateProps[state]|=0x40;
+            }
+        }
+        ++min;
+    }
+    return stateProps[state];
 }
 
-#endif
+/*
+ * Internal function enumerating the toUnicode data of an MBCS converter.
+ * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
+ * table, but could also be used for a future ucnv_getUnicodeSet() option
+ * that includes reverse fallbacks (after updating this function's implementation).
+ * Currently only handles roundtrip mappings.
+ * Does not currently handle extensions.
+ */
+static void
+ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable,
+                       UConverterEnumToUCallback *callback, const void *context,
+                       UErrorCode *pErrorCode) {
+    /*
+     * Properties for each state, to speed up the enumeration.
+     * Ignorable actions are unassigned/illegal/state-change-only:
+     * They do not lead to mappings.
+     *
+     * Bits 7..6:
+     * 1 direct/initial state (stateful converters have multiple)
+     * 0 non-initial state with transitions or with non-ignorable result actions
+     * -1 final state with only ignorable actions
+     *
+     * Bits 5..3:
+     * The lowest byte value with non-ignorable actions is
+     * value<<5 (rounded down).
+     *
+     * Bits 2..0:
+     * The highest byte value with non-ignorable actions is
+     * (value<<5)|0x1f (rounded up).
+     */
+    int8_t stateProps[MBCS_MAX_STATE_COUNT];
+    int32_t state;
+
+    uprv_memset(stateProps, -1, sizeof(stateProps));
+
+    /* recurse from state 0 and set all stateProps */
+    getStateProp(mbcsTable->stateTable, stateProps, 0);
+
+    for(state=0; state<mbcsTable->countStates; ++state) {
+        /*if(stateProps[state]==-1) {
+            printf("unused/unreachable <icu:state> %d\n", state);
+        }*/
+        if(stateProps[state]>=0x40) {
+            /* start from each direct state */
+            enumToU(
+                mbcsTable, stateProps, state, 0, 0,
+                callback, context,
+                pErrorCode);
+        }
+    }
+}
 
 U_CFUNC void
 ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
@@ -1006,6 +1183,156 @@ _EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) {
     return TRUE;
 }
 
+/* reconstitute omitted fromUnicode data ------------------------------------ */
+
+/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */
+static UBool U_CALLCONV
+writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) {
+    UConverterMBCSTable *mbcsTable=(UConverterMBCSTable *)context;
+    const uint16_t *table;
+    uint32_t *stage2;
+    uint8_t *bytes, *p;
+    UChar32 c;
+    int32_t i, st3;
+
+    table=mbcsTable->fromUnicodeTable;
+    bytes=(uint8_t *)mbcsTable->fromUnicodeBytes;
+
+    /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
+    switch(mbcsTable->outputType) {
+    case MBCS_OUTPUT_3_EUC:
+        if(value<=0xffff) {
+            /* short sequences are stored directly */
+            /* code set 0 or 1 */
+        } else if(value<=0x8effff) {
+            /* code set 2 */
+            value&=0x7fff;
+        } else /* first byte is 0x8f */ {
+            /* code set 3 */
+            value&=0xff7f;
+        }
+        break;
+    case MBCS_OUTPUT_4_EUC:
+        if(value<=0xffffff) {
+            /* short sequences are stored directly */
+            /* code set 0 or 1 */
+        } else if(value<=0x8effffff) {
+            /* code set 2 */
+            value&=0x7fffff;
+        } else /* first byte is 0x8f */ {
+            /* code set 3 */
+            value&=0xff7fff;
+        }
+        break;
+    default:
+        break;
+    }
+
+    for(i=0; i<=0x1f; ++value, ++i) {
+        c=codePoints[i];
+        if(c<0) {
+            continue;
+        }
+
+        /* locate the stage 2 & 3 data */
+        stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f);
+        p=bytes;
+        st3=(int32_t)(uint16_t)*stage2*16+(c&0xf);
+
+        /* write the codepage bytes into stage 3 */
+        switch(mbcsTable->outputType) {
+        case MBCS_OUTPUT_3:
+        case MBCS_OUTPUT_4_EUC:
+            p+=st3*3;
+            p[0]=(uint8_t)(value>>16);
+            p[1]=(uint8_t)(value>>8);
+            p[2]=(uint8_t)value;
+            break;
+        case MBCS_OUTPUT_4:
+            ((uint32_t *)p)[st3]=value;
+            break;
+        default:
+            /* 2 bytes per character */
+            ((uint16_t *)p)[st3]=(uint16_t)value;
+            break;
+        }
+
+        /* set the roundtrip flag */
+        *stage2|=(1UL<<(16+(c&0xf)));
+    }
+    return TRUE;
+ }
+
+static void
+reconstituteData(UConverterMBCSTable *mbcsTable,
+                 uint32_t stage1Length, uint32_t stage2Length,
+                 uint32_t fullStage2Length,  /* lengths are numbers of units, not bytes */
+                 UErrorCode *pErrorCode) {
+    uint16_t *stage1;
+    uint32_t *stage2;
+    uint8_t *bytes;
+    uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength;
+    mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
+    if(mbcsTable->reconstitutedData==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    uprv_memset(mbcsTable->reconstitutedData, 0, dataLength);
+
+    /* copy existing data and reroute the pointers */
+    stage1=(uint16_t *)mbcsTable->reconstitutedData;
+    uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2);
+
+    stage2=(uint32_t *)(stage1+stage1Length);
+    uprv_memcpy(stage2+(fullStage2Length-stage2Length),
+                mbcsTable->fromUnicodeTable+stage1Length,
+                stage2Length*4);
+
+    mbcsTable->fromUnicodeTable=stage1;
+    mbcsTable->fromUnicodeBytes=bytes=(uint8_t *)(stage2+fullStage2Length);
+
+    /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
+    stage2=(uint32_t *)stage1;
+
+    /* reconstitute the initial part of stage 2 from the mbcsIndex */
+    {
+        int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6;
+        int32_t stageUTF8Index=0;
+        int32_t st1, st2, st3, i;
+
+        for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) {
+            st2=stage1[st1];
+            if(st2!=stage1Length/2) {
+                /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
+                for(i=0; i<16; ++i) {
+                    st3=mbcsTable->mbcsIndex[stageUTF8Index++];
+                    if(st3!=0) {
+                        /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
+                        st3>>=4;
+                        /*
+                         * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
+                         * allocated together as a single 64-block for access from the mbcsIndex
+                         */
+                        stage2[st2++]=st3++;
+                        stage2[st2++]=st3++;
+                        stage2[st2++]=st3++;
+                        stage2[st2++]=st3;
+                    } else {
+                        /* no stage 3 block, skip */
+                        st2+=4;
+                    }
+                }
+            } else {
+                /* no stage 2 block, skip */
+                stageUTF8Index+=16;
+            }
+        }
+    }
+
+    /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
+    ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
+}
+
 /* MBCS setup functions ----------------------------------------------------- */
 
 static void
@@ -1017,13 +1344,25 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
     UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
     _MBCSHeader *header=(_MBCSHeader *)raw;
     uint32_t offset;
+    uint32_t headerLength;
+    UBool noFromU=FALSE;
 
-    if(header->version[0]!=4) {
+    if(header->version[0]==4) {
+        headerLength=MBCS_HEADER_V4_LENGTH;
+    } else if(header->version[0]==5 && header->version[1]>=3 &&
+              (header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
+        headerLength=header->options&MBCS_OPT_LENGTH_MASK;
+        noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
+    } else {
         *pErrorCode=U_INVALID_TABLE_FORMAT;
         return;
     }
 
     mbcsTable->outputType=(uint8_t)header->flags;
+    if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
+        *pErrorCode=U_INVALID_TABLE_FORMAT;
+        return;
+    }
 
     /* extension data, header version 4.2 and higher */
     offset=header->flags>>8;
@@ -1051,7 +1390,7 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
         }
 
         /* load the base table */
-        baseName=(const char *)(header+1);
+        baseName=(const char *)header+headerLength*4;
         if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
             /* forbid loading this same extension-only file */
             *pErrorCode=U_INVALID_TABLE_FORMAT;
@@ -1095,6 +1434,12 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
         mbcsTable->swapLFNLFromUnicodeBytes=NULL;
         mbcsTable->swapLFNLName=NULL;
 
+        /*
+         * The reconstitutedData must be deleted only when the base converter
+         * is unloaded.
+         */
+        mbcsTable->reconstitutedData=NULL;
+
         /*
          * Set a special, runtime-only outputType if the extension converter
          * is a DBCS version of a base converter that also maps single bytes.
@@ -1187,7 +1532,7 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
 
         mbcsTable->countStates=(uint8_t)header->countStates;
         mbcsTable->countToUFallbacks=header->countToUFallbacks;
-        mbcsTable->stateTable=(const int32_t (*)[256])(raw+sizeof(_MBCSHeader));
+        mbcsTable->stateTable=(const int32_t (*)[256])(raw+headerLength*4);
         mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
         mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
 
@@ -1244,7 +1589,9 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
                  * The .cnv file is prebuilt with an additional stage table with indexes
                  * to each block.
                  */
-                mbcsTable->mbcsIndex=(const uint16_t *)(mbcsTable->fromUnicodeBytes+mbcsTable->fromUBytesLength);
+                mbcsTable->mbcsIndex=(const uint16_t *)
+                    (mbcsTable->fromUnicodeBytes+
+                     (noFromU ? 0 : mbcsTable->fromUBytesLength));
                 mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)|0xff;
             }
         }
@@ -1261,6 +1608,16 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
             }
             mbcsTable->asciiRoundtrips=asciiRoundtrips;
         }
+
+        if(noFromU) {
+            uint32_t stage1Length=
+                mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ?
+                    0x440 : 0x40;
+            uint32_t stage2Length=
+                (header->offsetFromUBytes-header->offsetFromUTable)/4-
+                stage1Length/2;
+            reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
+        }
     }
 
     /* Set the impl pointer here so that it is set for both extension-only and base tables. */
@@ -1296,6 +1653,9 @@ ucnv_MBCSUnload(UConverterSharedData *sharedData) {
     if(mbcsTable->baseSharedData!=NULL) {
         ucnv_unload(mbcsTable->baseSharedData);
     }
+    if(mbcsTable->reconstitutedData!=NULL) {
+        uprv_free(mbcsTable->reconstitutedData);
+    }
 }
 
 static void
diff --git a/icu4c/source/common/ucnvmbcs.h b/icu4c/source/common/ucnvmbcs.h
index 42f64ee353..9e4f295703 100644
--- a/icu4c/source/common/ucnvmbcs.h
+++ b/icu4c/source/common/ucnvmbcs.h
@@ -23,6 +23,7 @@
 
 #include "unicode/ucnv.h"
 #include "ucnv_cnv.h"
+#include "ucnv_ext.h"
 
 /**
  * ICU conversion (.cnv) data file structure, following the usual UDataInfo
@@ -41,6 +42,24 @@
  * the same toUnicode structures, while the fromUnicode structures for SBCS
  * differ from those for other MBCS-style converters.
  *
+ * _MBCSHeader.version 5 is optional and not backward-compatible
+ * (as usual for changes in the major version field).
+ *
+ * Versions 5.m work like versions 4.m except:
+ * - The _MBCSHeader has variable length (and is always longer than in version 4).
+ *   See the further description of struct _MBCSHeader below.
+ * - There is a set of flags which indicate further incompatible changes.
+ *   (Reader code must reject the file if it does not recognize them all.)
+ * - In particular, one of these flags indicates that most of the fromUnicode
+ *   data is missing and must be reconstituted from the toUnicode data
+ *   and from the utf8Friendly mbcsIndex at load time.
+ *   (This only works with a utf8Friendly table.)
+ *   In this case, makeconv may increase maxFastUChar automatically to U+FFFF.
+ *
+ * The first of these versions is 5.3, which is like 4.3 except for the differences above.
+ *
+ * When possible, makeconv continues to generate version 4.m files.
+ *
  * _MBCSHeader.version 4.3 optionally modifies the fromUnicode data structures
  * slightly and optionally adds a table for conversion to MBCS (non-SBCS)
  * charsets.
@@ -127,6 +146,26 @@
  *  7   uint32_t    fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher
  *                  counts bytes in fromUBytes[]
  *
+ * New and required in version 5:
+ *  8   uint32_t    options, bits:
+ *                      31..16 reserved for flags that can be added without breaking
+ *                                 backward compatibility
+ *                      15.. 6 reserved for flags whose addition will break
+ *                                 backward compatibility
+ *                           6 MBCS_OPT_NO_FROM_U -- if set,
+ *                                 then most of the fromUnicode data is omitted;
+ *                                 fullStage2Length is present and the missing
+ *                                 bottom part of stage 2 must be reconstituted from
+ *                                 the toUnicode data;
+ *                                 stage 3 is missing completely as well;
+ *                                 not used for SBCS tables
+ *                       5.. 0 length of the _MBCSHeader (number of uint32_t)
+ *
+ * New and optional in version 5:
+ *  9   uint32_t    fullStage2Length: used if MBCS_OPT_NO_FROM_U is set
+ *                                 specifies the full length of stage 2
+ *                                 including the omitted part
+ *
  * if(outputType==MBCS_OUTPUT_EXT_ONLY) {
  *     -- base table name for extension-only table
  *     char baseTableName[variable]; -- with NUL plus padding for 4-alignment
@@ -153,7 +192,7 @@
  *         -- BMP-only tables have a smaller stage 1 table
  *         uint16_t fromUTable[0x40]; (32-bit-aligned)
  *     }
- *    
+ *
  *     -- stage 2 tables
  *        length determined by top of stage 1 and bottom of stage 3 tables
  *     if(outputType==MBCS_OUTPUT_1) {
@@ -162,17 +201,24 @@
  *     } else {
  *         -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes
  *         uint32_t stage 2 flags and indexes[?];
+ *         if(options&MBCS_OPT_NO_FROM_U) {
+ *             stage 2 really has length fullStage2Length
+ *             and the omitted lower part must be reconstituted from
+ *             the toUnicode data
+ *         }
  *     }
- *    
+ *
  *     -- stage 3 tables with byte results
  *     if(outputType==MBCS_OUTPUT_1) {
  *         -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c
  *         uint16_t fromUBytes[fromUBytesLength/2];
- *     } else {
+ *     } else if(!(options&MBCS_OPT_NO_FROM_U)) {
  *         -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c
  *         uint8_t fromUBytes[fromUBytesLength]; or
  *         uint16_t fromUBytes[fromUBytesLength/2]; or
  *         uint32_t fromUBytes[fromUBytesLength/4];
+ *     } else {
+ *         fromUBytes[] must be reconstituted from the toUnicode data
  *     }
  *
  *     -- optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher
@@ -340,6 +386,9 @@ typedef struct UConverterMBCSTable {
     /* roundtrips */
     uint32_t asciiRoundtrips;
 
+    /* reconstituted data that was omitted from the .cnv file */
+    uint8_t *reconstitutedData;
+
     /* converter name for swaplfnl */
     char *swapLFNLName;
 
@@ -348,6 +397,26 @@ typedef struct UConverterMBCSTable {
     const int32_t *extIndexes;
 } UConverterMBCSTable;
 
+enum {
+    MBCS_OPT_LENGTH_MASK=0x3f,
+    MBCS_OPT_NO_FROM_U=0x40,
+    /*
+     * If any of the following options bits are set and the reader
+     * does not recognize it, then the file must be rejected.
+     */
+    MBCS_OPT_INCOMPATIBLE_MASK=0xffc0,
+    /*
+     * Remove bits from this mask as more options are recognized
+     * by all implementations that use this constant.
+     */
+    MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80
+};
+
+enum {
+    MBCS_HEADER_V4_LENGTH=8,
+    MBCS_HEADER_V5_MIN_LENGTH=9
+};
+
 /**
  * MBCS data header. See data format description above.
  */
@@ -360,6 +429,12 @@ typedef struct {
              offsetFromUBytes,
              flags,
              fromUBytesLength;
+
+    /* new and required in version 5 */
+    uint32_t options;
+
+    /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */
+    uint32_t fullStage2Length;  /* number of 32-bit units */
 } _MBCSHeader;
 
 /*
@@ -456,23 +531,6 @@ U_CFUNC void
 ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                           UErrorCode *pErrorCode);
 
-#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
-/*
- * Internal function returning a UnicodeSet for toUnicode() conversion.
- * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
- * In the future, if we add support for reverse-fallback sets, this function
- * needs to be updated, and called for each initial state.
- * Does not currently handle extensions.
- * Does not empty the set first.
- */
-U_CFUNC void
-ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
-                           const USetAdder *sa,
-                           UConverterUnicodeSet which,
-                           uint8_t state, int32_t lowByte, int32_t highByte,
-                           UErrorCode *pErrorCode);
-#endif
-
 /*
  * Internal function returning a UnicodeSet for toUnicode() conversion.
  * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
@@ -487,16 +545,6 @@ ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
                                  UConverterUnicodeSet which,
                                  UErrorCode *pErrorCode);
 
-typedef enum UConverterSetFilter {
-    UCNV_SET_FILTER_NONE,
-    UCNV_SET_FILTER_DBCS_ONLY,
-    UCNV_SET_FILTER_2022_CN,
-    UCNV_SET_FILTER_SJIS,
-    UCNV_SET_FILTER_GR94DBCS,
-    UCNV_SET_FILTER_HZ,
-    UCNV_SET_FILTER_COUNT
-} UConverterSetFilter;
-
 /*
  * Same as ucnv_MBCSGetUnicodeSetForUnicode() but
  * the set can be filtered by encoding scheme.
diff --git a/icu4c/source/test/testdata/Makefile.in b/icu4c/source/test/testdata/Makefile.in
index 657c770c97..a99f0f14de 100644
--- a/icu4c/source/test/testdata/Makefile.in
+++ b/icu4c/source/test/testdata/Makefile.in
@@ -186,7 +186,7 @@ $(TESTBUILDDIR)/nfsmxp.spp: $(BINDIR)/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_m
 	$(INVOKE) $(BINDIR)/gensprep -s $(TESTSRCDATADIR) $(ICU_DATA_OPT) -d $(TESTBUILDDIR) -b nfsmxp -k -n $(UNICODEDATADIR) -u 3.2.0 nfs4_mixed_prep_p.txt
 
 $(TESTBUILDDIR)/%.cnv: $(TESTSRCDATADIR)/%.ucm $(BINDIR)/makeconv$(EXEEXT)
-	$(INVOKE) $(BINDIR)/makeconv -c -d $(TESTBUILDDIR) $(TESTSRCDATADIR)/$(<F)
+	$(INVOKE) $(BINDIR)/makeconv --small -c -d $(TESTBUILDDIR) $(TESTSRCDATADIR)/$(<F)
 
 $(TESTBUILDDIR)/%.res: $(TESTSRCDATADIR)/%.txt $(BINDIR)/genrb$(EXEEXT) $(DAT_FILES)
 	$(INVOKE) $(BINDIR)/genrb $(GENRBOPTS) -q -s $(TESTSRCDATADIR) $(ICU_DATA_OPT) -d $(TESTBUILDDIR) $(<F)
diff --git a/icu4c/source/test/testdata/testdata.mak b/icu4c/source/test/testdata/testdata.mak
index 3bd1585d87..7e25d4d188 100644
--- a/icu4c/source/test/testdata/testdata.mak
+++ b/icu4c/source/test/testdata/testdata.mak
@@ -124,21 +124,21 @@ $(TEST_RES_FILES:.res =.res
 # Targets for test converter data
 "$(TESTDATABLD)\test1.cnv": "$(TESTDATA)\test1.ucm"
 	@echo Building $@
-	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" $**
+	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" --small -d"$(TESTDATABLD)" $**
 
 "$(TESTDATABLD)\test3.cnv": "$(TESTDATA)\test3.ucm"
 	@echo Building $@
-	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" $**
+	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" --small -d"$(TESTDATABLD)" $**
 
 "$(TESTDATABLD)\test4.cnv": "$(TESTDATA)\test4.ucm"
 	@echo Building $@
-	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" $**
+	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" --small -d"$(TESTDATABLD)" $**
 
 "$(TESTDATABLD)\test4x.cnv": "$(TESTDATA)\test4x.ucm"
 	@echo Building $@
-	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" $**
+	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" --small -d"$(TESTDATABLD)" $**
 
 "$(TESTDATABLD)\ibm9027.cnv": "$(TESTDATA)\ibm9027.ucm"
 	@echo Building $@
-	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" $**
+	@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" --small -d"$(TESTDATABLD)" $**
 
diff --git a/icu4c/source/tools/makeconv/gencnvex.c b/icu4c/source/tools/makeconv/gencnvex.c
index 3b657366b6..cf09cbe5b3 100644
--- a/icu4c/source/tools/makeconv/gencnvex.c
+++ b/icu4c/source/tools/makeconv/gencnvex.c
@@ -130,7 +130,7 @@ CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
             extData->ucm->baseName[length++]=0;
         }
 
-        headerSize=sizeof(header)+length;
+        headerSize=MBCS_HEADER_V4_LENGTH*4+length;
 
         /* fill the header */
         header.version[0]=4;
@@ -138,7 +138,7 @@ CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
         header.flags=(uint32_t)((headerSize<<8)|MBCS_OUTPUT_EXT_ONLY);
 
         /* write the header and the base table name */
-        udata_writeBlock(pData, &header, sizeof(header));
+        udata_writeBlock(pData, &header, MBCS_HEADER_V4_LENGTH*4);
         udata_writeBlock(pData, extData->ucm->baseName, length);
     }
 
diff --git a/icu4c/source/tools/makeconv/genmbcs.c b/icu4c/source/tools/makeconv/genmbcs.c
index 139ab0109b..6757b7781a 100644
--- a/icu4c/source/tools/makeconv/genmbcs.c
+++ b/icu4c/source/tools/makeconv/genmbcs.c
@@ -30,7 +30,7 @@
  * Reduce tests for maxCharLength.
  */
 
-typedef struct MBCSData {
+struct MBCSData {
     NewConverter newConverter;
 
     UCMFile *ucm;
@@ -48,10 +48,18 @@ typedef struct MBCSData {
     uint32_t stage2Top, stage3Top;
 
     /* fromUTF8 */
-    uint16_t stageUTF8[MBCS_UTF8_STAGE_SIZE];
+    uint16_t stageUTF8[0x10000>>MBCS_UTF8_STAGE_SHIFT];  /* allow for utf8Max=0xffff */
+
+    /*
+     * Maximum UTF-8-friendly code point.
+     * 0 if !utf8Friendly, otherwise 0x01ff..0xffff in steps of 0x100.
+     * If utf8Friendly, utf8Max is normally either MBCS_UTF8_MAX or 0xffff.
+     */
+    uint16_t utf8Max;
 
     UBool utf8Friendly;
-} MBCSData;
+    UBool omitFromU;
+};
 
 /* prototypes */
 static void
@@ -115,6 +123,29 @@ printBytes(char *buffer, const uint8_t *bytes, int32_t length) {
 
 /* implementation ----------------------------------------------------------- */
 
+static MBCSData gDummy;
+
+U_CFUNC const MBCSData *
+MBCSGetDummy() {
+    uprv_memset(&gDummy, 0, sizeof(MBCSData));
+
+    /*
+     * Set "pessimistic" values which may sometimes move too many
+     * mappings to the extension table (but never too few).
+     * These values cause MBCSOkForBaseFromUnicode() to return FALSE for the
+     * largest set of mappings.
+     * Assume maxCharLength>1.
+     */
+    gDummy.utf8Friendly=TRUE;
+    if(SMALL) {
+        gDummy.utf8Max=0xffff;
+        gDummy.omitFromU=TRUE;
+    } else {
+        gDummy.utf8Max=MBCS_UTF8_MAX;
+    }
+    return &gDummy;
+}
+
 static void
 MBCSInit(MBCSData *mbcsData, UCMFile *ucm) {
     uprv_memset(mbcsData, 0, sizeof(MBCSData));
@@ -680,7 +711,7 @@ MBCSAddFromUnicode(MBCSData *mbcsData,
 
     /* inspect stage 1 */
     index=c>>MBCS_STAGE_1_SHIFT;
-    if(mbcsData->utf8Friendly && c<=MBCS_UTF8_MAX) {
+    if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
         nextOffset=(c>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK&~(MBCS_UTF8_STAGE_3_BLOCKS-1);
     } else {
         nextOffset=(c>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK;
@@ -716,7 +747,7 @@ MBCSAddFromUnicode(MBCSData *mbcsData,
 
     /* inspect stage 2 */
     index=mbcsData->stage1[index]+nextOffset;
-    if(mbcsData->utf8Friendly && c<=MBCS_UTF8_MAX) {
+    if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
         /* allocate 64-entry blocks for UTF-8-friendly lookup */
         blockSize=MBCS_UTF8_STAGE_3_BLOCK_SIZE*maxCharLength;
         nextOffset=c&MBCS_UTF8_STAGE_3_BLOCK_MASK;
@@ -761,12 +792,12 @@ MBCSAddFromUnicode(MBCSData *mbcsData,
     stage3Index=MBCS_STAGE_3_GRANULARITY*(uint32_t)(uint16_t)mbcsData->stage2[index];
 
     /* Build an alternate, UTF-8-friendly stage table as well. */
-    if(mbcsData->utf8Friendly && c<=MBCS_UTF8_MAX) {
+    if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
         /* Overflow for uint16_t entries in stageUTF8? */
         if(stage3Index>0xffff) {
             /*
              * This can occur only if the mapping table is nearly perfectly filled and if
-             * MBCS_UTF8_MAX==0xffff.
+             * utf8Max==0xffff.
              * (There is no known charset like this. GB 18030 does not map
              * surrogate code points and LMBCS does not map 256 PUA code points.)
              *
@@ -776,20 +807,20 @@ MBCSAddFromUnicode(MBCSData *mbcsData,
              * mappings with 0<=c<MBCS_UTF8_LIMIT, and there is only also
              * the initial all-unassigned block in stage3.
              *
+             * Solution for the overflow: Reduce utf8Max to the next lower value, 0xfeff.
+             *
              * (See svn revision 20866 of the markus/ucnvutf8 feature branch for
              * code that causes MBCSAddTable() to rebuild the table not utf8Friendly
              * in case of overflow. That code was not tested.)
              */
-            fprintf(stderr, "too many stage 3 entries for UTF-8-friendly format, processing U+%04x<->0x%s\n",
-                (int)c, printBytes(buffer, bytes, length));
-            return FALSE;
+            mbcsData->utf8Max=0xfeff;
+        } else {
+            /*
+             * The stage 3 block has been assigned for the regular trie.
+             * Just copy its index into stageUTF8[], without the granularity.
+             */
+            mbcsData->stageUTF8[c>>MBCS_UTF8_STAGE_SHIFT]=(uint16_t)stage3Index;
         }
-
-        /*
-         * The stage 3 block has been assigned for the regular trie.
-         * Just copy its index into stageUTF8[], without the granularity.
-         */
-        mbcsData->stageUTF8[c>>MBCS_UTF8_STAGE_SHIFT]=(uint16_t)stage3Index;
     }
 
     /* write the codepage bytes into stage 3 and get the previous bytes */
@@ -856,7 +887,7 @@ MBCSAddFromUnicode(MBCSData *mbcsData,
 }
 
 U_CFUNC UBool
-MBCSOkForBaseFromUnicode(UBool utf8Friendly,
+MBCSOkForBaseFromUnicode(const MBCSData *mbcsData,
                          const uint8_t *bytes, int32_t length,
                          UChar32 c, int8_t flag) {
     /*
@@ -883,7 +914,16 @@ MBCSOkForBaseFromUnicode(UBool utf8Friendly,
      * - any mapping to 0x00 (result value 0, indistinguishable from unmappable entry)
      * - any |1 fallback (no roundtrip flags in the optimized table)
      */
-    if(utf8Friendly && flag<=1 && c<=MBCS_UTF8_MAX && (bytes[0]==0 || flag==1)) {
+    if(mbcsData->utf8Friendly && flag<=1 && c<=mbcsData->utf8Max && (bytes[0]==0 || flag==1)) {
+        return FALSE;
+    }
+
+    /*
+     * If we omit the fromUnicode data, we can only store roundtrips there
+     * because only they are recoverable from the toUnicode data.
+     * Fallbacks must go into the extension table.
+     */
+    if(mbcsData->omitFromU && flag!=0) {
         return FALSE;
     }
 
@@ -918,6 +958,18 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati
      * indicators are used.
      */
     mbcsData->utf8Friendly=utf8Friendly=(UBool)((table->flagsType&UCM_FLAGS_EXPLICIT)!=0);
+    if(utf8Friendly) {
+        mbcsData->utf8Max=MBCS_UTF8_MAX;
+        if(SMALL && maxCharLength>1) {
+            mbcsData->omitFromU=TRUE;
+        }
+    } else {
+        mbcsData->utf8Max=0;
+        if(SMALL && maxCharLength>1) {
+            fprintf(stderr,
+                "makeconv warning: --small not available for .ucm files without |0 etc.\n");
+        }
+    }
 
     if(!MBCSStartMappings(mbcsData)) {
         return FALSE;
@@ -933,6 +985,28 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati
         c=m->u;
         f=m->f;
 
+        /*
+         * Small optimization for --small .cnv files:
+         *
+         * If there are fromUnicode mappings above MBCS_UTF8_MAX,
+         * then the file size will be smaller if we make utf8Max larger
+         * because the size increase in stageUTF8 will be more than balanced by
+         * how much less of stage2 needs to be stored.
+         *
+         * There is no point in doing this incrementally because stageUTF8
+         * uses so much less space per block than stage2,
+         * so we immediately increase utf8Max to 0xffff.
+         *
+         * Do not increase utf8Max if it is already at 0xfeff because MBCSAddFromUnicode()
+         * sets it to that value when stageUTF8 overflows.
+         */
+        if( mbcsData->omitFromU && f<=1 &&
+            mbcsData->utf8Max<c && c<=0xffff &&
+            mbcsData->utf8Max<0xfeff
+        ) {
+            mbcsData->utf8Max=0xffff;
+        }
+
         switch(f) {
         case -1:
             /* there was no precision/fallback indicator */
@@ -943,7 +1017,7 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati
 
             if(maxCharLength==1) {
                 isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
-            } else if(MBCSOkForBaseFromUnicode(utf8Friendly, m->b.bytes, m->bLen, c, f)) {
+            } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) {
                 isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
             } else {
                 m->f|=MBCS_FROM_U_EXT_FLAG;
@@ -955,7 +1029,7 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati
             if(maxCharLength==1) {
                 staticData->hasFromUnicodeFallback=TRUE;
                 isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
-            } else if(MBCSOkForBaseFromUnicode(utf8Friendly, m->b.bytes, m->bLen, c, f)) {
+            } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) {
                 staticData->hasFromUnicodeFallback=TRUE;
                 isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
             } else {
@@ -965,7 +1039,7 @@ MBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *stati
             break;
         case 2:
             /* ignore |2 SUB mappings, except to move <subchar1> mappings to the extension table */
-            if(maxCharLength>1 && !MBCSOkForBaseFromUnicode(utf8Friendly, m->b.bytes, m->bLen, c, f)) {
+            if(maxCharLength>1 && m->bLen==1) {
                 m->f|=MBCS_FROM_U_EXT_FLAG;
                 m->moveFlag=UCM_MOVE_TO_EXT;
             }
@@ -1329,24 +1403,56 @@ static uint32_t
 MBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
           UNewDataMemory *pData, int32_t tableType) {
     MBCSData *mbcsData=(MBCSData *)cnvData;
+    uint32_t stage2Start, stage2Length;
     uint32_t top, stageUTF8Length=0;
     int32_t i, stage1Top;
+    uint32_t headerLength;
 
     _MBCSHeader header={ { 0, 0, 0, 0 }, 0, 0, 0, 0, 0, 0, 0 };
 
+    stage2Length=mbcsData->stage2Top;
+    if(mbcsData->omitFromU) {
+        /* find how much of stage2 can be omitted */
+        int32_t utf8Limit=(int32_t)mbcsData->utf8Max+1;
+        uint32_t st2;
+
+        i=utf8Limit>>MBCS_STAGE_1_SHIFT;
+        if((utf8Limit&((1<<MBCS_STAGE_1_SHIFT)-1))!=0 && (st2=mbcsData->stage1[i])!=0) {
+            /* utf8Limit is in the middle of an existing stage 2 block */
+            stage2Start=st2+((utf8Limit>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK);
+        } else {
+            /* find the last stage2 block with mappings before utf8Limit */
+            while(i>0 && (st2=mbcsData->stage1[--i])==0) {}
+            /* stage2 up to the end of this block corresponds to stageUTF8 */
+            stage2Start=st2+MBCS_STAGE_2_BLOCK_SIZE;
+        }
+        header.options|=MBCS_OPT_NO_FROM_U;
+        header.fullStage2Length=stage2Length;
+        stage2Length-=stage2Start;
+        if(VERBOSE) {
+            printf("+ omitting %lu out of %lu stage2 entries and %lu fromUBytes\n",
+                   stage2Start, mbcsData->stage2Top, mbcsData->stage3Top);
+            printf("+ total size savings: %lu bytes\n", stage2Start*4+mbcsData->stage3Top);
+        }
+    } else {
+        stage2Start=0;
+    }
+
+    if(staticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
+        stage1Top=MBCS_STAGE_1_SIZE; /* 0x440==1088 */
+    } else {
+        stage1Top=0x40; /* 0x40==64 */
+    }
+
     /* adjust stage 1 entries to include the size of stage 1 in the offsets to stage 2 */
     if(mbcsData->ucm->states.maxCharLength==1) {
-        if(staticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
-            stage1Top=MBCS_STAGE_1_SIZE; /* 0x440==1088 */
-        } else {
-            stage1Top=0x40; /* 0x40==64 */
-        }
         for(i=0; i<stage1Top; ++i) {
             mbcsData->stage1[i]+=(uint16_t)stage1Top;
         }
 
-        /* stage2Top has counted 16-bit results, now we need to count bytes */
-        mbcsData->stage2Top*=2;
+        /* stage2Top/Length have counted 16-bit results, now we need to count bytes */
+        /* also round up to a multiple of 4 bytes */
+        stage2Length=(stage2Length*2+1)&~1;
 
         /* stage3Top has counted 16-bit results, now we need to count bytes */
         mbcsData->stage3Top*=2;
@@ -1355,40 +1461,47 @@ MBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
             header.version[2]=(uint8_t)(SBCS_UTF8_MAX>>8); /* store 0x1f for max==0x1fff */
         }
     } else {
-        if(staticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
-            stage1Top=MBCS_STAGE_1_SIZE; /* 0x440==1088 */
-        } else {
-            stage1Top=0x40; /* 0x40==64 */
-        }
         for(i=0; i<stage1Top; ++i) {
             mbcsData->stage1[i]+=(uint16_t)stage1Top/2; /* stage 2 contains 32-bit entries, stage 1 16-bit entries */
         }
 
-        /* stage2Top has counted 32-bit results, now we need to count bytes */
-        mbcsData->stage2Top*=4;
+        /* stage2Top/Length have counted 32-bit results, now we need to count bytes */
+        stage2Length*=4;
+        /* leave stage2Start counting 32-bit units */
 
         if(mbcsData->utf8Friendly) {
-            stageUTF8Length=MBCS_UTF8_STAGE_SIZE;
-            header.version[2]=(uint8_t)(MBCS_UTF8_MAX>>8); /* store 0xd7 for max==0xd7ff */
+            stageUTF8Length=(mbcsData->utf8Max+1)>>MBCS_UTF8_STAGE_SHIFT;
+            header.version[2]=(uint8_t)(mbcsData->utf8Max>>8); /* store 0xd7 for max==0xd7ff */
         }
 
         /* stage3Top has already counted bytes */
     }
 
-    /* round up stage2Top and stage3Top so that the sizes of all data blocks are multiples of 4 */
-    mbcsData->stage2Top=(mbcsData->stage2Top+3)&~3;
+    /* round up stage3Top so that the sizes of all data blocks are multiples of 4 */
     mbcsData->stage3Top=(mbcsData->stage3Top+3)&~3;
 
     /* fill the header */
-    header.version[0]=4;
+    if(header.options&MBCS_OPT_INCOMPATIBLE_MASK) {
+        header.version[0]=5;
+        if(header.options&MBCS_OPT_NO_FROM_U) {
+            headerLength=10;  /* include fullStage2Length */
+        } else {
+            headerLength=MBCS_HEADER_V5_MIN_LENGTH;  /* 9 */
+        }
+    } else {
+        header.version[0]=4;
+        headerLength=MBCS_HEADER_V4_LENGTH;  /* 8 */
+    }
     header.version[1]=3;
     /* header.version[2] set above for utf8Friendly data */
 
+    header.options|=(uint32_t)headerLength;
+
     header.countStates=mbcsData->ucm->states.countStates;
     header.countToUFallbacks=mbcsData->countToUFallbacks;
 
     header.offsetToUCodeUnits=
-        sizeof(_MBCSHeader)+
+        headerLength*4+
         mbcsData->ucm->states.countStates*1024+
         mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback);
     header.offsetFromUTable=
@@ -1397,10 +1510,13 @@ MBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
     header.offsetFromUBytes=
         header.offsetFromUTable+
         stage1Top*2+
-        mbcsData->stage2Top;
+        stage2Length;
     header.fromUBytesLength=mbcsData->stage3Top;
 
-    top=header.offsetFromUBytes+header.fromUBytesLength+stageUTF8Length*2;
+    top=header.offsetFromUBytes+stageUTF8Length*2;
+    if(!(header.options&MBCS_OPT_NO_FROM_U)) {
+        top+=header.fromUBytesLength;
+    }
 
     header.flags=(uint8_t)(mbcsData->ucm->states.outputType);
 
@@ -1414,17 +1530,19 @@ MBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
     }
 
     /* write the MBCS data */
-    udata_writeBlock(pData, &header, sizeof(_MBCSHeader));
+    udata_writeBlock(pData, &header, headerLength*4);
     udata_writeBlock(pData, mbcsData->ucm->states.stateTable, header.countStates*1024);
     udata_writeBlock(pData, mbcsData->toUFallbacks, mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback));
     udata_writeBlock(pData, mbcsData->unicodeCodeUnits, mbcsData->ucm->states.countToUCodeUnits*2);
     udata_writeBlock(pData, mbcsData->stage1, stage1Top*2);
     if(mbcsData->ucm->states.maxCharLength==1) {
-        udata_writeBlock(pData, mbcsData->stage2Single, mbcsData->stage2Top);
+        udata_writeBlock(pData, mbcsData->stage2Single+stage2Start, stage2Length);
     } else {
-        udata_writeBlock(pData, mbcsData->stage2, mbcsData->stage2Top);
+        udata_writeBlock(pData, mbcsData->stage2+stage2Start, stage2Length);
+    }
+    if(!(header.options&MBCS_OPT_NO_FROM_U)) {
+        udata_writeBlock(pData, mbcsData->fromUBytes, mbcsData->stage3Top);
     }
-    udata_writeBlock(pData, mbcsData->fromUBytes, mbcsData->stage3Top);
 
     if(stageUTF8Length>0) {
         udata_writeBlock(pData, mbcsData->stageUTF8, stageUTF8Length*2);
diff --git a/icu4c/source/tools/makeconv/genmbcs.h b/icu4c/source/tools/makeconv/genmbcs.h
index 60f52e3ddb..cb0cc5e6eb 100644
--- a/icu4c/source/tools/makeconv/genmbcs.h
+++ b/icu4c/source/tools/makeconv/genmbcs.h
@@ -101,9 +101,20 @@ enum {
 U_CFUNC NewConverter *
 MBCSOpen(UCMFile *ucm);
 
+struct MBCSData;
+typedef struct MBCSData MBCSData;
+
+/*
+ * Get a dummy MBCSData for use with MBCSOkForBaseFromUnicode()
+ * for creating an extension-only file.
+ * Assume maxCharLength>1.
+ */
+U_CFUNC const MBCSData *
+MBCSGetDummy();
+
 /* Test if a 1:1 mapping fits into the MBCS base table's fromUnicode structure. */
 U_CFUNC UBool
-MBCSOkForBaseFromUnicode(UBool utf8Friendly,
+MBCSOkForBaseFromUnicode(const MBCSData *mbcsData,
                          const uint8_t *bytes, int32_t length,
                          UChar32 c, int8_t flag);
 
diff --git a/icu4c/source/tools/makeconv/makeconv.c b/icu4c/source/tools/makeconv/makeconv.c
index d5aeafadc6..7e62c86809 100644
--- a/icu4c/source/tools/makeconv/makeconv.c
+++ b/icu4c/source/tools/makeconv/makeconv.c
@@ -34,6 +34,8 @@
 #include "makeconv.h"
 #include "genmbcs.h"
 
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
 #define DEBUG 0
 
 typedef struct ConvData {
@@ -76,6 +78,7 @@ extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP
  * Global - verbosity
  */
 UBool VERBOSE = FALSE;
+UBool SMALL = FALSE;
 
 static void
 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
@@ -163,13 +166,25 @@ writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr
     }
 }
 
+enum {
+    OPT_HELP_H,
+    OPT_HELP_QUESTION_MARK,
+    OPT_COPYRIGHT,
+    OPT_VERSION,
+    OPT_DESTDIR,
+    OPT_VERBOSE,
+    OPT_SMALL,
+    OPT_COUNT
+};
+
 static UOption options[]={
-    UOPTION_HELP_H,              /* 0  Numbers for those who*/
-    UOPTION_HELP_QUESTION_MARK,  /* 1   can't count. */
-    UOPTION_COPYRIGHT,           /* 2 */
-    UOPTION_VERSION,             /* 3 */
-    UOPTION_DESTDIR,             /* 4 */
-    UOPTION_VERBOSE,             /* 5 */
+    UOPTION_HELP_H,
+    UOPTION_HELP_QUESTION_MARK,
+    UOPTION_COPYRIGHT,
+    UOPTION_VERSION,
+    UOPTION_DESTDIR,
+    UOPTION_VERBOSE,
+    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
 };
 
 int main(int argc, char* argv[])
@@ -194,8 +209,8 @@ int main(int argc, char* argv[])
     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
 
     /* preset then read command line options */
-    options[4].value=u_getDataDirectory();
-    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
+    options[OPT_DESTDIR].value=u_getDataDirectory();
+    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
 
     /* error handling, printing usage message */
     if(argc<0) {
@@ -205,8 +220,9 @@ int main(int argc, char* argv[])
     } else if(argc<2) {
         argc=-1;
     }
-    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
-        fprintf(stderr,
+    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
+        FILE *stdfile=argc<0 ? stderr : stdout;
+        fprintf(stdfile,
             "usage: %s [-options] files...\n"
             "\tread .ucm codepage mapping files and write .cnv files\n"
             "options:\n"
@@ -216,20 +232,26 @@ int main(int argc, char* argv[])
             "\t-d or --destdir     destination directory, followed by the path\n"
             "\t-v or --verbose     Turn on verbose output\n",
             argv[0]);
+        fprintf(stdfile,
+            "\t      --small       Generate smaller .cnv files. They will be\n"
+            "\t                    significantly smaller but may not be compatible with\n"
+            "\t                    older versions of ICU and will require heap memory\n"
+            "\t                    allocation when loaded.\n");
         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
     }
 
-    if(options[3].doesOccur) {
-        fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
-            dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
-        fprintf(stderr, U_COPYRIGHT_STRING "\n");
+    if(options[OPT_VERSION].doesOccur) {
+        printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
+               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
+        printf("%s\n", U_COPYRIGHT_STRING);
         exit(0);
     }
 
     /* get the options values */
-    haveCopyright = options[2].doesOccur;
-    destdir = options[4].value;
-    VERBOSE = options[5].doesOccur;
+    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
+    destdir = options[OPT_DESTDIR].value;
+    VERBOSE = options[OPT_VERBOSE].doesOccur;
+    SMALL = options[OPT_SMALL].doesOccur;
 
     if (destdir != NULL && *destdir != 0) {
         uprv_strcpy(outFileName, destdir);
@@ -766,12 +788,13 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod
                          *
                          * Do this after ucm_checkBaseExt().
                          */
+                        const MBCSData *mbcsData=MBCSGetDummy();
                         int32_t needsMove=0;
                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
                             m<mLimit;
                             ++m
                         ) {
-                            if(!MBCSOkForBaseFromUnicode(TRUE, m->b.bytes, m->bLen, m->u, m->f)) {
+                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
                                 m->f|=MBCS_FROM_U_EXT_FLAG;
                                 m->moveFlag=UCM_MOVE_TO_EXT;
                                 ++needsMove;
diff --git a/icu4c/source/tools/makeconv/makeconv.h b/icu4c/source/tools/makeconv/makeconv.h
index 0fa0fb2d1d..a3c2d375a1 100644
--- a/icu4c/source/tools/makeconv/makeconv.h
+++ b/icu4c/source/tools/makeconv/makeconv.h
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2000-2006, International Business Machines
+*   Copyright (C) 2000-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -24,6 +24,7 @@
 
 /* exports from makeconv.c */
 U_CFUNC UBool VERBOSE;
+U_CFUNC UBool SMALL;
 
 /* converter table type for writing */
 enum {
diff --git a/icu4c/source/tools/toolutil/pkgitems.cpp b/icu4c/source/tools/toolutil/pkgitems.cpp
index 6a93769c0d..2a8f01289a 100644
--- a/icu4c/source/tools/toolutil/pkgitems.cpp
+++ b/icu4c/source/tools/toolutil/pkgitems.cpp
@@ -497,7 +497,7 @@ ucnv_enumDependencies(const UDataSwapper *ds,
     /* check for supported conversionType values */
     if(inStaticData->conversionType==UCNV_MBCS) {
         /* MBCS data */
-        uint32_t mbcsHeaderFlags;
+        uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions;
         int32_t extOffset;
 
         inMBCSHeader=(const _MBCSHeader *)inBytes;
@@ -508,7 +508,14 @@ ucnv_enumDependencies(const UDataSwapper *ds,
             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
             return;
         }
-        if(!(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1)) {
+        if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
+            mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
+        } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
+                  ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))&
+                   MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
+        ) {
+            mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK;
+        } else {
             udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
                              inMBCSHeader->version[0], inMBCSHeader->version[1]);
             *pErrorCode=U_UNSUPPORTED_ERROR;
@@ -536,14 +543,15 @@ ucnv_enumDependencies(const UDataSwapper *ds,
             }
 
             /* swap the base name, between the header and the extension data */
-            baseNameLength=(int32_t)strlen((const char *)(inMBCSHeader+1));
+            const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4;
+            baseNameLength=(int32_t)strlen(inBaseName);
             if(baseNameLength>=(int32_t)sizeof(baseName)) {
                 udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
                                  itemName, baseNameLength);
                 *pErrorCode=U_UNSUPPORTED_ERROR;
                 return;
             }
-            ds->swapInvChars(ds, inMBCSHeader+1, baseNameLength+1, baseName, pErrorCode);
+            ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode);
 
             checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
         }