ICU-3432 move uniset.cpp data for case closure to ucase.icu; have gencase build case closure data; ucase.c use it; UnicodeSet::closeOver() call that

X-SVN-Rev: 16902
2004-12-02 04:18:35 +00:00 · 2004-12-02 04:18:35 +00:00 · ca77616509
commit ca77616509
parent 67f46c57e8
25 changed files with 1002 additions and 690 deletions
--- a/icu4c/source/common/ucase.c
+++ b/icu4c/source/common/ucase.c
@ -33,6 +33,7 @@ struct UCaseProps {
    UDataMemory *mem;
    const int32_t *indexes;
    const uint16_t *exceptions;
+    const UChar *unfold;

    UTrie trie;
    uint8_t formatVersion[4];
@ -68,38 +69,50 @@ static UCaseProps *
 ucase_openData(UCaseProps *cspProto,
               const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
    UCaseProps *csp;
-    int32_t size, trieSize;
+    int32_t size;

    cspProto->indexes=(const int32_t *)bin;
-    if( cspProto->indexes[UCASE_IX_INDEX_TOP]<16 ||
-        (length>=0 && length<cspProto->indexes[UCASE_IX_LENGTH])
+    if( (length>=0 && length<16*4) ||
+        cspProto->indexes[UCASE_IX_INDEX_TOP]<16
    ) {
+        /* length or indexes[] too short for minimum indexes[] length of 16 */
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return NULL;
    }
-
-    /* get the trie address, after indexes[] */
    size=cspProto->indexes[UCASE_IX_INDEX_TOP]*4;
-    bin+=size;
-    if(length>=0 && (length-=size)<16) {
-        *pErrorCode=U_INVALID_FORMAT_ERROR;
-        return NULL;
+    if(length>=0) {
+        if(length>=size && length>=cspProto->indexes[UCASE_IX_LENGTH]) {
+            length-=size;
+        } else {
+            /* length too short for indexes[] or for the whole data length */
+            *pErrorCode=U_INVALID_FORMAT_ERROR;
+            return NULL;
+        }
    }
+    bin+=size;
+    /* from here on, assume that the sizes of the items fit into the total length */

-    /* unserialize the trie */
-    trieSize=cspProto->indexes[UCASE_IX_TRIE_SIZE];
-    trieSize=utrie_unserialize(&cspProto->trie, bin, length>=0 ? length : trieSize, pErrorCode);
+    /* unserialize the trie, after indexes[] */
+    size=cspProto->indexes[UCASE_IX_TRIE_SIZE];
+    utrie_unserialize(&cspProto->trie, bin, size, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
+    bin+=size;

    /* get exceptions[] */
-    bin+=trieSize;
-    if(length>=0 && (length-=trieSize)<2*cspProto->indexes[UCASE_IX_EXC_LENGTH]) {
-        *pErrorCode=U_INVALID_FORMAT_ERROR;
-        return NULL;
-    }
+    size=2*cspProto->indexes[UCASE_IX_EXC_LENGTH];
    cspProto->exceptions=(const uint16_t *)bin;
+    bin+=size;
+
+    /* get unfold[] */
+    size=2*cspProto->indexes[UCASE_IX_UNFOLD_LENGTH];
+    if(size!=0) {
+        cspProto->unfold=(const UChar *)bin;
+        bin+=size;
+    } else {
+        cspProto->unfold=NULL;
+    }

    /* allocate, copy, and return the new UCaseProps */
    csp=(UCaseProps *)uprv_malloc(sizeof(UCaseProps));
@ -322,8 +335,8 @@ ucase_swap(const UDataSwapper *ds,
        utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
        offset+=count;

-        /* swap the uint16_t exceptions[] */
-        count=indexes[UCASE_IX_EXC_LENGTH]*2;
+        /* swap the uint16_t exceptions[] and unfold[] */
+        count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
        offset+=count;

@ -338,13 +351,13 @@ ucase_swap(const UDataSwapper *ds,
 static UBool U_CALLCONV
 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
    /* add the start code point to the USet */
-    USetAdder *sa=(USetAdder *)context;
+    const USetAdder *sa=(const USetAdder *)context;
    sa->add(sa->set, start);
    return TRUE;
 }

 U_CAPI void U_EXPORT2
-ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pErrorCode) {
+ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return;
    }
@ -368,8 +381,6 @@ ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pError
 #define GET_PROPS(csp, c, result) \
    UTRIE_GET16(&(csp)->trie, c, result);

-#define GET_CASE_TYPE(props) ((props)&UCASE_TYPE_MASK)
-#define GET_SIGNED_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)
 #define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))

 #define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
@ -423,8 +434,8 @@ ucase_tolower(const UCaseProps *csp, UChar32 c) {
    uint16_t props;
    GET_PROPS(csp, c, props);
    if(!PROPS_HAS_EXCEPTION(props)) {
-        if(GET_CASE_TYPE(props)>=UCASE_UPPER) {
-            c+=GET_SIGNED_DELTA(props);
+        if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
+            c+=UCASE_GET_DELTA(props);
        }
    } else {
        const uint16_t *pe=GET_EXCEPTIONS(csp, props);
@ -441,8 +452,8 @@ ucase_toupper(const UCaseProps *csp, UChar32 c) {
    uint16_t props;
    GET_PROPS(csp, c, props);
    if(!PROPS_HAS_EXCEPTION(props)) {
-        if(GET_CASE_TYPE(props)==UCASE_LOWER) {
-            c+=GET_SIGNED_DELTA(props);
+        if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
+            c+=UCASE_GET_DELTA(props);
        }
    } else {
        const uint16_t *pe=GET_EXCEPTIONS(csp, props);
@ -459,8 +470,8 @@ ucase_totitle(const UCaseProps *csp, UChar32 c) {
    uint16_t props;
    GET_PROPS(csp, c, props);
    if(!PROPS_HAS_EXCEPTION(props)) {
-        if(GET_CASE_TYPE(props)==UCASE_LOWER) {
-            c+=GET_SIGNED_DELTA(props);
+        if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
+            c+=UCASE_GET_DELTA(props);
        }
    } else {
        const uint16_t *pe=GET_EXCEPTIONS(csp, props);
@ -478,12 +489,231 @@ ucase_totitle(const UCaseProps *csp, UChar32 c) {
    return c;
 }

+U_CAPI void U_EXPORT2
+ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
+    uint16_t props;
+
+    /*
+     * Reports the case closure items of c through the callbacks in sa:
+     * single code points via sa->add() and strings via sa->addString().
+     *
+     * Hardcode the case closure of i and its relatives and ignore the
+     * data file data for these characters.
+     * The Turkic dotless i and dotted I with their case mapping conditions
+     * and case folding option make the related characters behave specially.
+     * This code matches their closure behavior to their case folding behavior.
+     */
+    static const UChar
+        iDot[2]=        { 0x69, 0x307 };
+
+    switch(c) {
+    case 0x49:
+        /* regular i and I are in one equivalence class */
+        sa->add(sa->set, 0x69);
+        return;
+    case 0x69:
+        sa->add(sa->set, 0x49);
+        return;
+    case 0x130:
+        /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
+        sa->addString(sa->set, iDot, 2);
+        return;
+    case 0x131:
+        /* dotless i is in a class by itself */
+        return;
+    default:
+        /* otherwise use the data file data */
+        break;
+    }
+
+    GET_PROPS(csp, c, props);
+    if(!PROPS_HAS_EXCEPTION(props)) {
+        if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
+            /*
+             * add the one simple case mapping, no matter what type it is;
+             * a delta of 0 means that there is no mapping to add
+             */
+            int32_t delta=UCASE_GET_DELTA(props);
+            if(delta!=0) {
+                sa->add(sa->set, c+delta);
+            }
+        }
+    } else {
+        /*
+         * c has exceptions, so there may be multiple simple and/or
+         * full case mappings. Add them all.
+         *
+         * excWord is the first exceptions word; pe0 remembers the position
+         * right behind it, and pe is reset to pe0 before each slot lookup.
+         */
+        const uint16_t *pe0, *pe=GET_EXCEPTIONS(csp, props);
+        const UChar *closure;
+        uint16_t excWord=*pe++;
+        int32_t index, closureLength, fullLength, length;
+
+        pe0=pe;
+
+        /*
+         * add all simple case mappings
+         * (c is passed to GET_SLOT_VALUE() as the output variable, so the
+         * original input code point is overwritten from here on)
+         */
+        for(index=UCASE_EXC_LOWER; index<=UCASE_EXC_TITLE; ++index) {
+            if(HAS_SLOT(excWord, index)) {
+                pe=pe0;
+                GET_SLOT_VALUE(excWord, index, pe, c);
+                sa->add(sa->set, c);
+            }
+        }
+
+        /*
+         * get the closure string pointer & length
+         *
+         * Without a closure slot, closureLength is 0 and closure is not
+         * assigned here; that is harmless because the closure loop at the
+         * end only reads closure while index<closureLength.
+         */
+        if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
+            pe=pe0;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
+            closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
+            closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
+        } else {
+            closureLength=0;
+        }
+
+#if 0
+        /* add all full case mappings */
+        if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+            pe=pe0;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
+            ++pe;
+            fullLength&=0xffff; /* bits 16 and higher are reserved */
+            while(fullLength!=0) {
+                length=fullLength&0xf;
+                if(length!=0) {
+                    sa->addString(sa->set, (const UChar *)pe, length);
+                    pe+=length;
+                }
+                fullLength>>=4;
+            }
+            closure=(const UChar *)pe; /* behind full case mappings */
+        }
+#endif
+
+        /*
+         * add the full case folding
+         *
+         * fullLength packs four 4-bit string lengths; the strings are
+         * walked in the order lowercase, case folding, uppercase, titlecase,
+         * and only the case folding string is added.
+         */
+        if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+            pe=pe0;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
+
+            /* start of full case mapping strings */
+            ++pe;
+
+            fullLength&=0xffff; /* bits 16 and higher are reserved */
+
+            /* skip the lowercase result string */
+            pe+=fullLength&UCASE_FULL_LOWER;
+            fullLength>>=4;
+
+            /* add the full case folding string */
+            length=fullLength&0xf;
+            if(length!=0) {
+                sa->addString(sa->set, (const UChar *)pe, length);
+                pe+=length;
+            }
+
+            /* skip the uppercase and titlecase strings */
+            fullLength>>=4;
+            pe+=fullLength&0xf;
+            fullLength>>=4;
+            pe+=fullLength;
+
+            closure=(const UChar *)pe; /* behind full case mappings */
+        }
+
+        /*
+         * add each code point in the closure string
+         * (index is the UTF-16 code unit offset advanced by U16_NEXT_UNSAFE,
+         * so closureLength is compared as a number of code units)
+         */
+        for(index=0; index<closureLength;) {
+            U16_NEXT_UNSAFE(closure, index, c);
+            sa->add(sa->set, c);
+        }
+    }
+}
+
+/*
+ * compare s, which has a length, with t, which has a maximum length or is NUL-terminated
+ * must be length>0 and max>0 and length<=max
+ *
+ * return value:
+ * - 1 if a NUL is found in t before all length units of s have been compared
+ * - the difference (s unit minus t unit) at the first position where they differ
+ * - 0 if all length units match and t ends right there,
+ *   either because length==max or because the next unit of t is NUL
+ * - the negative value -(max-length) if all length units match but t continues
+ */
+static U_INLINE int32_t
+strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
+    int32_t c1, c2;
+
+    max-=length; /* we require length<=max, so no need to decrement max in the loop */
+    do {
+        c1=*s++;
+        c2=*t++;
+        if(c2==0) {
+            return 1; /* reached the end of t but not of s */
+        }
+        c1-=c2;
+        if(c1!=0) {
+            return c1; /* return difference result */
+        }
+    } while(--length>0);
+    /* ends with length==0; max now holds the number of units of t not yet looked at */
+
+    if(max==0 || *t==0) {
+        return 0; /* equal to length of both strings */
+    } else {
+        return -max; /* return length difference */
+    }
+}
+
+U_CAPI UBool U_EXPORT2
+ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) {
+    const UChar *unfold, *p;
+    int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth, unfoldCPWidth;
+
+    if(csp->unfold==NULL || s==NULL) {
+        return FALSE; /* no reverse case folding data, or no string */
+    }
+    if(length<=1) {
+        /* the string is too short to find any match */
+        /*
+         * more precise would be:
+         * if(!u_strHasMoreChar32Than(s, length, 1))
+         * but this does not make much practical difference because
+         * a single supplementary code point would just not be found
+         */
+        return FALSE;
+    }
+
+    unfold=csp->unfold;
+    unfoldRows=unfold[UCASE_UNFOLD_ROWS];
+    unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
+    unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
+    unfoldCPWidth=unfoldRowWidth-unfoldStringWidth;
+    unfold+=unfoldRowWidth;
+
+    if(length>unfoldStringWidth) {
+        /* the string is too long to find any match */
+        return FALSE;
+    }
+
+    /*
+     * do a binary search for the string
+     *
+     * Table layout as used here: the header values were read from the
+     * first unfoldRowWidth units, which have just been skipped.
+     * Each of the unfoldRows following rows is unfoldRowWidth units wide:
+     * the first unfoldStringWidth units hold the string that is compared
+     * with s, and the remaining units hold the code points to add,
+     * ending at the row end or at the first NUL.
+     * start and limit delimit the half-open range of rows still to search.
+     */
+    start=0;
+    limit=unfoldRows;
+    while(start<limit) {
+        i=(start+limit)/2;
+        p=unfold+(i*unfoldRowWidth);
+        result=strcmpMax(s, length, p, unfoldStringWidth);
+
+        if(result==0) {
+            /*
+             * found the string: add each code point, and its case closure
+             * (i is reused as the unit index into the row, which is fine
+             * because the function returns right after this loop)
+             */
+            UChar32 c;
+
+            for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
+                U16_NEXT_UNSAFE(p, i, c);
+                sa->add(sa->set, c);
+                ucase_addCaseClosure(csp, c, sa);
+            }
+            return TRUE;
+        } else if(result<0) {
+            limit=i;
+        } else /* result>0 */ {
+            start=i+1;
+        }
+    }
+
+    return FALSE; /* string not found */
+}
+
 /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
 U_CAPI int32_t U_EXPORT2
 ucase_getType(const UCaseProps *csp, UChar32 c) {
    uint16_t props;
    GET_PROPS(csp, c, props);
-    return GET_CASE_TYPE(props);
+    return UCASE_GET_TYPE(props);
 }

 /** @return same as ucase_getType(), or <0 if c is case-ignorable */
@ -492,7 +722,7 @@ ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) {
    int32_t type;
    uint16_t props;
    GET_PROPS(csp, c, props);
-    type=GET_CASE_TYPE(props);
+    type=UCASE_GET_TYPE(props);
    if(type!=UCASE_NONE) {
        return type;
    } else if(
@ -775,7 +1005,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void

    for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
        GET_PROPS(csp, c, props);
-        if(GET_CASE_TYPE(props)!=UCASE_NONE) {
+        if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
            return TRUE; /* followed by cased letter */
        } else if(c==0x307 || (props&(UCASE_EXCEPTION|UCASE_CASE_IGNORABLE))==UCASE_CASE_IGNORABLE) {
            /* case-ignorable, continue with the loop */
@ -934,8 +1164,8 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
    result=c;
    GET_PROPS(csp, c, props);
    if(!PROPS_HAS_EXCEPTION(props)) {
-        if(GET_CASE_TYPE(props)>=UCASE_UPPER) {
-            result=c+GET_SIGNED_DELTA(props);
+        if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
+            result=c+UCASE_GET_DELTA(props);
        }
    } else {
        const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
@ -1081,8 +1311,8 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c,
    result=c;
    GET_PROPS(csp, c, props);
    if(!PROPS_HAS_EXCEPTION(props)) {
-        if(GET_CASE_TYPE(props)==UCASE_LOWER) {
-            result=c+GET_SIGNED_DELTA(props);
+        if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
+            result=c+UCASE_GET_DELTA(props);
        }
    } else {
        const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
@ -1236,8 +1466,8 @@ ucase_fold(UCaseProps *csp, UChar32 c, uint32_t options) {
    uint16_t props;
    GET_PROPS(csp, c, props);
    if(!PROPS_HAS_EXCEPTION(props)) {
-        if(GET_CASE_TYPE(props)>=UCASE_UPPER) {
-            c+=GET_SIGNED_DELTA(props);
+        if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
+            c+=UCASE_GET_DELTA(props);
        }
    } else {
        const uint16_t *pe=GET_EXCEPTIONS(csp, props);
@ -1305,8 +1535,8 @@ ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
    result=c;
    GET_PROPS(csp, c, props);
    if(!PROPS_HAS_EXCEPTION(props)) {
-        if(GET_CASE_TYPE(props)>=UCASE_UPPER) {
-            result=c+GET_SIGNED_DELTA(props);
+        if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
+            result=c+UCASE_GET_DELTA(props);
        }
    } else {
        const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
--- a/icu4c/source/common/ucase.h
+++ b/icu4c/source/common/ucase.h
@ -51,7 +51,7 @@ ucase_swap(const UDataSwapper *ds,
           UErrorCode *pErrorCode);

 U_CAPI void U_EXPORT2
-ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pErrorCode);
+ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode);

 /**
 * Bit mask for getting just the options from a string compare options word
@ -83,6 +83,33 @@ ucase_totitle(const UCaseProps *csp, UChar32 c);
 U_CAPI UChar32 U_EXPORT2
 ucase_fold(UCaseProps *csp, UChar32 c, uint32_t options);

+/**
+ * Adds all simple case mappings and the full case folding for c to sa,
+ * and also adds special case closure mappings.
+ * c itself is not added.
+ * For example, the mappings
+ * - for s include long s
+ * - for sharp s include ss
+ * - for k include the Kelvin sign
+ */
+U_CAPI void U_EXPORT2
+ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa);
+
+/**
+ * Maps the string to single code points and adds the associated case closure
+ * mappings.
+ * The string is mapped to code points if it is their full case folding string.
+ * In other words, this performs a reverse full case folding and then
+ * adds the case closure items of the resulting code points.
+ * If the string is found and its closure applied, then
+ * the string itself is added as well as part of its code points' closure.
+ * It must be length>=0.
+ *
+ * @return TRUE if the string was found
+ */
+U_CAPI UBool U_EXPORT2
+ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa);
+
 /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
 U_CAPI int32_t U_EXPORT2
 ucase_getType(const UCaseProps *csp, UChar32 c);
@ -211,6 +238,7 @@ enum {
    UCASE_IX_LENGTH,
    UCASE_IX_TRIE_SIZE,
    UCASE_IX_EXC_LENGTH,
+    UCASE_IX_UNFOLD_LENGTH,

    UCASE_IX_MAX_FULL_LENGTH=15,
    UCASE_IX_TOP=16
@ -227,6 +255,8 @@ enum {
    UCASE_TITLE
 };

+#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
+
 #define UCASE_SENSITIVE     4
 #define UCASE_EXCEPTION     8

@ -264,7 +294,7 @@ enum {
    UCASE_EXC_TITLE,
    UCASE_EXC_4,            /* reserved */
    UCASE_EXC_5,            /* reserved */
-    UCASE_EXC_6,            /* reserved */
+    UCASE_EXC_CLOSURE,
    UCASE_EXC_FULL_MAPPINGS,
    UCASE_EXC_ALL_SLOTS     /* one past the last slot */
 };
@ -296,6 +326,17 @@ enum {
 #define UCASE_FULL_UPPER    0xf00
 #define UCASE_FULL_TITLE    0xf000

+/* maximum lengths */
+#define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf)
+#define UCASE_CLOSURE_MAX_LENGTH 0xf
+
+/* constants for reverse case folding ("unfold") data */
+enum {
+    UCASE_UNFOLD_ROWS,
+    UCASE_UNFOLD_ROW_WIDTH,
+    UCASE_UNFOLD_STRING_WIDTH
+};
+
 U_CDECL_END

 #endif
--- a/icu4c/source/common/uchar.c
+++ b/icu4c/source/common/uchar.c
@ -997,7 +997,7 @@ ublock_getCode(UChar32 c) {

 /* for Hangul_Syllable_Type */
 U_CAPI void U_EXPORT2
-uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
+uhst_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    UChar32 c;
    int32_t value, value2;

@ -1061,7 +1061,7 @@ uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
 static UBool U_CALLCONV
 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
    /* add the start code point to the USet */
-    USetAdder *sa=(USetAdder *)context;
+    const USetAdder *sa=(const USetAdder *)context;
    sa->add(sa->set, start);
    return TRUE;
 }
@ -1069,7 +1069,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint
 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1)

 U_CAPI void U_EXPORT2
-uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
+uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return;
    }
--- a/icu4c/source/common/ucnv2022.c
+++ b/icu4c/source/common/ucnv2022.c
@ -2979,7 +2979,7 @@ _ISO_2022_SafeClone(

 static void
 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
-                    USetAdder *sa,
+                    const USetAdder *sa,
                    UConverterUnicodeSet which,
                    UErrorCode *pErrorCode)
 {
--- a/icu4c/source/common/ucnv_cnv.c
+++ b/icu4c/source/common/ucnv_cnv.c
@ -28,7 +28,7 @@

 U_CFUNC void
 ucnv_getCompleteUnicodeSet(const UConverter *cnv,
-                   USetAdder *sa,
+                   const USetAdder *sa,
                   UConverterUnicodeSet which,
                   UErrorCode *pErrorCode) {
    sa->addRange(sa->set, 0, 0x10ffff);
@ -36,7 +36,7 @@ ucnv_getCompleteUnicodeSet(const UConverter *cnv,

 U_CFUNC void
 ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
-                               USetAdder *sa,
+                               const USetAdder *sa,
                               UConverterUnicodeSet which,
                               UErrorCode *pErrorCode) {
    sa->addRange(sa->set, 0, 0xd7ff);
--- a/icu4c/source/common/ucnv_cnv.h
+++ b/icu4c/source/common/ucnv_cnv.h
@ -171,7 +171,7 @@ typedef UConverter * (*UConverterSafeClone) (const UConverter   *cnv,
 * For more documentation, see ucnv_getUnicodeSet() in ucnv.h.
 */
 typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
-                                         USetAdder *sa,
+                                         const USetAdder *sa,
                                         UConverterUnicodeSet which,
                                         UErrorCode *pErrorCode);

@ -246,13 +246,13 @@ U_CDECL_END

 U_CFUNC void
 ucnv_getCompleteUnicodeSet(const UConverter *cnv,
-                   USetAdder *sa,
+                   const USetAdder *sa,
                   UConverterUnicodeSet which,
                   UErrorCode *pErrorCode);

 U_CFUNC void
 ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
-                               USetAdder *sa,
+                               const USetAdder *sa,
                               UConverterUnicodeSet which,
                               UErrorCode *pErrorCode);

--- a/icu4c/source/common/ucnv_ext.c
+++ b/icu4c/source/common/ucnv_ext.c
@ -932,7 +932,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
 static void
 ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
                            const int32_t *cx,
-                            USetAdder *sa,
+                            const USetAdder *sa,
                            UConverterUnicodeSet which,
                            int32_t minLength,
                            UChar32 c,
@ -989,7 +989,7 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,

 U_CFUNC void
 ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
-                      USetAdder *sa,
+                      const USetAdder *sa,
                      UConverterUnicodeSet which,
                      UErrorCode *pErrorCode) {
    const int32_t *cx;
--- a/icu4c/source/common/ucnv_ext.h
+++ b/icu4c/source/common/ucnv_ext.h
@ -384,7 +384,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,

 U_CFUNC void
 ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
-                      USetAdder *sa,
+                      const USetAdder *sa,
                      UConverterUnicodeSet which,
                      UErrorCode *pErrorCode);

--- a/icu4c/source/common/ucnv_lmb.c
+++ b/icu4c/source/common/ucnv_lmb.c
@ -664,7 +664,7 @@ _LMBCSSafeClone(const UConverter *cnv,

 static void
 _LMBCSGetUnicodeSet(const UConverter *cnv,
-                   USetAdder *sa,
+                   const USetAdder *sa,
                   UConverterUnicodeSet which,
                   UErrorCode *pErrorCode) {
    /* all but U+F6xx, see LMBCS explanation above (search for F6xx) */
--- a/icu4c/source/common/ucnvhz.c
+++ b/icu4c/source/common/ucnvhz.c
@ -510,7 +510,7 @@ _HZ_SafeClone(const UConverter *cnv,

 static void
 _HZ_GetUnicodeSet(const UConverter *cnv,
-                  USetAdder *sa,
+                  const USetAdder *sa,
                  UConverterUnicodeSet which,
                  UErrorCode *pErrorCode) {
    /* the tilde '~' is hardcoded in the converter */
--- a/icu4c/source/common/ucnvisci.c
+++ b/icu4c/source/common/ucnvisci.c
@ -1332,7 +1332,7 @@ _ISCII_SafeClone(const UConverter *cnv,

 static void
 _ISCIIGetUnicodeSet(const UConverter *cnv,
-                    USetAdder *sa,
+                    const USetAdder *sa,
                    UConverterUnicodeSet which,
                    UErrorCode *pErrorCode)
 {
--- a/icu4c/source/common/ucnvlat1.c
+++ b/icu4c/source/common/ucnvlat1.c
@ -332,7 +332,7 @@ noMoreInput:

 static void
 _Latin1GetUnicodeSet(const UConverter *cnv,
-                     USetAdder *sa,
+                     const USetAdder *sa,
                     UConverterUnicodeSet which,
                     UErrorCode *pErrorCode) {
    sa->addRange(sa->set, 0, 0xff);
@ -534,7 +534,7 @@ _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,

 static void
 _ASCIIGetUnicodeSet(const UConverter *cnv,
-                    USetAdder *sa,
+                    const USetAdder *sa,
                    UConverterUnicodeSet which,
                    UErrorCode *pErrorCode) {
    sa->addRange(sa->set, 0, 0x7f);
--- a/icu4c/source/common/ucnvmbcs.c
+++ b/icu4c/source/common/ucnvmbcs.c
@ -344,7 +344,7 @@ gb18030Ranges[13][4]={
 static void
 _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
                       const int32_t (*stateTable)[256], const uint16_t *unicodeCodeUnits,
-                       USetAdder *sa,
+                       const USetAdder *sa,
                       UConverterUnicodeSet which,
                       uint8_t state, uint32_t offset, int32_t lowByte, int32_t highByte,
                      
@ -421,7 +421,7 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
 */
 U_CFUNC void
 ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
-                           USetAdder *sa,
+                           const USetAdder *sa,
                           UConverterUnicodeSet which,
                           uint8_t state, int32_t lowByte, int32_t highByte,
                           UErrorCode *pErrorCode) {
@ -434,7 +434,7 @@ ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,

 U_CFUNC void
 ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
-                             USetAdder *sa,
+                             const USetAdder *sa,
                             UConverterUnicodeSet which,
                             UErrorCode *pErrorCode) {
    const UConverterMBCSTable *mbcsTable;
@ -571,7 +571,7 @@ ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,

 static void
 ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
-                   USetAdder *sa,
+                   const USetAdder *sa,
                   UConverterUnicodeSet which,
                   UErrorCode *pErrorCode) {
    if(cnv->options&_MBCS_OPTION_GB18030) {
--- a/icu4c/source/common/ucnvmbcs.h
+++ b/icu4c/source/common/ucnvmbcs.h
@ -373,7 +373,7 @@ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
 */
 U_CFUNC void
 ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
-                           USetAdder *sa,
+                           const USetAdder *sa,
                           UConverterUnicodeSet which,
                           uint8_t state, int32_t lowByte, int32_t highByte,
                           UErrorCode *pErrorCode);
@ -388,7 +388,7 @@ ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
 */
 U_CFUNC void
 ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
-                             USetAdder *sa,
+                             const USetAdder *sa,
                             UConverterUnicodeSet which,
                             UErrorCode *pErrorCode);

--- a/icu4c/source/common/unames.c
+++ b/icu4c/source/common/unames.c
@ -1718,7 +1718,7 @@ uprv_getMaxISOCommentLength() {
 * @param uset USet to receive characters. Existing contents are deleted.
 */
 static void
-charSetToUSet(uint32_t cset[8], USetAdder *sa) {
+charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
    UChar us[256];
    char cs[256];

@ -1755,7 +1755,7 @@ charSetToUSet(uint32_t cset[8], USetAdder *sa) {
 * @param set USet to receive characters.
 */
 U_CAPI void U_EXPORT2
-uprv_getCharNameCharacters(USetAdder *sa) {
+uprv_getCharNameCharacters(const USetAdder *sa) {
    charSetToUSet(gNameSet, sa);
 }

@ -1769,7 +1769,7 @@ urename.h and uprops.h changed accordingly.
 * @param set USetAdder to receive characters.
 */
 U_CAPI void U_EXPORT2
-uprv_getISOCommentCharacters(USetAdder *sa) {
+uprv_getISOCommentCharacters(const USetAdder *sa) {
    charSetToUSet(gISOCommentSet, sa);
 }
 #endif
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -19,7 +19,6 @@ U_NAMESPACE_BEGIN
 class ParsePosition;
 class SymbolTable;
 class UVector;
-class CaseEquivClass;
 class RuleCharacterIterator;

 /**
@ -1324,20 +1323,6 @@ private:
    static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode);

    friend class UnicodeSetIterator;
-
-    //----------------------------------------------------------------
-    // Implementation: closeOver
-    //----------------------------------------------------------------
-
-    void caseCloseOne(const UnicodeString& folded);
-
-    void caseCloseOne(const CaseEquivClass& c);
-
-    void caseCloseOne(UChar folded);
-
-    static const CaseEquivClass* getCaseMapOf(const UnicodeString& folded);
-
-    static const CaseEquivClass* getCaseMapOf(UChar folded);
 };

 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
--- a/icu4c/source/common/unicode/uset.h
+++ b/icu4c/source/common/unicode/uset.h
@ -57,6 +57,25 @@ enum {
     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
     * match all except 'a', 'A', 'b', and 'B'. This performs a full
     * closure over case mappings, e.g. U+017F for s.
+     *
+     * The resulting set is a superset of the input for the code points but
+     * not for the strings.
+     * It performs a case mapping closure of the code points and adds
+     * full case folding strings for the code points, and reduces strings of
+     * the original set to their full case folding equivalents.
+     *
+     * This is designed for case-insensitive matches, for example
+     * in regular expressions. The full code point case closure allows to check
+     * an input character directly against the closure set.
+     * Strings are matched by comparing the case-folded form from the closure
+     * set with an incremental case folding of the string in question.
+     *
+     * The closure set will also contain single code points if the original
+     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
+     * This is not necessary (that is, redundant) for the above matching method
+     * but results in the same closure sets regardless of whether the original
+     * set contained the code point or a string.
+     *
     * @stable ICU 2.4
     */
    USET_CASE_INSENSITIVE = 2,  
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@ -28,7 +28,6 @@
 #include "uset_imp.h"
 #include "ruleiter.h"
 #include "cmemory.h"
-#include "uhash.h"
 #include "ucln_cmn.h"
 #include "util.h"
 #include "uvector.h"
@ -42,7 +41,6 @@
 #include "mutex.h"
 #include "uassert.h"
 #include "hash.h"
-#include "ucmp8.h"

 // initial storage. Must be >= 0
 // *** same as in uniset.cpp ! ***
@ -157,10 +155,6 @@ U_NAMESPACE_BEGIN

 static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()

-static Hashtable* CASE_EQUIV_HASH = NULL; // for closeOver(USET_CASE)
-
-static CompactByteArray* CASE_EQUIV_CBA = NULL; // for closeOver(USET_CASE)
-
 // helper functions for matching of pattern syntax pieces ------------------ ***
 // these functions are parallel to the PERL_OPEN etc. strings above

@ -1318,16 +1312,6 @@ static UBool U_CALLCONV uset_cleanup(void) {
        }
    }

-    if (CASE_EQUIV_HASH != NULL) {
-        delete CASE_EQUIV_HASH;
-        CASE_EQUIV_HASH = NULL;
-    }
-
-    if (CASE_EQUIV_CBA != NULL) {
-        ucmp8_close(CASE_EQUIV_CBA);
-        CASE_EQUIV_CBA = NULL;
-    }
-
    return TRUE;
 }

@ -1406,33 +1390,26 @@ addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString
 }

 UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
-    if ((attribute & USET_CASE) != 0) {
-        UnicodeSet foldSet;
-        UnicodeString str;
-        int32_t n = getRangeCount();
-        for (int32_t i=0; i<n; ++i) {
-            UChar32 start = getRangeStart(i);
-            UChar32 end   = getRangeEnd(i);
-            for (UChar32 cp=start; cp<=end; ++cp) {
-                str.truncate(0);
-                str.append(u_foldCase(cp, U_FOLD_CASE_DEFAULT));
-                foldSet.caseCloseOne(str);
-            }
-        }
-        if (strings != NULL && strings->size() > 0) {
-            for (int32_t j=0; j<strings->size(); ++j) {
-                str = * (const UnicodeString*) strings->elementAt(j);
-                foldSet.caseCloseOne(str.foldCase());
-            }
-        }
-        *this = foldSet;
-    }
-    else if ((attribute & USET_ADD_CASE_MAPPINGS)) {
-        UnicodeSet foldSet(*this);
-        UnicodeString str;
+    if (attribute & (USET_CASE | USET_ADD_CASE_MAPPINGS)) {
        UErrorCode status = U_ZERO_ERROR;
        UCaseProps *csp = ucase_getSingleton(&status);
        if (U_SUCCESS(status)) {
+            UnicodeSet foldSet(*this);
+            UnicodeString str;
+            USetAdder sa = {
+                (USet *)&foldSet,
+                _set_add,
+                _set_addRange,
+                _set_addString
+            };
+
+            // Start with the input set to guarantee inclusion of its code points.
+            // USET_CASE: the strings get reduced (folded), so start with no strings and add
+            //            only those needed; guard against NULL like the strings loop below does.
+            if ((attribute & USET_CASE) && foldSet.strings != NULL) {
+                foldSet.strings->removeAllElements();
+            }
+
            int32_t n = getRangeCount();
            UChar32 result;
            const UChar *full;
@ -1442,45 +1419,64 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
                UChar32 start = getRangeStart(i);
                UChar32 end   = getRangeEnd(i);

-                for (UChar32 cp=start; cp<=end; ++cp) {
-                    result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
-                    addCaseMapping(foldSet, result, full, str);
+                if (attribute & USET_CASE) {
+                    // full case closure
+                    for (UChar32 cp=start; cp<=end; ++cp) {
+                        ucase_addCaseClosure(csp, cp, &sa);
+                    }
+                } else {
+                    // add case mappings
+                    // (does not add long s for regular s, or Kelvin for k, for example)
+                    for (UChar32 cp=start; cp<=end; ++cp) {
+                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
+                        addCaseMapping(foldSet, result, full, str);

-                    result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
-                    addCaseMapping(foldSet, result, full, str);
+                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
+                        addCaseMapping(foldSet, result, full, str);

-                    result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
-                    addCaseMapping(foldSet, result, full, str);
+                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
+                        addCaseMapping(foldSet, result, full, str);

-                    result = ucase_toFullFolding(csp, cp, &full, 0);
-                    addCaseMapping(foldSet, result, full, str);
+                        result = ucase_toFullFolding(csp, cp, &full, 0);
+                        addCaseMapping(foldSet, result, full, str);
+                    }
                }
            }
            if (strings != NULL && strings->size() > 0) {
-                Locale root("");
-#if !UCONFIG_NO_BREAK_ITERATION
-                BreakIterator *bi = BreakIterator::createWordInstance(root, status);
-#endif
-                if (U_SUCCESS(status)) {
-                    const UnicodeString *pStr;
-
+                if (attribute & USET_CASE) {
                    for (int32_t j=0; j<strings->size(); ++j) {
-                        pStr = (const UnicodeString *) strings->elementAt(j);
-                        (str = *pStr).toLower(root);
-                        foldSet.add(str);
-#if !UCONFIG_NO_BREAK_ITERATION
-                        (str = *pStr).toTitle(bi, root);
-                        foldSet.add(str);
-#endif
-                        (str = *pStr).toUpper(root);
-                        foldSet.add(str);
-                        (str = *pStr).foldCase();
-                        foldSet.add(str);
+                        str = *(const UnicodeString *) strings->elementAt(j);
+                        str.foldCase();
+                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
+                            foldSet.add(str); // does not map to code points: add the folded string itself
+                        }
                    }
-                }
+                } else {
+                    Locale root("");
 #if !UCONFIG_NO_BREAK_ITERATION
-                delete bi;
+                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
 #endif
+                    if (U_SUCCESS(status)) {
+                        const UnicodeString *pStr;
+
+                        for (int32_t j=0; j<strings->size(); ++j) {
+                            pStr = (const UnicodeString *) strings->elementAt(j);
+                            (str = *pStr).toLower(root);
+                            foldSet.add(str);
+#if !UCONFIG_NO_BREAK_ITERATION
+                            (str = *pStr).toTitle(bi, root);
+                            foldSet.add(str);
+#endif
+                            (str = *pStr).toUpper(root);
+                            foldSet.add(str);
+                            (str = *pStr).foldCase();
+                            foldSet.add(str);
+                        }
+                    }
+#if !UCONFIG_NO_BREAK_ITERATION
+                    delete bi;
+#endif
+                }
            }
            *this = foldSet;
        }
@ -1488,525 +1484,4 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
    return *this;
 }

-//----------------------------------------------------------------
-// Case folding implementation
-//----------------------------------------------------------------
-
-/**
- * Data structure representing a case-fold equivalency class.  It is a
- * SET containing 0 or more code units, and 0 or more strings of
- * length 2 code units or longer.
- *
- * This class is implemented as a 8-UChar buffer with a few
- * convenience methods on it.  The format of the buffer:
- * - All single code units in this set, followed by a terminating
- *   zero.  If none, then just a terminating zero.
- * - Zero or more 0-terminated strings, each of length >= 2
- *   code units.
- * - A single terminating (UChar)0.
- *
- * Usage:
- *
- * const CaseEquivClass& c = ...;
- * const UChar* p;
- * for (c.getStrings(p); *p; c.nextString(p)) {
- *   foo(p);
- * }
- */
-class CaseEquivClass {
-public:
-    UChar data[8];
-
-    /**
-     * Return the string of single code units.  May be "".  Will never
-     * be NULL.
-     */
-    const UChar* getSingles() const {
-        return data;
-    }
-
-    /**
-     * Return the first multi-code-unit string.  May be "" if there
-     * are none.  Will never be NULL.
-     * @param p pointer to be set to point to the first string.
-     */
-    void getStrings(const UChar*& p) const {
-        p = data;
-        nextString(p);
-    }
-
-    /**
-     * Advance a pointer from one multi-code-unit string to the next.
-     * May advance 'p' to point to "" if there are no more.
-     * Do NOT call if *p == 0.
-     * @param p pointer to be advanced to point to the next string.
-     */
-    static void nextString(const UChar*& p) {
-        while (*p++) {}
-    }
-};
-
-/**
- * IMPORTANT: The following two static data arrays represent the
- * information used to do case closure.  The first array is an array
- * of pairs.  That is, for each even index e, entries [e] and [e+1]
- * form a pair of case equivalent code units.  The entry at [e] is the
- * folded one, that is, the one for which u_foldCase(x)==x.
- *
- * The second static array is an array of CaseEquivClass objects.
- * Since these objects are just adorned UChar[] arrays, they can be
- * initialized in place in the array, and all of them can live in a
- * single piece of static memory, with no heap allocation.
- */
-
-// MACHINE-GENERATED: Do not edit (see com.ibm.icu.dev.tools.translit.UnicodeSetCloseOver)
-static const UChar CASE_PAIRS[] = {
-    0x0061,0x0041,0x0062,0x0042,0x0063,0x0043,0x0064,0x0044,0x0065,0x0045,
-    0x0066,0x0046,0x0067,0x0047,0x0068,0x0048,0x0069,0x0049,0x006A,0x004A,
-    0x006C,0x004C,0x006D,0x004D,0x006E,0x004E,0x006F,0x004F,0x0070,0x0050,
-    0x0071,0x0051,0x0072,0x0052,0x0074,0x0054,0x0075,0x0055,0x0076,0x0056,
-    0x0077,0x0057,0x0078,0x0058,0x0079,0x0059,0x007A,0x005A,0x00E0,0x00C0,
-    0x00E1,0x00C1,0x00E2,0x00C2,0x00E3,0x00C3,0x00E4,0x00C4,0x00E6,0x00C6,
-    0x00E7,0x00C7,0x00E8,0x00C8,0x00E9,0x00C9,0x00EA,0x00CA,0x00EB,0x00CB,
-    0x00EC,0x00CC,0x00ED,0x00CD,0x00EE,0x00CE,0x00EF,0x00CF,0x00F0,0x00D0,
-    0x00F1,0x00D1,0x00F2,0x00D2,0x00F3,0x00D3,0x00F4,0x00D4,0x00F5,0x00D5,
-    0x00F6,0x00D6,0x00F8,0x00D8,0x00F9,0x00D9,0x00FA,0x00DA,0x00FB,0x00DB,
-    0x00FC,0x00DC,0x00FD,0x00DD,0x00FE,0x00DE,0x00FF,0x0178,0x0101,0x0100,
-    0x0103,0x0102,0x0105,0x0104,0x0107,0x0106,0x0109,0x0108,0x010B,0x010A,
-    0x010D,0x010C,0x010F,0x010E,0x0111,0x0110,0x0113,0x0112,0x0115,0x0114,
-    0x0117,0x0116,0x0119,0x0118,0x011B,0x011A,0x011D,0x011C,0x011F,0x011E,
-    0x0121,0x0120,0x0123,0x0122,0x0125,0x0124,0x0127,0x0126,0x0129,0x0128,
-    0x012B,0x012A,0x012D,0x012C,0x012F,0x012E,0x0133,0x0132,0x0135,0x0134,
-    0x0137,0x0136,0x013A,0x0139,0x013C,0x013B,0x013E,0x013D,0x0140,0x013F,
-    0x0142,0x0141,0x0144,0x0143,0x0146,0x0145,0x0148,0x0147,0x014B,0x014A,
-    0x014D,0x014C,0x014F,0x014E,0x0151,0x0150,0x0153,0x0152,0x0155,0x0154,
-    0x0157,0x0156,0x0159,0x0158,0x015B,0x015A,0x015D,0x015C,0x015F,0x015E,
-    0x0161,0x0160,0x0163,0x0162,0x0165,0x0164,0x0167,0x0166,0x0169,0x0168,
-    0x016B,0x016A,0x016D,0x016C,0x016F,0x016E,0x0171,0x0170,0x0173,0x0172,
-    0x0175,0x0174,0x0177,0x0176,0x017A,0x0179,0x017C,0x017B,0x017E,0x017D,
-    0x0183,0x0182,0x0185,0x0184,0x0188,0x0187,0x018C,0x018B,0x0192,0x0191,
-    0x0195,0x01F6,0x0199,0x0198,0x019E,0x0220,0x01A1,0x01A0,0x01A3,0x01A2,
-    0x01A5,0x01A4,0x01A8,0x01A7,0x01AD,0x01AC,0x01B0,0x01AF,0x01B4,0x01B3,
-    0x01B6,0x01B5,0x01B9,0x01B8,0x01BD,0x01BC,0x01BF,0x01F7,0x01CE,0x01CD,
-    0x01D0,0x01CF,0x01D2,0x01D1,0x01D4,0x01D3,0x01D6,0x01D5,0x01D8,0x01D7,
-    0x01DA,0x01D9,0x01DC,0x01DB,0x01DD,0x018E,0x01DF,0x01DE,0x01E1,0x01E0,
-    0x01E3,0x01E2,0x01E5,0x01E4,0x01E7,0x01E6,0x01E9,0x01E8,0x01EB,0x01EA,
-    0x01ED,0x01EC,0x01EF,0x01EE,0x01F5,0x01F4,0x01F9,0x01F8,0x01FB,0x01FA,
-    0x01FD,0x01FC,0x01FF,0x01FE,0x0201,0x0200,0x0203,0x0202,0x0205,0x0204,
-    0x0207,0x0206,0x0209,0x0208,0x020B,0x020A,0x020D,0x020C,0x020F,0x020E,
-    0x0211,0x0210,0x0213,0x0212,0x0215,0x0214,0x0217,0x0216,0x0219,0x0218,
-    0x021B,0x021A,0x021D,0x021C,0x021F,0x021E,0x0223,0x0222,0x0225,0x0224,
-    0x0227,0x0226,0x0229,0x0228,0x022B,0x022A,0x022D,0x022C,0x022F,0x022E,
-    0x0231,0x0230,0x0233,0x0232,0x0253,0x0181,0x0254,0x0186,0x0256,0x0189,
-    0x0257,0x018A,0x0259,0x018F,0x025B,0x0190,0x0260,0x0193,0x0263,0x0194,
-    0x0268,0x0197,0x0269,0x0196,0x026F,0x019C,0x0272,0x019D,0x0275,0x019F,
-    0x0280,0x01A6,0x0283,0x01A9,0x0288,0x01AE,0x028A,0x01B1,0x028B,0x01B2,
-    0x0292,0x01B7,0x03AC,0x0386,0x03AD,0x0388,0x03AE,0x0389,0x03AF,0x038A,
-    0x03B1,0x0391,0x03B3,0x0393,0x03B4,0x0394,0x03B6,0x0396,0x03B7,0x0397,
-    0x03BB,0x039B,0x03BD,0x039D,0x03BE,0x039E,0x03BF,0x039F,0x03C4,0x03A4,
-    0x03C5,0x03A5,0x03C7,0x03A7,0x03C8,0x03A8,0x03CA,0x03AA,0x03CB,0x03AB,
-    0x03CC,0x038C,0x03CD,0x038E,0x03CE,0x038F,0x03D9,0x03D8,0x03DB,0x03DA,
-    0x03DD,0x03DC,0x03DF,0x03DE,0x03E1,0x03E0,0x03E3,0x03E2,0x03E5,0x03E4,
-    0x03E7,0x03E6,0x03E9,0x03E8,0x03EB,0x03EA,0x03ED,0x03EC,0x03EF,0x03EE,
-    0x0430,0x0410,0x0431,0x0411,0x0432,0x0412,0x0433,0x0413,0x0434,0x0414,
-    0x0435,0x0415,0x0436,0x0416,0x0437,0x0417,0x0438,0x0418,0x0439,0x0419,
-    0x043A,0x041A,0x043B,0x041B,0x043C,0x041C,0x043D,0x041D,0x043E,0x041E,
-    0x043F,0x041F,0x0440,0x0420,0x0441,0x0421,0x0442,0x0422,0x0443,0x0423,
-    0x0444,0x0424,0x0445,0x0425,0x0446,0x0426,0x0447,0x0427,0x0448,0x0428,
-    0x0449,0x0429,0x044A,0x042A,0x044B,0x042B,0x044C,0x042C,0x044D,0x042D,
-    0x044E,0x042E,0x044F,0x042F,0x0450,0x0400,0x0451,0x0401,0x0452,0x0402,
-    0x0453,0x0403,0x0454,0x0404,0x0455,0x0405,0x0456,0x0406,0x0457,0x0407,
-    0x0458,0x0408,0x0459,0x0409,0x045A,0x040A,0x045B,0x040B,0x045C,0x040C,
-    0x045D,0x040D,0x045E,0x040E,0x045F,0x040F,0x0461,0x0460,0x0463,0x0462,
-    0x0465,0x0464,0x0467,0x0466,0x0469,0x0468,0x046B,0x046A,0x046D,0x046C,
-    0x046F,0x046E,0x0471,0x0470,0x0473,0x0472,0x0475,0x0474,0x0477,0x0476,
-    0x0479,0x0478,0x047B,0x047A,0x047D,0x047C,0x047F,0x047E,0x0481,0x0480,
-    0x048B,0x048A,0x048D,0x048C,0x048F,0x048E,0x0491,0x0490,0x0493,0x0492,
-    0x0495,0x0494,0x0497,0x0496,0x0499,0x0498,0x049B,0x049A,0x049D,0x049C,
-    0x049F,0x049E,0x04A1,0x04A0,0x04A3,0x04A2,0x04A5,0x04A4,0x04A7,0x04A6,
-    0x04A9,0x04A8,0x04AB,0x04AA,0x04AD,0x04AC,0x04AF,0x04AE,0x04B1,0x04B0,
-    0x04B3,0x04B2,0x04B5,0x04B4,0x04B7,0x04B6,0x04B9,0x04B8,0x04BB,0x04BA,
-    0x04BD,0x04BC,0x04BF,0x04BE,0x04C2,0x04C1,0x04C4,0x04C3,0x04C6,0x04C5,
-    0x04C8,0x04C7,0x04CA,0x04C9,0x04CC,0x04CB,0x04CE,0x04CD,0x04D1,0x04D0,
-    0x04D3,0x04D2,0x04D5,0x04D4,0x04D7,0x04D6,0x04D9,0x04D8,0x04DB,0x04DA,
-    0x04DD,0x04DC,0x04DF,0x04DE,0x04E1,0x04E0,0x04E3,0x04E2,0x04E5,0x04E4,
-    0x04E7,0x04E6,0x04E9,0x04E8,0x04EB,0x04EA,0x04ED,0x04EC,0x04EF,0x04EE,
-    0x04F1,0x04F0,0x04F3,0x04F2,0x04F5,0x04F4,0x04F9,0x04F8,0x0501,0x0500,
-    0x0503,0x0502,0x0505,0x0504,0x0507,0x0506,0x0509,0x0508,0x050B,0x050A,
-    0x050D,0x050C,0x050F,0x050E,0x0561,0x0531,0x0562,0x0532,0x0563,0x0533,
-    0x0564,0x0534,0x0565,0x0535,0x0566,0x0536,0x0567,0x0537,0x0568,0x0538,
-    0x0569,0x0539,0x056A,0x053A,0x056B,0x053B,0x056C,0x053C,0x056D,0x053D,
-    0x056E,0x053E,0x056F,0x053F,0x0570,0x0540,0x0571,0x0541,0x0572,0x0542,
-    0x0573,0x0543,0x0574,0x0544,0x0575,0x0545,0x0576,0x0546,0x0577,0x0547,
-    0x0578,0x0548,0x0579,0x0549,0x057A,0x054A,0x057B,0x054B,0x057C,0x054C,
-    0x057D,0x054D,0x057E,0x054E,0x057F,0x054F,0x0580,0x0550,0x0581,0x0551,
-    0x0582,0x0552,0x0583,0x0553,0x0584,0x0554,0x0585,0x0555,0x0586,0x0556,
-    0x1E01,0x1E00,0x1E03,0x1E02,0x1E05,0x1E04,0x1E07,0x1E06,0x1E09,0x1E08,
-    0x1E0B,0x1E0A,0x1E0D,0x1E0C,0x1E0F,0x1E0E,0x1E11,0x1E10,0x1E13,0x1E12,
-    0x1E15,0x1E14,0x1E17,0x1E16,0x1E19,0x1E18,0x1E1B,0x1E1A,0x1E1D,0x1E1C,
-    0x1E1F,0x1E1E,0x1E21,0x1E20,0x1E23,0x1E22,0x1E25,0x1E24,0x1E27,0x1E26,
-    0x1E29,0x1E28,0x1E2B,0x1E2A,0x1E2D,0x1E2C,0x1E2F,0x1E2E,0x1E31,0x1E30,
-    0x1E33,0x1E32,0x1E35,0x1E34,0x1E37,0x1E36,0x1E39,0x1E38,0x1E3B,0x1E3A,
-    0x1E3D,0x1E3C,0x1E3F,0x1E3E,0x1E41,0x1E40,0x1E43,0x1E42,0x1E45,0x1E44,
-    0x1E47,0x1E46,0x1E49,0x1E48,0x1E4B,0x1E4A,0x1E4D,0x1E4C,0x1E4F,0x1E4E,
-    0x1E51,0x1E50,0x1E53,0x1E52,0x1E55,0x1E54,0x1E57,0x1E56,0x1E59,0x1E58,
-    0x1E5B,0x1E5A,0x1E5D,0x1E5C,0x1E5F,0x1E5E,0x1E63,0x1E62,0x1E65,0x1E64,
-    0x1E67,0x1E66,0x1E69,0x1E68,0x1E6B,0x1E6A,0x1E6D,0x1E6C,0x1E6F,0x1E6E,
-    0x1E71,0x1E70,0x1E73,0x1E72,0x1E75,0x1E74,0x1E77,0x1E76,0x1E79,0x1E78,
-    0x1E7B,0x1E7A,0x1E7D,0x1E7C,0x1E7F,0x1E7E,0x1E81,0x1E80,0x1E83,0x1E82,
-    0x1E85,0x1E84,0x1E87,0x1E86,0x1E89,0x1E88,0x1E8B,0x1E8A,0x1E8D,0x1E8C,
-    0x1E8F,0x1E8E,0x1E91,0x1E90,0x1E93,0x1E92,0x1E95,0x1E94,0x1EA1,0x1EA0,
-    0x1EA3,0x1EA2,0x1EA5,0x1EA4,0x1EA7,0x1EA6,0x1EA9,0x1EA8,0x1EAB,0x1EAA,
-    0x1EAD,0x1EAC,0x1EAF,0x1EAE,0x1EB1,0x1EB0,0x1EB3,0x1EB2,0x1EB5,0x1EB4,
-    0x1EB7,0x1EB6,0x1EB9,0x1EB8,0x1EBB,0x1EBA,0x1EBD,0x1EBC,0x1EBF,0x1EBE,
-    0x1EC1,0x1EC0,0x1EC3,0x1EC2,0x1EC5,0x1EC4,0x1EC7,0x1EC6,0x1EC9,0x1EC8,
-    0x1ECB,0x1ECA,0x1ECD,0x1ECC,0x1ECF,0x1ECE,0x1ED1,0x1ED0,0x1ED3,0x1ED2,
-    0x1ED5,0x1ED4,0x1ED7,0x1ED6,0x1ED9,0x1ED8,0x1EDB,0x1EDA,0x1EDD,0x1EDC,
-    0x1EDF,0x1EDE,0x1EE1,0x1EE0,0x1EE3,0x1EE2,0x1EE5,0x1EE4,0x1EE7,0x1EE6,
-    0x1EE9,0x1EE8,0x1EEB,0x1EEA,0x1EED,0x1EEC,0x1EEF,0x1EEE,0x1EF1,0x1EF0,
-    0x1EF3,0x1EF2,0x1EF5,0x1EF4,0x1EF7,0x1EF6,0x1EF9,0x1EF8,0x1F00,0x1F08,
-    0x1F01,0x1F09,0x1F02,0x1F0A,0x1F03,0x1F0B,0x1F04,0x1F0C,0x1F05,0x1F0D,
-    0x1F06,0x1F0E,0x1F07,0x1F0F,0x1F10,0x1F18,0x1F11,0x1F19,0x1F12,0x1F1A,
-    0x1F13,0x1F1B,0x1F14,0x1F1C,0x1F15,0x1F1D,0x1F20,0x1F28,0x1F21,0x1F29,
-    0x1F22,0x1F2A,0x1F23,0x1F2B,0x1F24,0x1F2C,0x1F25,0x1F2D,0x1F26,0x1F2E,
-    0x1F27,0x1F2F,0x1F30,0x1F38,0x1F31,0x1F39,0x1F32,0x1F3A,0x1F33,0x1F3B,
-    0x1F34,0x1F3C,0x1F35,0x1F3D,0x1F36,0x1F3E,0x1F37,0x1F3F,0x1F40,0x1F48,
-    0x1F41,0x1F49,0x1F42,0x1F4A,0x1F43,0x1F4B,0x1F44,0x1F4C,0x1F45,0x1F4D,
-    0x1F51,0x1F59,0x1F53,0x1F5B,0x1F55,0x1F5D,0x1F57,0x1F5F,0x1F60,0x1F68,
-    0x1F61,0x1F69,0x1F62,0x1F6A,0x1F63,0x1F6B,0x1F64,0x1F6C,0x1F65,0x1F6D,
-    0x1F66,0x1F6E,0x1F67,0x1F6F,0x1F70,0x1FBA,0x1F71,0x1FBB,0x1F72,0x1FC8,
-    0x1F73,0x1FC9,0x1F74,0x1FCA,0x1F75,0x1FCB,0x1F76,0x1FDA,0x1F77,0x1FDB,
-    0x1F78,0x1FF8,0x1F79,0x1FF9,0x1F7A,0x1FEA,0x1F7B,0x1FEB,0x1F7C,0x1FFA,
-    0x1F7D,0x1FFB,0x1FB0,0x1FB8,0x1FB1,0x1FB9,0x1FD0,0x1FD8,0x1FD1,0x1FD9,
-    0x1FE0,0x1FE8,0x1FE1,0x1FE9,0x1FE5,0x1FEC,0x2170,0x2160,0x2171,0x2161,
-    0x2172,0x2162,0x2173,0x2163,0x2174,0x2164,0x2175,0x2165,0x2176,0x2166,
-    0x2177,0x2167,0x2178,0x2168,0x2179,0x2169,0x217A,0x216A,0x217B,0x216B,
-    0x217C,0x216C,0x217D,0x216D,0x217E,0x216E,0x217F,0x216F,0x24D0,0x24B6,
-    0x24D1,0x24B7,0x24D2,0x24B8,0x24D3,0x24B9,0x24D4,0x24BA,0x24D5,0x24BB,
-    0x24D6,0x24BC,0x24D7,0x24BD,0x24D8,0x24BE,0x24D9,0x24BF,0x24DA,0x24C0,
-    0x24DB,0x24C1,0x24DC,0x24C2,0x24DD,0x24C3,0x24DE,0x24C4,0x24DF,0x24C5,
-    0x24E0,0x24C6,0x24E1,0x24C7,0x24E2,0x24C8,0x24E3,0x24C9,0x24E4,0x24CA,
-    0x24E5,0x24CB,0x24E6,0x24CC,0x24E7,0x24CD,0x24E8,0x24CE,0x24E9,0x24CF,
-    0xFF41,0xFF21,0xFF42,0xFF22,0xFF43,0xFF23,0xFF44,0xFF24,0xFF45,0xFF25,
-    0xFF46,0xFF26,0xFF47,0xFF27,0xFF48,0xFF28,0xFF49,0xFF29,0xFF4A,0xFF2A,
-    0xFF4B,0xFF2B,0xFF4C,0xFF2C,0xFF4D,0xFF2D,0xFF4E,0xFF2E,0xFF4F,0xFF2F,
-    0xFF50,0xFF30,0xFF51,0xFF31,0xFF52,0xFF32,0xFF53,0xFF33,0xFF54,0xFF34,
-    0xFF55,0xFF35,0xFF56,0xFF36,0xFF57,0xFF37,0xFF58,0xFF38,0xFF59,0xFF39,
-    0xFF5A,0xFF3A,
-};
-
-// MACHINE-GENERATED: Do not edit (see com.ibm.icu.dev.tools.translit.UnicodeSetCloseOver)
-static const CaseEquivClass CASE_NONPAIRS[] = {
-    {{0x1E9A,0,  0x0061,0x02BE,0, 0}},
-    {{0xFB00,0,  0x0066,0x0066,0, 0}},
-    {{0xFB03,0,  0x0066,0x0066,0x0069,0, 0}},
-    {{0xFB04,0,  0x0066,0x0066,0x006C,0, 0}},
-    {{0xFB01,0,  0x0066,0x0069,0, 0}},
-    {{0xFB02,0,  0x0066,0x006C,0, 0}},
-    {{0x1E96,0,  0x0068,0x0331,0, 0}},
-    {{0x0130,0,  0x0069,0x0307,0, 0}},
-    {{0x01F0,0,  0x006A,0x030C,0, 0}},
-    {{0x004B,0x006B,0x212A,0,  0}},
-    {{0x0053,0x0073,0x017F,0,  0}},
-    {{0x00DF,0,  0x0073,0x0073,0, 0}},
-    {{0xFB05,0xFB06,0,  0x0073,0x0074,0, 0}},
-    {{0x1E97,0,  0x0074,0x0308,0, 0}},
-    {{0x1E98,0,  0x0077,0x030A,0, 0}},
-    {{0x1E99,0,  0x0079,0x030A,0, 0}},
-    {{0x00C5,0x00E5,0x212B,0,  0}},
-    {{0x01C4,0x01C5,0x01C6,0,  0}},
-    {{0x01C7,0x01C8,0x01C9,0,  0}},
-    {{0x01CA,0x01CB,0x01CC,0,  0}},
-    {{0x01F1,0x01F2,0x01F3,0,  0}},
-    {{0x0149,0,  0x02BC,0x006E,0, 0}},
-    {{0x1FB4,0,  0x03AC,0x03B9,0, 0}},
-    {{0x1FC4,0,  0x03AE,0x03B9,0, 0}},
-    {{0x1FB6,0,  0x03B1,0x0342,0, 0}},
-    {{0x1FB7,0,  0x03B1,0x0342,0x03B9,0, 0}},
-    {{0x1FB3,0x1FBC,0,  0x03B1,0x03B9,0, 0}},
-    {{0x0392,0x03B2,0x03D0,0,  0}},
-    {{0x0395,0x03B5,0x03F5,0,  0}},
-    {{0x1FC6,0,  0x03B7,0x0342,0, 0}},
-    {{0x1FC7,0,  0x03B7,0x0342,0x03B9,0, 0}},
-    {{0x1FC3,0x1FCC,0,  0x03B7,0x03B9,0, 0}},
-    {{0x0398,0x03B8,0x03D1,0x03F4,0,  0}},
-    {{0x0345,0x0399,0x03B9,0x1FBE,0,  0}},
-    {{0x1FD2,0,  0x03B9,0x0308,0x0300,0, 0}},
-    {{0x0390,0x1FD3,0,  0x03B9,0x0308,0x0301,0, 0}},
-    {{0x1FD7,0,  0x03B9,0x0308,0x0342,0, 0}},
-    {{0x1FD6,0,  0x03B9,0x0342,0, 0}},
-    {{0x039A,0x03BA,0x03F0,0,  0}},
-    {{0x00B5,0x039C,0x03BC,0,  0}},
-    {{0x03A0,0x03C0,0x03D6,0,  0}},
-    {{0x03A1,0x03C1,0x03F1,0,  0}},
-    {{0x1FE4,0,  0x03C1,0x0313,0, 0}},
-    {{0x03A3,0x03C2,0x03C3,0x03F2,0,  0}},
-    {{0x1FE2,0,  0x03C5,0x0308,0x0300,0, 0}},
-    {{0x03B0,0x1FE3,0,  0x03C5,0x0308,0x0301,0, 0}},
-    {{0x1FE7,0,  0x03C5,0x0308,0x0342,0, 0}},
-    {{0x1F50,0,  0x03C5,0x0313,0, 0}},
-    {{0x1F52,0,  0x03C5,0x0313,0x0300,0, 0}},
-    {{0x1F54,0,  0x03C5,0x0313,0x0301,0, 0}},
-    {{0x1F56,0,  0x03C5,0x0313,0x0342,0, 0}},
-    {{0x1FE6,0,  0x03C5,0x0342,0, 0}},
-    {{0x03A6,0x03C6,0x03D5,0,  0}},
-    {{0x03A9,0x03C9,0x2126,0,  0}},
-    {{0x1FF6,0,  0x03C9,0x0342,0, 0}},
-    {{0x1FF7,0,  0x03C9,0x0342,0x03B9,0, 0}},
-    {{0x1FF3,0x1FFC,0,  0x03C9,0x03B9,0, 0}},
-    {{0x1FF4,0,  0x03CE,0x03B9,0, 0}},
-    {{0x0587,0,  0x0565,0x0582,0, 0}},
-    {{0xFB14,0,  0x0574,0x0565,0, 0}},
-    {{0xFB15,0,  0x0574,0x056B,0, 0}},
-    {{0xFB17,0,  0x0574,0x056D,0, 0}},
-    {{0xFB13,0,  0x0574,0x0576,0, 0}},
-    {{0xFB16,0,  0x057E,0x0576,0, 0}},
-    {{0x1E60,0x1E61,0x1E9B,0,  0}},
-    {{0x1F80,0x1F88,0,  0x1F00,0x03B9,0, 0}},
-    {{0x1F81,0x1F89,0,  0x1F01,0x03B9,0, 0}},
-    {{0x1F82,0x1F8A,0,  0x1F02,0x03B9,0, 0}},
-    {{0x1F83,0x1F8B,0,  0x1F03,0x03B9,0, 0}},
-    {{0x1F84,0x1F8C,0,  0x1F04,0x03B9,0, 0}},
-    {{0x1F85,0x1F8D,0,  0x1F05,0x03B9,0, 0}},
-    {{0x1F86,0x1F8E,0,  0x1F06,0x03B9,0, 0}},
-    {{0x1F87,0x1F8F,0,  0x1F07,0x03B9,0, 0}},
-    {{0x1F90,0x1F98,0,  0x1F20,0x03B9,0, 0}},
-    {{0x1F91,0x1F99,0,  0x1F21,0x03B9,0, 0}},
-    {{0x1F92,0x1F9A,0,  0x1F22,0x03B9,0, 0}},
-    {{0x1F93,0x1F9B,0,  0x1F23,0x03B9,0, 0}},
-    {{0x1F94,0x1F9C,0,  0x1F24,0x03B9,0, 0}},
-    {{0x1F95,0x1F9D,0,  0x1F25,0x03B9,0, 0}},
-    {{0x1F96,0x1F9E,0,  0x1F26,0x03B9,0, 0}},
-    {{0x1F97,0x1F9F,0,  0x1F27,0x03B9,0, 0}},
-    {{0x1FA0,0x1FA8,0,  0x1F60,0x03B9,0, 0}},
-    {{0x1FA1,0x1FA9,0,  0x1F61,0x03B9,0, 0}},
-    {{0x1FA2,0x1FAA,0,  0x1F62,0x03B9,0, 0}},
-    {{0x1FA3,0x1FAB,0,  0x1F63,0x03B9,0, 0}},
-    {{0x1FA4,0x1FAC,0,  0x1F64,0x03B9,0, 0}},
-    {{0x1FA5,0x1FAD,0,  0x1F65,0x03B9,0, 0}},
-    {{0x1FA6,0x1FAE,0,  0x1F66,0x03B9,0, 0}},
-    {{0x1FA7,0x1FAF,0,  0x1F67,0x03B9,0, 0}},
-    {{0x1FB2,0,  0x1F70,0x03B9,0, 0}},
-    {{0x1FC2,0,  0x1F74,0x03B9,0, 0}},
-    {{0x1FF2,0,  0x1F7C,0x03B9,0, 0}},
-    {{0,  0xD801,0xDC00,0, 0xD801,0xDC28,0, 0}},
-    {{0,  0xD801,0xDC01,0, 0xD801,0xDC29,0, 0}},
-    {{0,  0xD801,0xDC02,0, 0xD801,0xDC2A,0, 0}},
-    {{0,  0xD801,0xDC03,0, 0xD801,0xDC2B,0, 0}},
-    {{0,  0xD801,0xDC04,0, 0xD801,0xDC2C,0, 0}},
-    {{0,  0xD801,0xDC05,0, 0xD801,0xDC2D,0, 0}},
-    {{0,  0xD801,0xDC06,0, 0xD801,0xDC2E,0, 0}},
-    {{0,  0xD801,0xDC07,0, 0xD801,0xDC2F,0, 0}},
-    {{0,  0xD801,0xDC08,0, 0xD801,0xDC30,0, 0}},
-    {{0,  0xD801,0xDC09,0, 0xD801,0xDC31,0, 0}},
-    {{0,  0xD801,0xDC0A,0, 0xD801,0xDC32,0, 0}},
-    {{0,  0xD801,0xDC0B,0, 0xD801,0xDC33,0, 0}},
-    {{0,  0xD801,0xDC0C,0, 0xD801,0xDC34,0, 0}},
-    {{0,  0xD801,0xDC0D,0, 0xD801,0xDC35,0, 0}},
-    {{0,  0xD801,0xDC0E,0, 0xD801,0xDC36,0, 0}},
-    {{0,  0xD801,0xDC0F,0, 0xD801,0xDC37,0, 0}},
-    {{0,  0xD801,0xDC10,0, 0xD801,0xDC38,0, 0}},
-    {{0,  0xD801,0xDC11,0, 0xD801,0xDC39,0, 0}},
-    {{0,  0xD801,0xDC12,0, 0xD801,0xDC3A,0, 0}},
-    {{0,  0xD801,0xDC13,0, 0xD801,0xDC3B,0, 0}},
-    {{0,  0xD801,0xDC14,0, 0xD801,0xDC3C,0, 0}},
-    {{0,  0xD801,0xDC15,0, 0xD801,0xDC3D,0, 0}},
-    {{0,  0xD801,0xDC16,0, 0xD801,0xDC3E,0, 0}},
-    {{0,  0xD801,0xDC17,0, 0xD801,0xDC3F,0, 0}},
-    {{0,  0xD801,0xDC18,0, 0xD801,0xDC40,0, 0}},
-    {{0,  0xD801,0xDC19,0, 0xD801,0xDC41,0, 0}},
-    {{0,  0xD801,0xDC1A,0, 0xD801,0xDC42,0, 0}},
-    {{0,  0xD801,0xDC1B,0, 0xD801,0xDC43,0, 0}},
-    {{0,  0xD801,0xDC1C,0, 0xD801,0xDC44,0, 0}},
-    {{0,  0xD801,0xDC1D,0, 0xD801,0xDC45,0, 0}},
-    {{0,  0xD801,0xDC1E,0, 0xD801,0xDC46,0, 0}},
-    {{0,  0xD801,0xDC1F,0, 0xD801,0xDC47,0, 0}},
-    {{0,  0xD801,0xDC20,0, 0xD801,0xDC48,0, 0}},
-    {{0,  0xD801,0xDC21,0, 0xD801,0xDC49,0, 0}},
-    {{0,  0xD801,0xDC22,0, 0xD801,0xDC4A,0, 0}},
-    {{0,  0xD801,0xDC23,0, 0xD801,0xDC4B,0, 0}},
-    {{0,  0xD801,0xDC24,0, 0xD801,0xDC4C,0, 0}},
-    {{0,  0xD801,0xDC25,0, 0xD801,0xDC4D,0, 0}}
-};
-
-#define CASE_PAIRS_LENGTH (sizeof(CASE_PAIRS)/sizeof(CASE_PAIRS[0]))
-#define CASE_NONPAIRS_LENGTH (sizeof(CASE_NONPAIRS)/sizeof(CASE_NONPAIRS[0]))
-
-/**
- * Add to this set all members of the case fold equivalency class
- * that contains 'folded'.
- * @param folded a string within a case fold equivalency class.
- * It must have the property that UCharacter.foldCase(folded,
- * DEFAULT_CASE_MAP).equals(folded).
- */
-void UnicodeSet::caseCloseOne(const UnicodeString& folded) {
-    if (folded.length() == 1) {
-        caseCloseOne(folded.charAt(0));
-        return;
-    }
-
-    const CaseEquivClass* c = getCaseMapOf(folded);
-    if (c != NULL) {
-        caseCloseOne(*c);
-        return;
-    }
-
-    // Add 'folded' itself; it belongs to no equivalency class.
-    add(folded);
-}
-
-/**
- * Add to this set all members of the case fold equivalency class
- * that contains 'folded'.
- * @param folded a code UNIT within a case fold equivalency class.
- * It must have the property that uchar_foldCase(folded,
- * DEFAULT_CASE_MAP) == folded.
- */
-void UnicodeSet::caseCloseOne(UChar folded) {
-    // We must do a DOUBLE LOOKUP, first in the CompactByteArray that
-    // indexes into CASE_NONPAIRS[] and then into the CASE_PAIRS[]
-    // sorted array.  A character will occur in one or the other, or
-    // neither, but not both.
-
-    // Look in the CompactByteArray.
-    const CaseEquivClass* c = getCaseMapOf(folded);
-    if (c != NULL) {
-        caseCloseOne(*c);
-        return;
-    }
-
-    // Binary search in pairs array, looking at only even entries.
-    // The indices low, high, and x will be halved with respect to
-    // CASE_PAIRS[]; that is, they must be doubled before indexing.
-
-    // CASE_PAIRS has 1312 elements, of 656 pairs, so the search
-    // takes no more than 10 passes.
-    int32_t low = 0;
-    int32_t high = (CASE_PAIRS_LENGTH >> 1) - 1;
-    int32_t x;
-    do {
-        x = (low + high) >> 1;
-        UChar ch = CASE_PAIRS[(uint32_t)(x << 1)];
-        if (folded < ch) {
-            high = x - 1;
-        } else if (folded > ch) {
-            low = x + 1;
-        } else {
-            break;
-        }
-    } while (low < high);
-    
-    x = (low + high) & ~1; // ((low + high) >> 1) << 1
-    if (folded == CASE_PAIRS[x]) {
-        add(CASE_PAIRS[x]);
-        add(CASE_PAIRS[x+1]);
-    } else {
-        // If the search fails, then add folded itself; it is a
-        // case-unique code unit.
-        add(folded);
-    }
-}
-
-/**
- * Add to this set all members of the given CaseEquivClass object.
- */
-void UnicodeSet::caseCloseOne(const CaseEquivClass& c) {
-    const UChar* p = c.getSingles();
-    while (*p) {
-        add(*p++); // add all single code units
-    }
-    for (c.getStrings(p); *p; c.nextString(p)) {
-        add(p); // add all strings
-    }
-}
-
-/**
- * Given a folded string of length >= 2 code units, return the
- * CaseEquivClass containing this string, or NULL if none.
- */
-const CaseEquivClass* UnicodeSet::getCaseMapOf(const UnicodeString& folded) {
-    umtx_lock(NULL);
-    UBool f = (CASE_EQUIV_HASH == NULL);
-    umtx_unlock(NULL);
-
-    if (f) {
-        // Create the Hashtable, which maps UnicodeStrings to index
-        // values into CASE_NONPAIRS.
-        UErrorCode ec = U_ZERO_ERROR;
-        Hashtable* hash = new Hashtable();
-        if (hash != NULL) {
-            int32_t i;
-            for (i=0; i<(int32_t)CASE_NONPAIRS_LENGTH; ++i) {
-                const CaseEquivClass* c = &CASE_NONPAIRS[i];
-                const UChar* p;
-                for (c->getStrings(p); *p; c->nextString(p)) {
-                    hash->put(UnicodeString(p), (void*) c, ec);
-                }
-            }
-            if (U_SUCCESS(ec)) {
-                umtx_lock(NULL);
-                if (CASE_EQUIV_HASH == NULL) {
-                    CASE_EQUIV_HASH = hash;
-                    hash = NULL;
-                    ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
-                }
-                umtx_unlock(NULL);
-            }
-            delete hash;
-        }
-    }
-
-    return (CASE_EQUIV_HASH != NULL) ?
-        (const CaseEquivClass*) CASE_EQUIV_HASH->get(folded) : NULL;
-}
-
-/**
- * Given a folded code unit, return the CaseEquivClass containing it,
- * or NULL if none.
- */
-const CaseEquivClass* UnicodeSet::getCaseMapOf(UChar folded) {
-    umtx_lock(NULL);
-    UBool f = (CASE_EQUIV_CBA == NULL);
-    umtx_unlock(NULL);
-
-    if (f) {
-        // Create the CompactByteArray, which maps single code units
-        // to index values into CASE_NONPAIRS.
-        CompactByteArray* cba = ucmp8_open(-1);
-        if (ucmp8_isBogus(cba)) {
-            ucmp8_close(cba);
-            cba = NULL;
-        } else {
-            int32_t i;
-            for (i=0; i<(int32_t)CASE_NONPAIRS_LENGTH; ++i) {
-                const UChar* p = CASE_NONPAIRS[i].getSingles();
-                UChar ch;
-                while ((ch = *p++) != 0) {
-                    ucmp8_set(cba, ch, (int8_t) i);
-                }
-            }
-            ucmp8_compact(cba, 256);
-        }
-
-        umtx_lock(NULL);
-        if (CASE_EQUIV_CBA == NULL) {
-            CASE_EQUIV_CBA = cba;
-            cba = NULL;
-            ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
-        }
-        umtx_unlock(NULL);
-        if (cba != NULL) {
-            ucmp8_close(cba);
-        }
-    }
-
-    if (CASE_EQUIV_CBA != NULL) {
-        int32_t index = ucmp8_getu(CASE_EQUIV_CBA, folded);
-        if (index != 255) {
-            return &CASE_NONPAIRS[index];
-        }
-    }
-    return NULL;
-}
-
 U_NAMESPACE_END
--- a/icu4c/source/common/unorm.cpp
+++ b/icu4c/source/common/unorm.cpp
@ -262,7 +262,7 @@ isAcceptable(void * /* context */,
 static UBool U_CALLCONV
 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*limit*/, uint32_t /*value*/) {
    /* add the start code point to the USet */
-    USetAdder *sa=(USetAdder *)context;
+    const USetAdder *sa=(const USetAdder *)context;
    sa->add(sa->set, start);
    return TRUE;
 }
@ -1129,7 +1129,7 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode) {
 }

 U_CAPI void U_EXPORT2
-unorm_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
+unorm_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    UChar c;

    if(U_FAILURE(*pErrorCode) || !_haveData(*pErrorCode)) {
--- a/icu4c/source/common/unormimp.h
+++ b/icu4c/source/common/unormimp.h
@ -452,7 +452,7 @@ unorm_getNX(int32_t options, UErrorCode *pErrorCode);
 * @internal
 */
 U_CAPI void U_EXPORT2
-unorm_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
+unorm_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);

 /**
 * Swap unorm.icu. See udataswp.h.
--- a/icu4c/source/common/uprops.c
+++ b/icu4c/source/common/uprops.c
@ -400,7 +400,7 @@ uprops_getSource(UProperty which) {
 #if 0

 U_CAPI void U_EXPORT2
-uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
+uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }
--- a/icu4c/source/common/uprops.h
+++ b/icu4c/source/common/uprops.h
@ -305,7 +305,7 @@ uprv_getMaxISOCommentLength();
 * @param sa USetAdder to receive characters.
 */
 U_CAPI void U_EXPORT2
-uprv_getCharNameCharacters(USetAdder *sa);
+uprv_getCharNameCharacters(const USetAdder *sa);

 #if 0
 /* 
@ -318,7 +318,7 @@ urename.h and unames.c changed accordingly.
 * @param sa USetAdder to receive characters.
 */
 U_CAPI void U_EXPORT2
-uprv_getISOCommentCharacters(USetAdder *sa);
+uprv_getISOCommentCharacters(const USetAdder *sa);
 */
 #endif

@ -360,14 +360,14 @@ uprops_getSource(UProperty which);
 * @internal
 */
 U_CAPI void U_EXPORT2
-uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
+uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);

 /**
 * Same as uchar_addPropertyStarts() but only for Hangul_Syllable_Type.
 * @internal
 */
 U_CAPI void U_EXPORT2
-uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
+uhst_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);

 /**
 * Return a set of characters for property enumeration.
@ -378,7 +378,7 @@ uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
 * @internal
 */
 U_CAPI void U_EXPORT2
-uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode);
+uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);

 /**
 * Swap the ICU Unicode properties file. See uchar.c.
--- a/icu4c/source/test/intltest/usettest.cpp
+++ b/icu4c/source/test/intltest/usettest.cpp
@ -963,6 +963,32 @@ void UnicodeSetTest::TestCloseOver() {
        CASE,
        "[ABC]","[A-Ca-c]",

+        CASE, "[i]", "[iI]",
+
+        CASE, "[\\u0130]",          "[\\u0130{i\\u0307}]", // dotted I
+        CASE, "[{i\\u0307}]",       "[\\u0130{i\\u0307}]", // i with dot
+
+        CASE, "[\\u0131]",          "[\\u0131]", // dotless i
+
+        CASE, "[\\u0390]",          "[\\u0390\\u1FD3{\\u03B9\\u0308\\u0301}]",
+
+        CASE, "[\\u03c2]",          "[\\u03a3\\u03c2\\u03c3]", // sigmas
+
+        CASE, "[\\u03f2]",          "[\\u03f2\\u03f9]", // lunate sigmas
+
+        CASE, "[\\u03f7]",          "[\\u03f7\\u03f8]",
+
+        CASE, "[\\u1fe3]",          "[\\u03b0\\u1fe3{\\u03c5\\u0308\\u0301}]",
+
+        CASE, "[\\ufb05]",          "[\\ufb05\\ufb06{st}]",
+        CASE, "[{st}]",             "[\\ufb05\\ufb06{st}]",
+
+        CASE, "[\\U0001044F]",      "[\\U00010427\\U0001044F]",
+
+        CASE, "[{a\\u02BE}]",       "[\\u1E9A{a\\u02BE}]", // first in sorted table
+
+        CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table
+
        CASE_MAPPINGS,
        "[aq\\u00DF{Bc}{bC}{Fi}]",
        "[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]",
@ -980,6 +1006,7 @@ void UnicodeSetTest::TestCloseOver() {

    UnicodeSet s;
    UnicodeSet t;
+    UnicodeString buf;
    for (int32_t i=0; DATA[i]!=NULL; i+=3) {
        int32_t selector = DATA[i][0];
        UnicodeString pat(DATA[i+1]);
@ -994,12 +1021,72 @@ void UnicodeSetTest::TestCloseOver() {
        if (s == t) {
            logln((UnicodeString)"Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
        } else {
-            UnicodeString buf;
            errln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
                  s.toPattern(buf, TRUE) + ", expected " + exp);
        }
    }

+#if 0
+    /*
+     * Unused test code.
+     * This was used to compare the old implementation (using USET_CASE)
+     * with the new one (using 0x100 temporarily)
+     * while transitioning from hardcoded case closure tables in uniset.cpp
+     * (moved to uniset_props.cpp) to building the data by gencase into ucase.icu
+     * and using ucase.c functions for closure.
+     * See Jitterbug 3432 RFE: Move uniset.cpp data to a data file
+     *
+     * Note: The old and new implementations never fully matched because
+     * the old implementation turned out to not map U+0130 and U+0131 correctly
+     * (dotted I and dotless i) and because the old implementation's data tables
+     * were outdated compared to Unicode 4.0.1 at the time of the change to the
+     * new implementation. (So sigmas and some other characters were not handled
+     * according to the newer Unicode version.)
+     */
+    UnicodeSet sens("[:case_sensitive:]", ec), sens2, s2;
+    UnicodeSetIterator si(sens);
+    UnicodeString str, buf2;
+    const UnicodeString *pStr;
+    UChar32 c;
+    while(si.next()) {
+        if(!si.isString()) {
+            c=si.getCodepoint();
+            s.clear();
+            s.add(c);
+
+            str.setTo(c);
+            str.foldCase();
+            sens2.add(str);
+
+            t=s;
+            s.closeOver(USET_CASE);
+            t.closeOver(0x100);
+            if(s!=t) {
+                errln("FAIL: closeOver(U+%04x) differs: ", c);
+                errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE));
+            }
+        }
+    }
+    // remove all code points
+    // should contain all full case folding mapping strings
+    sens2.remove(0, 0x10ffff);
+    si.reset(sens2);
+    while(si.next()) {
+        if(si.isString()) {
+            pStr=&si.getString();
+            s.clear();
+            s.add(*pStr);
+            t=s2=s;
+            s.closeOver(USET_CASE);
+            t.closeOver(0x100);
+            if(s!=t) {
+                errln((UnicodeString)"FAIL: closeOver("+s2.toPattern(buf, TRUE)+") differs: ");
+                errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE));
+            }
+        }
+    }
+#endif
+
    // Test the pattern API
    s.applyPattern("[abc]", USET_CASE_INSENSITIVE, NULL, ec);
    if (U_FAILURE(ec)) {
--- a/icu4c/source/tools/gencase/gencase.h
+++ b/icu4c/source/tools/gencase/gencase.h
@ -28,6 +28,21 @@ U_CDECL_BEGIN
 #define UGENCASE_EXC_SHIFT     16
 #define UGENCASE_EXC_MASK      0xffff0000

+/*
+ * Values for the ucase.icu unfold[] data array, see store.c.
+ * The values are stored in ucase.icu so that the runtime code will work with
+ * changing values, but they are hardcoded for gencase for simplicity.
+ * They are optimized, that is, provide for minimal table column widths,
+ * for the actual Unicode data, so that the table size is minimized.
+ * Future versions of Unicode may require increases of some of these values.
+ */
+enum {
+    UGENCASE_UNFOLD_STRING_WIDTH=3,
+    UGENCASE_UNFOLD_CP_WIDTH=2,
+    UGENCASE_UNFOLD_WIDTH=UGENCASE_UNFOLD_STRING_WIDTH+UGENCASE_UNFOLD_CP_WIDTH,
+    UGENCASE_UNFOLD_MAX_ROWS=250
+};
+
 /* special casing data */
 typedef struct {
    UChar32 code;
@ -45,6 +60,7 @@ typedef struct {
 /* case mapping properties */
 typedef struct {
    UChar32 code, lowerCase, upperCase, titleCase;
+    UChar32 closure[8];
    SpecialCasing *specialCasing;
    CaseFolding *caseFolding;
    uint8_t gc, cc;
--- a/icu4c/source/tools/gencase/store.c
+++ b/icu4c/source/tools/gencase/store.c
@ -26,11 +26,14 @@
 #include "cstring.h"
 #include "filestrm.h"
 #include "utrie.h"
+#include "uarrsort.h"
 #include "unicode/udata.h"
 #include "unewdata.h"
 #include "propsvec.h"
 #include "gencase.h"

+#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
+
 /* Unicode case mapping properties file format ---------------------------------

 The file format prepared and written here contains several data
@ -41,7 +44,9 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
 precedes the actual data. It contains platform properties values and the
 file format version.

-The following is a description of format version 1 .
+The following is a description of format version 1.1 .
+
+Format version 1.1 adds data for case closure.

 The file contains the following structures:

@ -52,16 +57,19 @@ The file contains the following structures:
    i1 dataLength; -- length in bytes of the post-header data (incl. indexes[])
    i2 trieSize; -- size in bytes of the case mapping properties trie
    i3 exceptionsLength; -- length in uint16_t of the exceptions array
+    i4 unfoldLength; -- length in uint16_t of the reverse-folding array (new in format version 1.1)

-    i4..i14 reservedIndexes; -- reserved values; 0 for now
+    i5..i14 reservedIndexes; -- reserved values; 0 for now

    i15 maxFullLength; -- maximum length of a full case mapping/folding string


-    Serizalied trie, see utrie.h;
+    Serialized trie, see utrie.h;

    const uint16_t exceptions[exceptionsLength];

+    const UChar unfold[unfoldLength];
+

 Trie data word:
 Bits
@ -117,12 +125,24 @@ Optional-value slots:
 1   case folding (code point)
 2   uppercase mapping (code point)
 3   titlecase mapping (code point)
-4..6 reserved
+4   reserved
+5   reserved
+6   closure mappings (new in format version 1.1)
 7   there is at least one full (string) case mapping
    the length of each is encoded in a nibble of this optional value,
    and the strings follow this optional value in the same order:
    lower/fold/upper/title

+The optional closure mappings value is used as follows:
+Bits 0..3 contain the length of a string of code points for case closure.
+The string immediately follows the full case mappings, or the closure value
+slot if there are no full case mappings.
+Bits 4..15 are reserved and could be used in the future to indicate the
+number of strings for case closure.
+Complete case closure for a code point is given by the union of all simple
+and full case mappings and foldings, plus the case closure code points
+(and potentially, in the future, case closure strings).
+
 For space saving, some values are not stored. Lookups are as follows:
 - If special casing is conditional, then no full lower/upper/title mapping
  strings are stored.
@ -135,6 +155,28 @@ For space saving, some values are not stored. Lookups are as follows:
    simple title->simple upper
    finally, the original code point (no mapping)

+This fallback order is strict:
+In particular, the fallback from full case folding is to simple case folding,
+not to full lowercase mapping.
+
+Reverse case folding data ("unfold") array: (new in format version 1.1)
+
+This array stores some miscellaneous values followed by a table. The data maps
+back from multi-character strings to their original code points, for use
+in case closure.
+
+The table contains two columns of strings.
+The string in the first column is the case folding of each of the code points
+in the second column. The strings are terminated with NUL or by the end of the
+column, whichever comes first.
+
+The miscellaneous data takes up one pseudo-row and includes:
+- number of rows
+- number of UChars per row
+- number of UChars in the left (folding string) column
+
+The table is sorted by its first column. Values in the first column are unique.
+
 ----------------------------------------------------------------------------- */

 /* UDataInfo cf. udata.h */
@ -149,7 +191,7 @@ static UDataInfo dataInfo={

    /* dataFormat="cAsE" */
    { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
-    { 1, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
+    { 1, 1, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
    { 4, 0, 1, 0 }                              /* dataVersion */
 };

@ -167,6 +209,13 @@ static uint16_t exceptionsCount=0;
 /* becomes indexes[UCASE_IX_MAX_FULL_LENGTH] */
 static int32_t maxFullLength=U16_MAX_LENGTH;

+/* reverse case folding ("unfold") data */
+static UChar unfold[UGENCASE_UNFOLD_MAX_ROWS*UGENCASE_UNFOLD_WIDTH]={
+    0, UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH, 0, 0
+};
+static uint16_t unfoldRows=0;
+static uint16_t unfoldTop=UGENCASE_UNFOLD_WIDTH;
+
 /* -------------------------------------------------------------------------- */

 extern void
@ -176,6 +225,29 @@ setUnicodeVersion(const char *v) {
    uprv_memcpy(dataInfo.dataVersion, version, 4);
 }

+static void
+addUnfolding(UChar32 c, const UChar *s, int32_t length) {
+    int32_t i;
+
+    if(length>UGENCASE_UNFOLD_STRING_WIDTH) {
+        fprintf(stderr, "gencase error: case folding too long (length=%ld>%d=UGENCASE_UNFOLD_STRING_WIDTH)\n",
+                (long)length, UGENCASE_UNFOLD_STRING_WIDTH);
+        exit(U_INTERNAL_PROGRAM_ERROR);
+    }
+    if(unfoldTop>=LENGTHOF(unfold)) {
+        fprintf(stderr, "gencase error: too many multi-character case foldings\n");
+        exit(U_BUFFER_OVERFLOW_ERROR);
+    }
+    u_memset(unfold+unfoldTop, 0, UGENCASE_UNFOLD_WIDTH);
+    u_memcpy(unfold+unfoldTop, s, length);
+
+    i=unfoldTop+UGENCASE_UNFOLD_STRING_WIDTH;
+    U16_APPEND_UNSAFE(unfold, i, c);
+
+    ++unfoldRows;
+    unfoldTop+=UGENCASE_UNFOLD_WIDTH;
+}
+
 /* store a character's properties ------------------------------------------- */

 extern void
@ -214,6 +286,9 @@ setProps(Props *p) {
    if(p->upperCase!=p->titleCase) {
        value|=UCASE_EXCEPTION;
    }
+    if(p->closure[0]!=0) {
+        value|=UCASE_EXCEPTION;
+    }
    if(p->specialCasing!=NULL) {
        value|=UCASE_EXCEPTION;
    }
@ -286,6 +361,14 @@ setProps(Props *p) {
                        u_errorName(errorCode));
        exit(errorCode);
    }
+
+    /* add the multi-character case folding to the "unfold" data */
+    if(p->caseFolding!=NULL) {
+        int32_t length=p->caseFolding->full[0];
+        if(length>1 && u_strHasMoreChar32Than(p->caseFolding->full+1, length, 1)) {
+            addUnfolding(p->code, p->caseFolding->full+1, length);
+        }
+    }
 }

 extern void
@ -298,13 +381,368 @@ addCaseSensitive(UChar32 first, UChar32 last) {
    }
 }

+/* finalize reverse case folding ("unfold") data ---------------------------- */
+
+static int32_t U_CALLCONV
+compareUnfold(const void *context, const void *left, const void *right) {
+    return u_memcmp((const UChar *)left, (const UChar *)right, UGENCASE_UNFOLD_WIDTH);
+}
+
+static void
+makeUnfoldData() {
+    static const UChar
+        iDot[2]=        { 0x69, 0x307 };
+
+    UChar *p, *q;
+    int32_t i, j, k;
+    UErrorCode errorCode;
+
+    /*
+     * add a case folding that we missed because it's conditional:
+     * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+     */
+    addUnfolding(0x130, iDot, 2);
+
+    /* sort the data */
+    errorCode=U_ZERO_ERROR;
+    uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
+                   compareUnfold, NULL, FALSE, &errorCode);
+
+    /* make unique-string rows by merging adjacent ones' code point columns */
+
+    /* make p point to row i-1 */
+    p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH;
+
+    for(i=1; i<unfoldRows;) {
+        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
+            /* concatenate code point columns */
+            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
+            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
+            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
+                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
+            }
+            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
+                fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
+                        (long)j, UGENCASE_UNFOLD_CP_WIDTH);
+                exit(U_BUFFER_OVERFLOW_ERROR);
+            }
+
+            /* move following rows up one */
+            --unfoldRows;
+            unfoldTop-=UGENCASE_UNFOLD_WIDTH;
+            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
+        } else {
+            p+=UGENCASE_UNFOLD_WIDTH;
+            ++i;
+        }
+    }
+
+    unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;
+
+    if(beVerbose) {
+        puts("unfold data:");
+
+        p=(UChar *)unfold;
+        for(i=0; i<unfoldRows; ++i) {
+            p+=UGENCASE_UNFOLD_WIDTH;
+            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
+                   i, p[0], p[1], p[2], p[3], p[4]);
+        }
+    }
+}
+
+/* case closure ------------------------------------------------------------- */
+
+static void
+addClosureMapping(UChar32 src, UChar32 dest) {
+    uint32_t value;
+
+    if(beVerbose) {
+        printf("add closure mapping U+%04lx->U+%04lx\n",
+                (unsigned long)src, (unsigned long)dest);
+    }
+
+    value=upvec_getValue(pv, src, 0);
+    if(value&UCASE_EXCEPTION) {
+        Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
+        int32_t i;
+
+        /* append dest to src's closure array */
+        for(i=0;; ++i) {
+            if(i==LENGTHOF(p->closure)) {
+                fprintf(stderr, "closure[] overflow for U+%04lx->U+%04lx\n",
+                                (unsigned long)src, (unsigned long)dest);
+                exit(U_BUFFER_OVERFLOW_ERROR);
+            } else if(p->closure[i]==dest) {
+                break; /* do not store duplicates */
+            } else if(p->closure[i]==0) {
+                p->closure[i]=dest;
+                break;
+            }
+        }
+    } else {
+        Props p2={ 0 };
+        UChar32 next;
+        UErrorCode errorCode;
+
+        /*
+         * decode value into p2 (enough for makeException() to work properly),
+         * add the closure mapping,
+         * and set the new exception for src
+         */
+        p2.code=src;
+        p2.closure[0]=dest;
+
+        if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
+            /* one simple case mapping, don't care which one */
+            next=src+((int16_t)value>>UCASE_DELTA_SHIFT);
+            if(next!=src) {
+                if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
+                    p2.upperCase=p2.titleCase=next;
+                } else {
+                    p2.lowerCase=next;
+                }
+            }
+        } else if(value&UCASE_DELTA_MASK) {
+            fprintf(stderr, "gencase error: unable to add case closure exception to case-ignorable U+%04lx\n",
+                            (unsigned long)src);
+            exit(U_INTERNAL_PROGRAM_ERROR);
+        }
+
+        value&=~(UGENCASE_EXC_MASK|UCASE_DELTA_MASK); /* remove previous simple mapping */
+        value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
+        value|=UCASE_EXCEPTION;
+        uprv_memcpy(excProps+exceptionsCount, &p2, sizeof(p2));
+        if(++exceptionsCount==MAX_EXC_COUNT) {
+            fprintf(stderr, "gencase: too many exceptions\n");
+            exit(U_INDEX_OUTOFBOUNDS_ERROR);
+        }
+
+        errorCode=U_ZERO_ERROR;
+        if(!upvec_setValue(pv, src, src+1, 0, value, 0xffffffff, &errorCode)) {
+            fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
+                            u_errorName(errorCode));
+            exit(errorCode);
+        }
+    }
+}
+
+/*
+ * Find missing case mapping relationships and add mappings for case closure.
+ * This function starts from an "original" code point and recursively
+ * finds its case mappings and the case mappings of where it maps to.
+ *
+ * The recursion depth is capped at 3 nested calls of this function.
+ * In each call, the current code point is c, and the function enumerates
+ * all of c's simple (single-code point) case mappings.
+ * prev is the code point that case-mapped to c.
+ * prev2 is the code point that case-mapped to prev.
+ *
+ * The initial function call has prev2<0, prev<0, and c==orig
+ * (marking no code points).
+ * It enumerates c's case mappings and recurses without further action.
+ *
+ * The second-level function call has prev2<0, prev==orig, and c is
+ * the destination code point of one of prev's case mappings.
+ * The function checks if any of c's case mappings go back to orig
+ * and adds a closure mapping if not.
+ * In other words, it turns a case mapping relationship of
+ *   orig->c
+ * into
+ *   orig<->c
+ *
+ * The third-level function call has prev2==orig, prev>=0, and c is
+ * the destination code point of one of prev's case mappings.
+ * (And prev is the destination of one of prev2's case mappings.)
+ * The function checks if any of c's case mappings go back to orig
+ * and adds a closure mapping if not.
+ * In other words, it turns case mapping relationships of
+ *   orig->prev->c or orig->prev<->c
+ * into
+ *   orig->prev->c->orig or orig->prev<->c->orig
+ * etc.
+ * (Graphically, this closes a triangle.)
+ *
+ * With repeated application on all code points until no more closure mappings
+ * are added, all case equivalence groups get complete mappings.
+ * That is, in each group of code points with case relationships
+ * each code point will in the end have some mapping to each other
+ * code point in the group.
+ *
+ * @return TRUE if a closure mapping was added
+ */
+static UBool
+addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value) {
+    UChar32 next;
+    UBool someMappingsAdded=FALSE;
+
+    if(c!=orig) {
+        /* get the properties for c */
+        value=upvec_getValue(pv, c, 0);
+    }
+    /* else if c==orig then c's value was passed in */
+
+    if(value&UCASE_EXCEPTION) {
+        UChar32 set[32];
+        int32_t i, count=0;
+
+        Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
+
+        /*
+         * marker for whether any of c's mappings goes to orig
+         * c==orig: prevent adding a closure mapping when getting orig's own, direct mappings
+         */
+        UBool mapsToOrig=(UBool)(c==orig);
+
+        /* collect c's case mapping destinations in set[] */
+        if((next=p->upperCase)!=0 && next!=c) {
+            set[count++]=next;
+        }
+        if((next=p->lowerCase)!=0 && next!=c) {
+            set[count++]=next;
+        }
+        if(p->upperCase!=(next=p->titleCase) && next!=c) {
+            set[count++]=next;
+        }
+        if(p->caseFolding!=NULL && (next=p->caseFolding->simple)!=0 && next!=c) {
+            set[count++]=next;
+        }
+
+        /* append c's current closure mappings to set[] */
+        for(i=0; i<LENGTHOF(p->closure) && (next=p->closure[i])!=0; ++i) {
+            set[count++]=next;
+        }
+
+        /* process all code points to which c case-maps */
+        for(i=0; i<count; ++i) {
+            next=set[i]; /* next!=c */
+
+            if(next==orig) {
+                mapsToOrig=TRUE; /* remember that we map to orig */
+            } else if(prev2<0 && next!=prev) {
+                /*
+                 * recurse unless
+                 * we have reached maximum depth (prev2>=0) or
+                 * this is a mapping to one of the previous code points (orig, prev, c)
+                 */
+                someMappingsAdded|=addClosure(orig, prev, c, next, 0);
+            }
+        }
+
+        if(!mapsToOrig) {
+            addClosureMapping(c, orig);
+            return TRUE;
+        }
+    } else {
+        if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
+            /* one simple case mapping, don't care which one */
+            next=c+((int16_t)value>>UCASE_DELTA_SHIFT);
+            if(next!=c) {
+                /*
+                 * recurse unless
+                 * we have reached maximum depth (prev2>=0) or
+                 * this is a mapping to one of the previous code points (orig, prev, c)
+                 */
+                if(prev2<0 && next!=orig && next!=prev) {
+                    someMappingsAdded|=addClosure(orig, prev, c, next, 0);
+                }
+
+                if(c!=orig && next!=orig) {
+                    /* c does not map to orig, add a closure mapping c->orig */
+                    addClosureMapping(c, orig);
+                    return TRUE;
+                }
+            }
+        }
+    }
+
+    return someMappingsAdded;
+}
+
 extern void
 makeCaseClosure() {
-    /* TODO */
+    UChar *p;
+    uint32_t *row;
+    uint32_t value;
+    UChar32 start, limit, c, c2;
+    int32_t i, j;
+    UBool someMappingsAdded;
+
+    /*
+     * finalize the "unfold" data because we need to use it to add closure mappings
+     * for situations like FB05->"st"<-FB06
+     * where we would otherwise miss the FB05<->FB06 relationship
+     */
+    makeUnfoldData();
+
+    /* use the "unfold" data to add mappings */
+
+    /* p always points to the code points; this loop ignores the strings completely */
+    p=unfold+UGENCASE_UNFOLD_WIDTH+UGENCASE_UNFOLD_STRING_WIDTH;
+
+    for(i=0; i<unfoldRows; p+=UGENCASE_UNFOLD_WIDTH, ++i) {
+        j=0;
+        U16_NEXT_UNSAFE(p, j, c);
+        while(j<UGENCASE_UNFOLD_CP_WIDTH && p[j]!=0) {
+            U16_NEXT_UNSAFE(p, j, c2);
+            addClosure(c, U_SENTINEL, c, c2, 0);
+        }
+    }
+
+    if(beVerbose) {
+        puts("---- ---- ---- ---- (done with closures from unfolding)");
+    }
+
+    /* add further closure mappings from analyzing simple mappings */
+    do {
+        someMappingsAdded=FALSE;
+
+        i=0;
+        while((row=upvec_getRow(pv, i, &start, &limit))!=NULL) {
+            value=*row;
+            if(value!=0) {
+                while(start<limit) {
+                    if(addClosure(start, U_SENTINEL, U_SENTINEL, start, value)) {
+                        someMappingsAdded=TRUE;
+
+                        /*
+                         * stop this loop because pv was changed and row is not valid any more;
+                         * skip all rows below the current start
+                         */
+                        while((row=upvec_getRow(pv, i, NULL, &limit))!=NULL && start>=limit) {
+                            ++i;
+                        }
+                        row=NULL; /* signal to continue with outer loop, without further ++i */
+                        break;
+                    }
+                    ++start;
+                }
+                if(row==NULL) {
+                    continue; /* see row=NULL above */
+                }
+            }
+            ++i;
+        }
+
+        if(beVerbose && someMappingsAdded) {
+            puts("---- ---- ---- ----");
+        }
+    } while(someMappingsAdded);
 }

 /* exceptions --------------------------------------------------------------- */

+/* get the UTF-16 string length (number of UChars) of zero-terminated code points in a limited-length array */
+static int32_t
+getLengthOfCodePoints(const UChar32 *s, int32_t maxLength) {
+    int32_t i, length;
+
+    for(i=length=0; i<maxLength && s[i]!=0; ++i) {
+        length+=U16_LENGTH(s[i]);
+    }
+    return length;
+}
+
 static UBool
 fullMappingEqualsSimple(const UChar *s, UChar32 simple, UChar32 c) {
    int32_t i, length;
@ -441,6 +879,15 @@ makeException(uint32_t value, Props *p) {
        excWord|=U_MASK(UCASE_EXC_TITLE);
    }

+    /* length of case closure */
+    if(p->closure[0]!=0) {
+        length=getLengthOfCodePoints(p->closure, LENGTHOF(p->closure));
+        slots[count]=(uint32_t)length; /* must be 1..UCASE_CLOSURE_MAX_LENGTH */
+        slotBits|=slots[count];
+        ++count;
+        excWord|=U_MASK(UCASE_EXC_CLOSURE);
+    }
+
    /* lengths of full case mapping strings, stored in the last slot */
    fullLengths=0;
    if(p->specialCasing!=NULL) {
@ -493,6 +940,15 @@ makeException(uint32_t value, Props *p) {
        excTop+=length;
    }

+    /* write the closure data */
+    if(p->closure[0]!=0) {
+        UChar32 c;
+
+        for(i=0; i<LENGTHOF(p->closure) && (c=p->closure[i])!=0; ++i) {
+            U16_APPEND_UNSAFE((UChar *)exceptions, excTop, c);
+        }
+    }
+
    exceptionsTop=excTop;

    /* write the main exceptions word */
@ -559,7 +1015,8 @@ generateData(const char *dataDir) {

    indexes[UCASE_IX_EXC_LENGTH]=exceptionsTop;
    indexes[UCASE_IX_TRIE_SIZE]=trieSize;
-    indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptionsTop;
+    indexes[UCASE_IX_UNFOLD_LENGTH]=unfoldTop;
+    indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptionsTop+2*unfoldTop;

    indexes[UCASE_IX_MAX_FULL_LENGTH]=maxFullLength;

@ -567,6 +1024,7 @@ generateData(const char *dataDir) {
        printf("trie size in bytes:                    %5d\n", (int)trieSize);
        printf("number of code points with exceptions: %5d\n", exceptionsCount);
        printf("size in bytes of exceptions:           %5d\n", 2*exceptionsTop);
+        printf("size in bytes of reverse foldings:     %5d\n", 2*unfoldTop);
        printf("data size:                             %5d\n", (int)indexes[UCASE_IX_LENGTH]);
    }

@ -581,6 +1039,7 @@ generateData(const char *dataDir) {
    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, trieBlock, trieSize);
    udata_writeBlock(pData, exceptions, 2*exceptionsTop);
+    udata_writeBlock(pData, unfold, 2*unfoldTop);

    /* finish up */
    dataLength=udata_finish(pData, &errorCode);