ICU-4078 enable building a UnicodeSet from a property using/loading only the relevant data file

X-SVN-Rev: 16313
2004-09-13 23:33:22 +00:00 · 2004-09-13 23:33:22 +00:00 · f7b7183d7a
commit f7b7183d7a
parent 1ed0796a99
4 changed files with 271 additions and 162 deletions
--- a/icu4c/source/common/uchar.c
+++ b/icu4c/source/common/uchar.c
@ -930,7 +930,7 @@ uprv_getMaxValues(int32_t column) {

 /*
 * get Hangul Syllable Type
- * implemented here so that uchar.c (uchar_addPropertyStarts())
+ * implemented here so that uchar.c (uhst_addPropertyStarts())
 * does not depend on uprops.c (u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE))
 */
 U_CFUNC UHangulSyllableType
@ -995,6 +995,69 @@ ublock_getCode(UChar32 c) {

 /* property starts for UnicodeSet ------------------------------------------- */

+/* for Hangul_Syllable_Type */
+U_CAPI void U_EXPORT2
+uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
+    UChar32 c;
+    int32_t value, value2;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    if(!HAVE_DATA) {
+        *pErrorCode=dataErrorCode;
+        return;
+    }
+
+    /* add code points with hardcoded properties, plus the ones following them */
+
+    /*
+     * Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
+     * First, we add fixed boundaries for the blocks of Jamos.
+     * Then we check in loops to see where the current Unicode version
+     * actually stops assigning such Jamos. We start each loop
+     * at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
+     * (These have not changed since Unicode 2.)
+     */
+    sa->add(sa->set, 0x1100);
+    value=U_HST_LEADING_JAMO;
+    for(c=0x115a; c<=0x115f; ++c) {
+        value2=uchar_getHST(c);
+        if(value!=value2) {
+            value=value2;
+            sa->add(sa->set, c);
+        }
+    }
+
+    sa->add(sa->set, 0x1160);
+    value=U_HST_VOWEL_JAMO;
+    for(c=0x11a3; c<=0x11a7; ++c) {
+        value2=uchar_getHST(c);
+        if(value!=value2) {
+            value=value2;
+            sa->add(sa->set, c);
+        }
+    }
+
+    sa->add(sa->set, 0x11a8);
+    value=U_HST_TRAILING_JAMO;
+    for(c=0x11fa; c<=0x11ff; ++c) {
+        value2=uchar_getHST(c);
+        if(value!=value2) {
+            value=value2;
+            sa->add(sa->set, c);
+        }
+    }
+
+    /* Add Hangul type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE. */
+    for(c=HANGUL_BASE; c<(HANGUL_BASE+HANGUL_COUNT); c+=JAMO_T_COUNT) {
+        sa->add(sa->set, c);
+        sa->add(sa->set, c+1);
+    }
+    sa->add(sa->set, c);
+}
+
 static UBool U_CALLCONV
 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
    /* add the start code point to the USet */
@ -1007,8 +1070,9 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint

 U_CAPI void U_EXPORT2
 uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
-    UChar32 c;
-    int32_t value, value2;
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }

    if(!HAVE_DATA) {
        *pErrorCode=dataErrorCode;
@ -1072,42 +1136,4 @@ uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
    /* add for UCHAR_JOINING_TYPE */
    sa->add(sa->set, ZWNJ); /* range ZWNJ..ZWJ */
    sa->add(sa->set, ZWJ+1);
-
-    /*
-     * Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
-     * First, we add fixed boundaries for the blocks of Jamos.
-     * Then we check in loops to see where the current Unicode version
-     * actually stops assigning such Jamos. We start each loop
-     * at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
-     * (These have not changed since Unicode 2.)
-     */
-    sa->add(sa->set, 0x1100);
-    value=U_HST_LEADING_JAMO;
-    for(c=0x115a; c<=0x115f; ++c) {
-        value2=uchar_getHST(c);
-        if(value!=value2) {
-            value=value2;
-            sa->add(sa->set, c);
-        }
-    }
-
-    sa->add(sa->set, 0x1160);
-    value=U_HST_VOWEL_JAMO;
-    for(c=0x11a3; c<=0x11a7; ++c) {
-        value2=uchar_getHST(c);
-        if(value!=value2) {
-            value=value2;
-            sa->add(sa->set, c);
-        }
-    }
-
-    sa->add(sa->set, 0x11a8);
-    value=U_HST_TRAILING_JAMO;
-    for(c=0x11fa; c<=0x11ff; ++c) {
-        value2=uchar_getHST(c);
-        if(value!=value2) {
-            value=value2;
-            sa->add(sa->set, c);
-        }
-    }
 }
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@ -31,6 +31,8 @@
 #include "uvector.h"
 #include "uprops.h"
 #include "propname.h"
+#include "unormimp.h"
+#include "ucase.h"
 #include "charstr.h"
 #include "ustrfmt.h"
 #include "mutex.h"
@ -149,7 +151,7 @@ static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */

 U_NAMESPACE_BEGIN

-static UnicodeSet* INCLUSIONS = NULL; // cached uprv_getInclusions()
+static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()

 static Hashtable* CASE_EQUIV_HASH = NULL; // for closeOver(USET_CASE)

@ -1016,6 +1018,7 @@ static UBool intPropertyFilter(UChar32 ch, void* context) {
 */
 void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
                             void* context,
+                             int32_t src,
                             UErrorCode &status) {
    // Walk through all Unicode characters, noting the start
    // and end of each range for which filter.contain(c) is
@ -1031,7 +1034,7 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
    // those properties.  Scanning code points is slow.
    if (U_FAILURE(status)) return;

-    const UnicodeSet* inclusions = getInclusions(status);
+    const UnicodeSet* inclusions = getInclusions(src, status);
    if (U_FAILURE(status)) {
        return;
    }
@ -1101,38 +1104,10 @@ UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec)
    if (U_FAILURE(ec)) return *this;

    if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
-        applyFilter(generalCategoryMaskFilter, &value, ec);
-#if UCONFIG_NO_NORMALIZATION
-    } else if(prop == UCHAR_HANGUL_SYLLABLE_TYPE) {
-        /*
-         * Special code for when normalization is off.
-         * HST is still available because it is hardcoded in uprops.c, but
-         * the inclusions set does not have the necessary code points
-         * for normalization properties.
-         * I am hardcoding HST in this case because it is the only property
-         * that prevents genbrk from compiling char.txt when normalization is off.
-         * This saves me from turning off break iteration or making more
-         * complicated changes in genbrk.
-         *
-         * This code is not efficient. For efficiency turn on normalization.
-         *
-         * markus 20030505
-         */
-        UChar32 c;
-
-        clear();
-        for(c=0x1100; c<=0xd7a3; ++c) {
-            if(c==0x1200) {
-                c=0xac00;
-            }
-            if(value == u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE)) {
-                add(c);
-            }
-        }
-#endif
+        applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec);
    } else {
        IntPropertyContext c = {prop, value};
-        applyFilter(intPropertyFilter, &c, ec);
+        applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec);
    }
    return *this;
 }
@ -1205,7 +1180,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                    if (*end != 0) {
                        FAIL(ec);
                    }
-                    applyFilter(numericValueFilter, &value, ec);
+                    applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec);
                    return *this;
                }
                break;
@ -1236,7 +1211,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                    if (!mungeCharName(buf, vname, sizeof(buf))) FAIL(ec);
                    UVersionInfo version;
                    u_versionFromString(version, buf);
-                    applyFilter(versionFilter, &version, ec);
+                    applyFilter(versionFilter, &version, UPROPS_SRC_CHAR, ec);
                    return *this;
                }
                break;
@ -1274,7 +1249,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                    for (int32_t i=0; i<C99_COUNT; ++i) {
                        int32_t c = uprv_comparePropertyNames(pname, C99_DISPATCH[i].name);
                        if (c == 0) {
-                            applyFilter(c99Filter, (void*) &C99_DISPATCH[i], ec);
+                            applyFilter(c99Filter, (void*) &C99_DISPATCH[i], UPROPS_SRC_CHAR, ec);
                            return *this;
                        } else if (c < 0) {
                            // Further entries will not match; bail out
@ -1490,9 +1465,9 @@ _set_addString(USet *set, const UChar *str, int32_t length) {

 U_CDECL_END

-const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
+const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
    umtx_lock(NULL);
-    UBool f = (INCLUSIONS == NULL);
+    UBool f = (INCLUSIONS[src] == NULL);
    umtx_unlock(NULL);
    if (f) {
        UnicodeSet* incl = new UnicodeSet();
@ -1504,11 +1479,29 @@ const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
        };

        if (incl != NULL) {
-            uprv_getInclusions(&sa, &status);
+            switch(src) {
+            case UPROPS_SRC_CHAR:
+                uchar_addPropertyStarts(&sa, &status);
+                break;
+            case UPROPS_SRC_HST:
+                uhst_addPropertyStarts(&sa, &status);
+                break;
+#if !UCONFIG_NO_NORMALIZATION
+            case UPROPS_SRC_NORM:
+                unorm_addPropertyStarts(&sa, &status);
+                break;
+#endif
+            case UPROPS_SRC_CASE:
+                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
+                break;
+            default:
+                status = U_INTERNAL_PROGRAM_ERROR;
+                break;
+            }
            if (U_SUCCESS(status)) {
                umtx_lock(NULL);
-                if (INCLUSIONS == NULL) {
-                    INCLUSIONS = incl;
+                if (INCLUSIONS[src] == NULL) {
+                    INCLUSIONS[src] = incl;
                    incl = NULL;        
                } 
                umtx_unlock(NULL);
@ -1518,16 +1511,20 @@ const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
-    return INCLUSIONS;
+    return INCLUSIONS[src];
 }

 /**
 * Cleanup function for UnicodeSet
 */
 U_CFUNC UBool uset_cleanup(void) {
-    if (INCLUSIONS != NULL) {
-        delete INCLUSIONS;
-        INCLUSIONS = NULL;
+    int32_t i;
+
+    for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
+        if (INCLUSIONS[i] != NULL) {
+            delete INCLUSIONS[i];
+            INCLUSIONS[i] = NULL;
+        }
    }

    if (CASE_EQUIV_HASH != NULL) {
--- a/icu4c/source/common/uprops.c
+++ b/icu4c/source/common/uprops.c
@ -42,50 +42,50 @@ static const struct {
     * and there must be exacly one entry per binary UProperty.
     *
     * Properties with mask 0 are handled in code.
-     * Pseudo-column -2 indicates case mapping properties.
+     * For them, column is the UPropertySource value.
     */
-    {  1, U_MASK(UPROPS_ALPHABETIC) },
-    {  1, U_MASK(UPROPS_ASCII_HEX_DIGIT) },
-    {  1, U_MASK(UPROPS_BIDI_CONTROL) },
-    { -1, U_MASK(UPROPS_MIRROR_SHIFT) },
-    {  1, U_MASK(UPROPS_DASH) },
-    {  1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT) },
-    {  1, U_MASK(UPROPS_DEPRECATED) },
-    {  1, U_MASK(UPROPS_DIACRITIC) },
-    {  1, U_MASK(UPROPS_EXTENDER) },
-    {  0, 0 },                                  /* UCHAR_FULL_COMPOSITION_EXCLUSION */
-    {  1, U_MASK(UPROPS_GRAPHEME_BASE) },
-    {  1, U_MASK(UPROPS_GRAPHEME_EXTEND) },
-    {  1, U_MASK(UPROPS_GRAPHEME_LINK) },
-    {  1, U_MASK(UPROPS_HEX_DIGIT) },
-    {  1, U_MASK(UPROPS_HYPHEN) },
-    {  1, U_MASK(UPROPS_ID_CONTINUE) },
-    {  1, U_MASK(UPROPS_ID_START) },
-    {  1, U_MASK(UPROPS_IDEOGRAPHIC) },
-    {  1, U_MASK(UPROPS_IDS_BINARY_OPERATOR) },
-    {  1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
-    {  1, U_MASK(UPROPS_JOIN_CONTROL) },
-    {  1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
-    { -2, 0 },                                  /* UCHAR_LOWERCASE */
-    {  1, U_MASK(UPROPS_MATH) },
-    {  1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
-    {  1, U_MASK(UPROPS_QUOTATION_MARK) },
-    {  1, U_MASK(UPROPS_RADICAL) },
-    { -2, 0 },                                  /* UCHAR_SOFT_DOTTED */
-    {  1, U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
-    {  1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
-    { -2, 0 },                                  /* UCHAR_UPPERCASE */
-    {  1, U_MASK(UPROPS_WHITE_SPACE) },
-    {  1, U_MASK(UPROPS_XID_CONTINUE) },
-    {  1, U_MASK(UPROPS_XID_START) },
-    { -2, 0 },                                  /* UCHAR_CASE_SENSITIVE */
-    {  2, U_MASK(UPROPS_V2_S_TERM) },
-    {  2, U_MASK(UPROPS_V2_VARIATION_SELECTOR) },
-    {  0, 0 },                                  /* UCHAR_NFD_INERT */
-    {  0, 0 },                                  /* UCHAR_NFKD_INERT */
-    {  0, 0 },                                  /* UCHAR_NFC_INERT */
-    {  0, 0 },                                  /* UCHAR_NFKC_INERT */
-    {  0, 0 }                                   /* UCHAR_SEGMENT_STARTER */
+    {  1,               U_MASK(UPROPS_ALPHABETIC) },
+    {  1,               U_MASK(UPROPS_ASCII_HEX_DIGIT) },
+    {  1,               U_MASK(UPROPS_BIDI_CONTROL) },
+    { -1,               U_MASK(UPROPS_MIRROR_SHIFT) },
+    {  1,               U_MASK(UPROPS_DASH) },
+    {  1,               U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT) },
+    {  1,               U_MASK(UPROPS_DEPRECATED) },
+    {  1,               U_MASK(UPROPS_DIACRITIC) },
+    {  1,               U_MASK(UPROPS_EXTENDER) },
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_FULL_COMPOSITION_EXCLUSION */
+    {  1,               U_MASK(UPROPS_GRAPHEME_BASE) },
+    {  1,               U_MASK(UPROPS_GRAPHEME_EXTEND) },
+    {  1,               U_MASK(UPROPS_GRAPHEME_LINK) },
+    {  1,               U_MASK(UPROPS_HEX_DIGIT) },
+    {  1,               U_MASK(UPROPS_HYPHEN) },
+    {  1,               U_MASK(UPROPS_ID_CONTINUE) },
+    {  1,               U_MASK(UPROPS_ID_START) },
+    {  1,               U_MASK(UPROPS_IDEOGRAPHIC) },
+    {  1,               U_MASK(UPROPS_IDS_BINARY_OPERATOR) },
+    {  1,               U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
+    {  1,               U_MASK(UPROPS_JOIN_CONTROL) },
+    {  1,               U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_LOWERCASE */
+    {  1,               U_MASK(UPROPS_MATH) },
+    {  1,               U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
+    {  1,               U_MASK(UPROPS_QUOTATION_MARK) },
+    {  1,               U_MASK(UPROPS_RADICAL) },
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_SOFT_DOTTED */
+    {  1,               U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
+    {  1,               U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_UPPERCASE */
+    {  1,               U_MASK(UPROPS_WHITE_SPACE) },
+    {  1,               U_MASK(UPROPS_XID_CONTINUE) },
+    {  1,               U_MASK(UPROPS_XID_START) },
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_CASE_SENSITIVE */
+    {  2,               U_MASK(UPROPS_V2_S_TERM) },
+    {  2,               U_MASK(UPROPS_V2_VARIATION_SELECTOR) },
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFD_INERT */
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFKD_INERT */
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFC_INERT */
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFKC_INERT */
+    { UPROPS_SRC_NORM,  0 }                                     /* UCHAR_SEGMENT_STARTER */
 };

 U_CAPI UBool U_EXPORT2
@ -95,45 +95,48 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
        /* not a known binary property */
    } else {
        uint32_t mask=binProps[which].mask;
+        int32_t column=binProps[which].column;
        if(mask!=0) {
            /* systematic, directly stored properties */
-            return (u_getUnicodeProperties(c, binProps[which].column)&mask)!=0;
-        } else if(binProps[which].column==-2) {
-            /* case mapping properties */
-            UErrorCode errorCode=U_ZERO_ERROR;
-            UCaseProps *csp=ucase_getSingleton(&errorCode);
-            if(U_FAILURE(errorCode)) {
-                return FALSE;
-            }
-            switch(which) {
-            case UCHAR_LOWERCASE:
-                return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
-            case UCHAR_UPPERCASE:
-                return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
-            case UCHAR_SOFT_DOTTED:
-                return ucase_isSoftDotted(csp, c);
-            case UCHAR_CASE_SENSITIVE:
-                return ucase_isCaseSensitive(csp, c);
-            default:
-                break;
-            }
+            return (u_getUnicodeProperties(c, column)&mask)!=0;
        } else {
+            if(column==UPROPS_SRC_CASE) {
+                /* case mapping properties */
+                UErrorCode errorCode=U_ZERO_ERROR;
+                UCaseProps *csp=ucase_getSingleton(&errorCode);
+                if(U_FAILURE(errorCode)) {
+                    return FALSE;
+                }
+                switch(which) {
+                case UCHAR_LOWERCASE:
+                    return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
+                case UCHAR_UPPERCASE:
+                    return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
+                case UCHAR_SOFT_DOTTED:
+                    return ucase_isSoftDotted(csp, c);
+                case UCHAR_CASE_SENSITIVE:
+                    return ucase_isCaseSensitive(csp, c);
+                default:
+                    break;
+                }
+            } else if(column==UPROPS_SRC_NORM) {
 #if !UCONFIG_NO_NORMALIZATION
-            /* normalization properties from unorm.icu */
-            switch(which) {
-            case UCHAR_FULL_COMPOSITION_EXCLUSION:
-                return unorm_internalIsFullCompositionExclusion(c);
-            case UCHAR_NFD_INERT:
-            case UCHAR_NFKD_INERT:
-            case UCHAR_NFC_INERT:
-            case UCHAR_NFKC_INERT:
-                return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT)+UNORM_NFD);
-            case UCHAR_SEGMENT_STARTER:
-                return unorm_isCanonSafeStart(c);
-            default:
-                break;
-            }
+                /* normalization properties from unorm.icu */
+                switch(which) {
+                case UCHAR_FULL_COMPOSITION_EXCLUSION:
+                    return unorm_internalIsFullCompositionExclusion(c);
+                case UCHAR_NFD_INERT:
+                case UCHAR_NFKD_INERT:
+                case UCHAR_NFC_INERT:
+                case UCHAR_NFKC_INERT:
+                    return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT)+UNORM_NFD);
+                case UCHAR_SEGMENT_STARTER:
+                    return unorm_isCanonSafeStart(c);
+                default:
+                    break;
+                }
 #endif
+            }
        }
    }
    return FALSE;
@ -291,6 +294,40 @@ u_getIntPropertyMaxValue(UProperty which) {
    }
 }

+U_CAPI UPropertySource U_EXPORT2
+uprops_getSource(UProperty which) {
+    if(which<UCHAR_BINARY_START) {
+        return UPROPS_SRC_NONE; /* undefined */
+    } else if(which<UCHAR_BINARY_LIMIT) {
+        if(binProps[which].mask!=0) {
+            return UPROPS_SRC_CHAR;
+        } else {
+            return (UPropertySource)binProps[which].column;
+        }
+    } else if(which<UCHAR_INT_START) {
+        return UPROPS_SRC_NONE; /* undefined */
+    } else if(which<UCHAR_INT_LIMIT) {
+        switch(which) {
+        case UCHAR_HANGUL_SYLLABLE_TYPE:
+            return UPROPS_SRC_HST;
+        case UCHAR_CANONICAL_COMBINING_CLASS:
+        case UCHAR_NFD_QUICK_CHECK:
+        case UCHAR_NFKD_QUICK_CHECK:
+        case UCHAR_NFC_QUICK_CHECK:
+        case UCHAR_NFKC_QUICK_CHECK:
+        case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
+        case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
+            return UPROPS_SRC_NORM;
+        default:
+            return UPROPS_SRC_CHAR;
+        }
+    } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
+        return UPROPS_SRC_CHAR;
+    } else {
+        return UPROPS_SRC_NONE; /* undefined */
+    }
+}
+
 /*----------------------------------------------------------------
 * Inclusions list
 *----------------------------------------------------------------*/
@ -377,7 +414,15 @@ u_getIntPropertyMaxValue(UProperty which) {
 *
 * Do not use a UnicodeSet pattern because that causes infinite recursion;
 * UnicodeSet depends on the inclusions set.
+ *
+ * ---
+ *
+ * uprv_getInclusions() is compiled out (#if 0) starting 2004-sep-13 because
+ * uniset_props.cpp now calls the uxyz_addPropertyStarts() functions directly,
+ * and only for the relevant property source.
 */
+#if 0
+
 U_CAPI void U_EXPORT2
 uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
@ -390,3 +435,5 @@ uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
    uchar_addPropertyStarts(sa, pErrorCode);
    ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
 }
+
+#endif
--- a/icu4c/source/common/uprops.h
+++ b/icu4c/source/common/uprops.h
@ -337,6 +337,38 @@ uprv_getISOCommentCharacters(USetAdder *sa);
 */
 #endif

+/**
+ * Constants for which data and implementation files provide which properties.
+ * Used by UnicodeSet for service-specific property enumeration.
+ * @internal
+ */
+enum UPropertySource {
+    /** No source, not a supported property. */
+    UPROPS_SRC_NONE,
+    /** From uchar.c/uprops.icu */
+    UPROPS_SRC_CHAR,
+    /** Hangul_Syllable_Type, from uchar.c/uprops.icu */
+    UPROPS_SRC_HST,
+    /** From unames.c/unames.icu */
+    UPROPS_SRC_NAMES,
+    /** From unorm.cpp/unorm.icu */
+    UPROPS_SRC_NORM,
+    /** From ucase.c/ucase.icu */
+    UPROPS_SRC_CASE,
+    /** From ubidi.c/ubidi.icu */
+    UPROPS_SRC_BIDI,
+    /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
+    UPROPS_SRC_COUNT
+};
+typedef enum UPropertySource UPropertySource;
+
+/**
+ * @see UPropertySource
+ * @internal
+ */
+U_CAPI UPropertySource U_EXPORT2
+uprops_getSource(UProperty which);
+
 /**
 * Enumerate each core properties data trie and add the
 * start of each range of same properties to the set.
@ -345,6 +377,13 @@ uprv_getISOCommentCharacters(USetAdder *sa);
 U_CAPI void U_EXPORT2
 uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);

+/**
+ * Same as uchar_addPropertyStarts() but only for Hangul_Syllable_Type.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
+
 /**
 * Return a set of characters for property enumeration.
 * For each two consecutive characters (start, limit) in the set,