From f7b7183d7a7daf29c2e97252e4ff19ab45240d32 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 13 Sep 2004 23:33:22 +0000
Subject: [PATCH] ICU-4078 enable building a UnicodeSet from a property
 using/loading only the relevant data file

X-SVN-Rev: 16313
---
 icu4c/source/common/uchar.c          | 108 ++++++++------
 icu4c/source/common/uniset_props.cpp |  85 ++++++-----
 icu4c/source/common/uprops.c         | 201 +++++++++++++++++----------
 icu4c/source/common/uprops.h         |  39 ++++++
 4 files changed, 271 insertions(+), 162 deletions(-)

diff --git a/icu4c/source/common/uchar.c b/icu4c/source/common/uchar.c
index 397253366f..5f5b896346 100644
--- a/icu4c/source/common/uchar.c
+++ b/icu4c/source/common/uchar.c
@@ -930,7 +930,7 @@ uprv_getMaxValues(int32_t column) {
 
 /*
  * get Hangul Syllable Type
- * implemented here so that uchar.c (uchar_addPropertyStarts())
+ * implemented here so that uchar.c (uhst_addPropertyStarts())
  * does not depend on uprops.c (u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE))
  */
 U_CFUNC UHangulSyllableType
@@ -995,6 +995,69 @@ ublock_getCode(UChar32 c) {
 
 /* property starts for UnicodeSet ------------------------------------------- */
 
+/* for Hangul_Syllable_Type: adds to sa the code points where uchar_getHST() may change value */
+U_CAPI void U_EXPORT2
+uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
+    UChar32 c;
+    int32_t value, value2;
+
+    if(U_FAILURE(*pErrorCode)) { /* an earlier failure is left untouched */
+        return;
+    }
+
+    if(!HAVE_DATA) { /* no data: report dataErrorCode and add nothing */
+        *pErrorCode=dataErrorCode;
+        return;
+    }
+
+    /* add code points with hardcoded properties, plus the ones following them */
+
+    /*
+     * Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
+     * First, we add fixed boundaries for the blocks of Jamos.
+     * Then we check in loops to see where the current Unicode version
+     * actually stops assigning such Jamos. We start each loop
+     * at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
+     * (These have not changed since Unicode 2.)
+     */
+    sa->add(sa->set, 0x1100); /* fixed start of the leading-Jamo block */
+    value=U_HST_LEADING_JAMO;
+    for(c=0x115a; c<=0x115f; ++c) { /* probe the tail of the block for a type change */
+        value2=uchar_getHST(c);
+        if(value!=value2) {
+            value=value2;
+            sa->add(sa->set, c);
+        }
+    }
+
+    sa->add(sa->set, 0x1160); /* fixed start of the vowel-Jamo block */
+    value=U_HST_VOWEL_JAMO;
+    for(c=0x11a3; c<=0x11a7; ++c) { /* probe the tail of the block for a type change */
+        value2=uchar_getHST(c);
+        if(value!=value2) {
+            value=value2;
+            sa->add(sa->set, c);
+        }
+    }
+
+    sa->add(sa->set, 0x11a8); /* fixed start of the trailing-Jamo block */
+    value=U_HST_TRAILING_JAMO;
+    for(c=0x11fa; c<=0x11ff; ++c) { /* probe the tail of the block for a type change */
+        value2=uchar_getHST(c);
+        if(value!=value2) {
+            value=value2;
+            sa->add(sa->set, c);
+        }
+    }
+
+    /* Add Hangul type boundaries: the first syllable of each JAMO_T_COUNT-sized group and the one after it. */
+    for(c=HANGUL_BASE; c<(HANGUL_BASE+HANGUL_COUNT); c+=JAMO_T_COUNT) {
+        sa->add(sa->set, c);
+        sa->add(sa->set, c+1);
+    }
+    sa->add(sa->set, c); /* c is the first loop value at or past HANGUL_BASE+HANGUL_COUNT */
+}
+
 static UBool U_CALLCONV
 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
     /* add the start code point to the USet */
@@ -1007,8 +1070,9 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint
 
 U_CAPI void U_EXPORT2
 uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
-    UChar32 c;
-    int32_t value, value2;
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
 
     if(!HAVE_DATA) {
         *pErrorCode=dataErrorCode;
@@ -1072,42 +1136,4 @@ uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
     /* add for UCHAR_JOINING_TYPE */
     sa->add(sa->set, ZWNJ); /* range ZWNJ..ZWJ */
     sa->add(sa->set, ZWJ+1);
-
-    /*
-     * Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
-     * First, we add fixed boundaries for the blocks of Jamos.
-     * Then we check in loops to see where the current Unicode version
-     * actually stops assigning such Jamos. We start each loop
-     * at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
-     * (These have not changed since Unicode 2.)
-     */
-    sa->add(sa->set, 0x1100);
-    value=U_HST_LEADING_JAMO;
-    for(c=0x115a; c<=0x115f; ++c) {
-        value2=uchar_getHST(c);
-        if(value!=value2) {
-            value=value2;
-            sa->add(sa->set, c);
-        }
-    }
-
-    sa->add(sa->set, 0x1160);
-    value=U_HST_VOWEL_JAMO;
-    for(c=0x11a3; c<=0x11a7; ++c) {
-        value2=uchar_getHST(c);
-        if(value!=value2) {
-            value=value2;
-            sa->add(sa->set, c);
-        }
-    }
-
-    sa->add(sa->set, 0x11a8);
-    value=U_HST_TRAILING_JAMO;
-    for(c=0x11fa; c<=0x11ff; ++c) {
-        value2=uchar_getHST(c);
-        if(value!=value2) {
-            value=value2;
-            sa->add(sa->set, c);
-        }
-    }
 }
diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp
index da9ad5bd01..282835b083 100644
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@@ -31,6 +31,8 @@
 #include "uvector.h"
 #include "uprops.h"
 #include "propname.h"
+#include "unormimp.h"
+#include "ucase.h"
 #include "charstr.h"
 #include "ustrfmt.h"
 #include "mutex.h"
@@ -149,7 +151,7 @@ static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
 
 U_NAMESPACE_BEGIN
 
-static UnicodeSet* INCLUSIONS = NULL; // cached uprv_getInclusions()
+static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()
 
 static Hashtable* CASE_EQUIV_HASH = NULL; // for closeOver(USET_CASE)
 
@@ -1016,6 +1018,7 @@ static UBool intPropertyFilter(UChar32 ch, void* context) {
  */
 void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
                              void* context,
+                             int32_t src,
                              UErrorCode &status) {
     // Walk through all Unicode characters, noting the start
     // and end of each range for which filter.contain(c) is
@@ -1031,7 +1034,7 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
     // those properties.  Scanning code points is slow.
     if (U_FAILURE(status)) return;
 
-    const UnicodeSet* inclusions = getInclusions(status);
+    const UnicodeSet* inclusions = getInclusions(src, status);
     if (U_FAILURE(status)) {
         return;
     }
@@ -1101,38 +1104,10 @@ UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec)
     if (U_FAILURE(ec)) return *this;
 
     if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
-        applyFilter(generalCategoryMaskFilter, &value, ec);
-#if UCONFIG_NO_NORMALIZATION
-    } else if(prop == UCHAR_HANGUL_SYLLABLE_TYPE) {
-        /*
-         * Special code for when normalization is off.
-         * HST is still available because it is hardcoded in uprops.c, but
-         * the inclusions set does not have the necessary code points
-         * for normalization properties.
-         * I am hardcoding HST in this case because it is the only property
-         * that prevents genbrk from compiling char.txt when normalization is off.
-         * This saves me from turning off break iteration or making more
-         * complicated changes in genbrk.
-         *
-         * This code is not efficient. For efficiency turn on normalization.
-         *
-         * markus 20030505
-         */
-        UChar32 c;
-
-        clear();
-        for(c=0x1100; c<=0xd7a3; ++c) {
-            if(c==0x1200) {
-                c=0xac00;
-            }
-            if(value == u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE)) {
-                add(c);
-            }
-        }
-#endif
+        applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec);
     } else {
         IntPropertyContext c = {prop, value};
-        applyFilter(intPropertyFilter, &c, ec);
+        applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec);
     }
     return *this;
 }
@@ -1205,7 +1180,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                     if (*end != 0) {
                         FAIL(ec);
                     }
-                    applyFilter(numericValueFilter, &value, ec);
+                    applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec);
                     return *this;
                 }
                 break;
@@ -1236,7 +1211,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                     if (!mungeCharName(buf, vname, sizeof(buf))) FAIL(ec);
                     UVersionInfo version;
                     u_versionFromString(version, buf);
-                    applyFilter(versionFilter, &version, ec);
+                    applyFilter(versionFilter, &version, UPROPS_SRC_CHAR, ec);
                     return *this;
                 }
                 break;
@@ -1274,7 +1249,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                     for (int32_t i=0; i<C99_COUNT; ++i) {
                         int32_t c = uprv_comparePropertyNames(pname, C99_DISPATCH[i].name);
                         if (c == 0) {
-                            applyFilter(c99Filter, (void*) &C99_DISPATCH[i], ec);
+                            applyFilter(c99Filter, (void*) &C99_DISPATCH[i], UPROPS_SRC_CHAR, ec);
                             return *this;
                         } else if (c < 0) {
                             // Further entries will not match; bail out
@@ -1490,9 +1465,9 @@ _set_addString(USet *set, const UChar *str, int32_t length) {
 
 U_CDECL_END
 
-const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
+const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
     umtx_lock(NULL);
-    UBool f = (INCLUSIONS == NULL);
+    UBool f = (INCLUSIONS[src] == NULL);
     umtx_unlock(NULL);
     if (f) {
         UnicodeSet* incl = new UnicodeSet();
@@ -1504,11 +1479,29 @@ const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
         };
 
         if (incl != NULL) {
-            uprv_getInclusions(&sa, &status);
+            switch(src) {
+            case UPROPS_SRC_CHAR:
+                uchar_addPropertyStarts(&sa, &status);
+                break;
+            case UPROPS_SRC_HST:
+                uhst_addPropertyStarts(&sa, &status);
+                break;
+#if !UCONFIG_NO_NORMALIZATION
+            case UPROPS_SRC_NORM:
+                unorm_addPropertyStarts(&sa, &status);
+                break;
+#endif
+            case UPROPS_SRC_CASE:
+                ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
+                break;
+            default:
+                status = U_INTERNAL_PROGRAM_ERROR;
+                break;
+            }
             if (U_SUCCESS(status)) {
                 umtx_lock(NULL);
-                if (INCLUSIONS == NULL) {
-                    INCLUSIONS = incl;
+                if (INCLUSIONS[src] == NULL) {
+                    INCLUSIONS[src] = incl;
                     incl = NULL;        
                 } 
                 umtx_unlock(NULL);
@@ -1518,16 +1511,20 @@ const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
             status = U_MEMORY_ALLOCATION_ERROR;
         }
     }
-    return INCLUSIONS;
+    return INCLUSIONS[src];
 }
 
 /**
  * Cleanup function for UnicodeSet
  */
 U_CFUNC UBool uset_cleanup(void) {
-    if (INCLUSIONS != NULL) {
-        delete INCLUSIONS;
-        INCLUSIONS = NULL;
+    int32_t i;
+
+    for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
+        if (INCLUSIONS[i] != NULL) {
+            delete INCLUSIONS[i];
+            INCLUSIONS[i] = NULL;
+        }
     }
 
     if (CASE_EQUIV_HASH != NULL) {
diff --git a/icu4c/source/common/uprops.c b/icu4c/source/common/uprops.c
index 1d281345a8..523f564bd3 100644
--- a/icu4c/source/common/uprops.c
+++ b/icu4c/source/common/uprops.c
@@ -42,50 +42,50 @@ static const struct {
      * and there must be exacly one entry per binary UProperty.
      *
      * Properties with mask 0 are handled in code.
-     * Pseudo-column -2 indicates case mapping properties.
+     * For them, column is the UPropertySource value.
      */
-    {  1, U_MASK(UPROPS_ALPHABETIC) },
-    {  1, U_MASK(UPROPS_ASCII_HEX_DIGIT) },
-    {  1, U_MASK(UPROPS_BIDI_CONTROL) },
-    { -1, U_MASK(UPROPS_MIRROR_SHIFT) },
-    {  1, U_MASK(UPROPS_DASH) },
-    {  1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT) },
-    {  1, U_MASK(UPROPS_DEPRECATED) },
-    {  1, U_MASK(UPROPS_DIACRITIC) },
-    {  1, U_MASK(UPROPS_EXTENDER) },
-    {  0, 0 },                                  /* UCHAR_FULL_COMPOSITION_EXCLUSION */
-    {  1, U_MASK(UPROPS_GRAPHEME_BASE) },
-    {  1, U_MASK(UPROPS_GRAPHEME_EXTEND) },
-    {  1, U_MASK(UPROPS_GRAPHEME_LINK) },
-    {  1, U_MASK(UPROPS_HEX_DIGIT) },
-    {  1, U_MASK(UPROPS_HYPHEN) },
-    {  1, U_MASK(UPROPS_ID_CONTINUE) },
-    {  1, U_MASK(UPROPS_ID_START) },
-    {  1, U_MASK(UPROPS_IDEOGRAPHIC) },
-    {  1, U_MASK(UPROPS_IDS_BINARY_OPERATOR) },
-    {  1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
-    {  1, U_MASK(UPROPS_JOIN_CONTROL) },
-    {  1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
-    { -2, 0 },                                  /* UCHAR_LOWERCASE */
-    {  1, U_MASK(UPROPS_MATH) },
-    {  1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
-    {  1, U_MASK(UPROPS_QUOTATION_MARK) },
-    {  1, U_MASK(UPROPS_RADICAL) },
-    { -2, 0 },                                  /* UCHAR_SOFT_DOTTED */
-    {  1, U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
-    {  1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
-    { -2, 0 },                                  /* UCHAR_UPPERCASE */
-    {  1, U_MASK(UPROPS_WHITE_SPACE) },
-    {  1, U_MASK(UPROPS_XID_CONTINUE) },
-    {  1, U_MASK(UPROPS_XID_START) },
-    { -2, 0 },                                  /* UCHAR_CASE_SENSITIVE */
-    {  2, U_MASK(UPROPS_V2_S_TERM) },
-    {  2, U_MASK(UPROPS_V2_VARIATION_SELECTOR) },
-    {  0, 0 },                                  /* UCHAR_NFD_INERT */
-    {  0, 0 },                                  /* UCHAR_NFKD_INERT */
-    {  0, 0 },                                  /* UCHAR_NFC_INERT */
-    {  0, 0 },                                  /* UCHAR_NFKC_INERT */
-    {  0, 0 }                                   /* UCHAR_SEGMENT_STARTER */
+    {  1,               U_MASK(UPROPS_ALPHABETIC) },
+    {  1,               U_MASK(UPROPS_ASCII_HEX_DIGIT) },
+    {  1,               U_MASK(UPROPS_BIDI_CONTROL) },
+    { -1,               U_MASK(UPROPS_MIRROR_SHIFT) },
+    {  1,               U_MASK(UPROPS_DASH) },
+    {  1,               U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT) },
+    {  1,               U_MASK(UPROPS_DEPRECATED) },
+    {  1,               U_MASK(UPROPS_DIACRITIC) },
+    {  1,               U_MASK(UPROPS_EXTENDER) },
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_FULL_COMPOSITION_EXCLUSION */
+    {  1,               U_MASK(UPROPS_GRAPHEME_BASE) },
+    {  1,               U_MASK(UPROPS_GRAPHEME_EXTEND) },
+    {  1,               U_MASK(UPROPS_GRAPHEME_LINK) },
+    {  1,               U_MASK(UPROPS_HEX_DIGIT) },
+    {  1,               U_MASK(UPROPS_HYPHEN) },
+    {  1,               U_MASK(UPROPS_ID_CONTINUE) },
+    {  1,               U_MASK(UPROPS_ID_START) },
+    {  1,               U_MASK(UPROPS_IDEOGRAPHIC) },
+    {  1,               U_MASK(UPROPS_IDS_BINARY_OPERATOR) },
+    {  1,               U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
+    {  1,               U_MASK(UPROPS_JOIN_CONTROL) },
+    {  1,               U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_LOWERCASE */
+    {  1,               U_MASK(UPROPS_MATH) },
+    {  1,               U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
+    {  1,               U_MASK(UPROPS_QUOTATION_MARK) },
+    {  1,               U_MASK(UPROPS_RADICAL) },
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_SOFT_DOTTED */
+    {  1,               U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
+    {  1,               U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_UPPERCASE */
+    {  1,               U_MASK(UPROPS_WHITE_SPACE) },
+    {  1,               U_MASK(UPROPS_XID_CONTINUE) },
+    {  1,               U_MASK(UPROPS_XID_START) },
+    { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_CASE_SENSITIVE */
+    {  2,               U_MASK(UPROPS_V2_S_TERM) },
+    {  2,               U_MASK(UPROPS_V2_VARIATION_SELECTOR) },
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFD_INERT */
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFKD_INERT */
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFC_INERT */
+    { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFKC_INERT */
+    { UPROPS_SRC_NORM,  0 }                                     /* UCHAR_SEGMENT_STARTER */
 };
 
 U_CAPI UBool U_EXPORT2
@@ -95,45 +95,48 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
         /* not a known binary property */
     } else {
         uint32_t mask=binProps[which].mask;
+        int32_t column=binProps[which].column;
         if(mask!=0) {
             /* systematic, directly stored properties */
-            return (u_getUnicodeProperties(c, binProps[which].column)&mask)!=0;
-        } else if(binProps[which].column==-2) {
-            /* case mapping properties */
-            UErrorCode errorCode=U_ZERO_ERROR;
-            UCaseProps *csp=ucase_getSingleton(&errorCode);
-            if(U_FAILURE(errorCode)) {
-                return FALSE;
-            }
-            switch(which) {
-            case UCHAR_LOWERCASE:
-                return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
-            case UCHAR_UPPERCASE:
-                return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
-            case UCHAR_SOFT_DOTTED:
-                return ucase_isSoftDotted(csp, c);
-            case UCHAR_CASE_SENSITIVE:
-                return ucase_isCaseSensitive(csp, c);
-            default:
-                break;
-            }
+            return (u_getUnicodeProperties(c, column)&mask)!=0;
         } else {
+            if(column==UPROPS_SRC_CASE) {
+                /* case mapping properties */
+                UErrorCode errorCode=U_ZERO_ERROR;
+                UCaseProps *csp=ucase_getSingleton(&errorCode);
+                if(U_FAILURE(errorCode)) {
+                    return FALSE;
+                }
+                switch(which) {
+                case UCHAR_LOWERCASE:
+                    return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
+                case UCHAR_UPPERCASE:
+                    return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
+                case UCHAR_SOFT_DOTTED:
+                    return ucase_isSoftDotted(csp, c);
+                case UCHAR_CASE_SENSITIVE:
+                    return ucase_isCaseSensitive(csp, c);
+                default:
+                    break;
+                }
+            } else if(column==UPROPS_SRC_NORM) {
 #if !UCONFIG_NO_NORMALIZATION
-            /* normalization properties from unorm.icu */
-            switch(which) {
-            case UCHAR_FULL_COMPOSITION_EXCLUSION:
-                return unorm_internalIsFullCompositionExclusion(c);
-            case UCHAR_NFD_INERT:
-            case UCHAR_NFKD_INERT:
-            case UCHAR_NFC_INERT:
-            case UCHAR_NFKC_INERT:
-                return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT)+UNORM_NFD);
-            case UCHAR_SEGMENT_STARTER:
-                return unorm_isCanonSafeStart(c);
-            default:
-                break;
-            }
+                /* normalization properties from unorm.icu */
+                switch(which) {
+                case UCHAR_FULL_COMPOSITION_EXCLUSION:
+                    return unorm_internalIsFullCompositionExclusion(c);
+                case UCHAR_NFD_INERT:
+                case UCHAR_NFKD_INERT:
+                case UCHAR_NFC_INERT:
+                case UCHAR_NFKC_INERT:
+                    return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT)+UNORM_NFD);
+                case UCHAR_SEGMENT_STARTER:
+                    return unorm_isCanonSafeStart(c);
+                default:
+                    break;
+                }
 #endif
+            }
         }
     }
     return FALSE;
@@ -291,6 +294,40 @@ u_getIntPropertyMaxValue(UProperty which) {
     }
 }
 
+U_CAPI UPropertySource U_EXPORT2
+uprops_getSource(UProperty which) { /* returns the UPropertySource that provides property "which" */
+    if(which<UCHAR_BINARY_START) {
+        return UPROPS_SRC_NONE; /* undefined: below the binary-property range */
+    } else if(which<UCHAR_BINARY_LIMIT) {
+        if(binProps[which].mask!=0) {
+            return UPROPS_SRC_CHAR; /* nonzero mask: bit is read via u_getUnicodeProperties() */
+        } else {
+            return (UPropertySource)binProps[which].column; /* mask 0: column holds the UPropertySource */
+        }
+    } else if(which<UCHAR_INT_START) {
+        return UPROPS_SRC_NONE; /* undefined: between the binary and int property ranges */
+    } else if(which<UCHAR_INT_LIMIT) {
+        switch(which) {
+        case UCHAR_HANGUL_SYLLABLE_TYPE:
+            return UPROPS_SRC_HST; /* own source, served by uhst_addPropertyStarts() */
+        case UCHAR_CANONICAL_COMBINING_CLASS:
+        case UCHAR_NFD_QUICK_CHECK:
+        case UCHAR_NFKD_QUICK_CHECK:
+        case UCHAR_NFC_QUICK_CHECK:
+        case UCHAR_NFKC_QUICK_CHECK:
+        case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
+        case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
+            return UPROPS_SRC_NORM; /* the int properties listed above */
+        default:
+            return UPROPS_SRC_CHAR; /* every other int property */
+        }
+    } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
+        return UPROPS_SRC_CHAR;
+    } else {
+        return UPROPS_SRC_NONE; /* undefined: not in any range handled above */
+    }
+}
+
 /*----------------------------------------------------------------
  * Inclusions list
  *----------------------------------------------------------------*/
@@ -377,7 +414,15 @@ u_getIntPropertyMaxValue(UProperty which) {
  *
  * Do not use a UnicodeSet pattern because that causes infinite recursion;
  * UnicodeSet depends on the inclusions set.
+ *
+ * ---
+ *
+ * uprv_getInclusions() is disabled (#if 0) starting 2004-sep-13 because
+ * uniset_props.cpp now calls the uxyz_addPropertyStarts() functions directly,
+ * and only for the relevant property source.
  */
+#if 0
+
 U_CAPI void U_EXPORT2
 uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
@@ -390,3 +435,5 @@ uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
     uchar_addPropertyStarts(sa, pErrorCode);
     ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
 }
+
+#endif
diff --git a/icu4c/source/common/uprops.h b/icu4c/source/common/uprops.h
index a50a8fbe47..b733b0917d 100644
--- a/icu4c/source/common/uprops.h
+++ b/icu4c/source/common/uprops.h
@@ -337,6 +337,38 @@ uprv_getISOCommentCharacters(USetAdder *sa);
 */
 #endif
 
+/**
+ * Constants for which data and implementation files provide which properties.
+ * Used by UnicodeSet for service-specific property enumeration.
+ * @internal
+ */
+enum UPropertySource {
+    /** No source, not a supported property. */
+    UPROPS_SRC_NONE,
+    /** From uchar.c/uprops.icu */
+    UPROPS_SRC_CHAR,
+    /** Hangul_Syllable_Type, from uchar.c/uprops.icu */
+    UPROPS_SRC_HST,
+    /** From unames.c/unames.icu */
+    UPROPS_SRC_NAMES,
+    /** From unorm.cpp/unorm.icu */
+    UPROPS_SRC_NORM,
+    /** From ucase.c/ucase.icu */
+    UPROPS_SRC_CASE,
+    /** From ubidi.c/ubidi.icu */
+    UPROPS_SRC_BIDI,
+    /** One more than the highest UPropertySource (UPROPS_SRC_) constant; usable as an array size. */
+    UPROPS_SRC_COUNT
+};
+typedef enum UPropertySource UPropertySource;
+
+/**
+ * @see UPropertySource
+ * @internal
+ */
+U_CAPI UPropertySource U_EXPORT2
+uprops_getSource(UProperty which);
+
 /**
  * Enumerate each core properties data trie and add the
  * start of each range of same properties to the set.
@@ -345,6 +377,13 @@ uprv_getISOCommentCharacters(USetAdder *sa);
 U_CAPI void U_EXPORT2
 uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
 
+/**
+ * Same as uchar_addPropertyStarts() but only for Hangul_Syllable_Type.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
+
 /**
  * Return a set of characters for property enumeration.
  * For each two consecutive characters (start, limit) in the set,