ICU-880 strcoll: More performance tweaks, plus fix inlines for UNIX builds.

X-SVN-Rev: 4502
2001-04-18 19:31:05 +00:00 · 2001-04-18 19:31:05 +00:00 · fe8f7ca9cd
commit fe8f7ca9cd
parent 53f50a5718
6 changed files with 1005 additions and 946 deletions
--- a/icu4c/source/common/common.dsp
+++ b/icu4c/source/common/common.dsp
@ -70,7 +70,7 @@ LINK32=link.exe
 # PROP Ignore_Export_Lib 0
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /YX /FD /GZ /c
-# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /I "..\..\include" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /YX /FD /GZ /c
+# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /I "..\..\include" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /FR /YX /FD /GZ /c
 # SUBTRACT CPP /WX
 # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
--- a/icu4c/source/i18n/ucol.cpp
+++ b/icu4c/source/i18n/ucol.cpp
@ -82,7 +82,7 @@ isAcceptableUCA(void *context,
 }


-inline  void IInit_collIterate(const UCollator *collator, const UChar *sourceString,
+inline void  IInit_collIterate(const UCollator *collator, const UChar *sourceString,
                              int32_t sourceLen, collIterate *s) {
    (s)->string = (s)->pos = (UChar *)(sourceString);
    (s)->flags = 0;
@ -410,10 +410,41 @@ static const uint16_t *FCD_STAGE_2_;
 static const uint16_t *FCD_STAGE_3_;


+inline UBool ucol_unsafeCP(UChar c, const UCollator *coll) {
+    /*  Returns TRUE if c is flagged in coll's unsafeCP bit table or, failing that, in UCA's.  */
+    if (c < coll->minUnsafeCP) return FALSE;    /* fast exit: below the smallest unsafe char  */
+
+    int32_t  hash = c;
+    uint8_t  htbyte;
+
+    if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
+        if (hash >= 0xd800 && hash <= 0xf8ff) {
+            /*  Part of a surrogate, or in private use area.            */
+            /*   These are always considered unsafe.                    */
+            return TRUE;
+        }
+        hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;   /* remap to a masked index, offset by 256 */
+    }
+    htbyte = coll->unsafeCP[hash>>3];           /* one bit per entry: byte hash/8, bit hash%8 */
+    if (((htbyte >> (hash & 7)) & 1) == 1) {
+        return TRUE;
+    }
+
+    /*  TODO:  main UCA table data needs to be merged into tailoring tables,   */
+    /*         and this second level of test removed from here.                */
+    if (coll == UCA || UCA == NULL) {           /* no separate UCA table left to consult      */
+        return FALSE;
+    }
+
+    htbyte = UCA->unsafeCP[hash>>3];
+    return ((htbyte >> (hash & 7)) & 1) == 1;
+}
+



 UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UErrorCode *status) {
+    UChar c;
    UCollator *result = fillIn;
    if(U_FAILURE(*status) || image == NULL) {
        return NULL;
@ -472,11 +503,17 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr

    result->zero = 0;
    result->rules = NULL;
-    /* get the version info form UCATableHeader and populate the Collator struct*/
+
+    /* get the version info from UCATableHeader and populate the Collator struct*/
    result->dataInfo.dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
    result->dataInfo.dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/

    result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
+    result->minUnsafeCP = 0;
+    for (c=0; c<0x300; c++) {  // Find the smallest unsafe char.
+        if (ucol_unsafeCP(c, result)) break;
+    }
+    result->minUnsafeCP = c;

    /* max expansion tables */
    result->endExpansionCE = (uint32_t*)((uint8_t*)result->image +
@ -550,7 +587,7 @@ void ucol_initUCA(UErrorCode *status) {
 /* This is the first function that tries to fetch a collation element  */
 /* If it's not succesfull or it encounters a more difficult situation  */
 /* some more sofisticated and slower functions are invoked             */
-uint32_t ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
+inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
    uint32_t order;
    if (collationSource->CEpos > collationSource->toReturn) {       /* Are there any CEs from previous expansions? */
      order = *(collationSource->toReturn++);                         /* if so, return them */
@ -646,22 +683,33 @@ uint32_t ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UEr
    }   // end for (;;)


-      if(ch <= 0xFF) {                                                 /* if it's Latin One, we'll try to fast track it */
-        order = coll->latinOneMapping[ch];                            /* by looking in up in an array */
-      } else {                                                        /* otherwise, */
-        order = ucmp32_get(coll->mapping, ch);                        /* we'll go for slightly slower trie */
+      if (ch <= 0xFF) {
+          /*  For latin-1 characters we never need to fall back to the UCA table        */
+          /*    because all of the UCA data is replicated in the latinOneMapping array  */
+          order = coll->latinOneMapping[ch];
+          if (order > UCOL_NOT_FOUND) {
+              order = getSpecialCE(coll, order, collationSource, status);
+          }
      }
-      if(order >= UCOL_NOT_FOUND) {                                   /* if a CE is special */
-        //*(collationSource->CEpos) = order;                            /* prepare the buffer */
-        order = getSpecialCE(coll, order, collationSource, status);       /* and try to get the special CE */
-        if(order == UCOL_NOT_FOUND) {   /* We couldn't find a good CE in the tailoring */
-          order = ucol_getNextUCA(ch, collationSource, status);
-        }
+      else
+      {
+          order = ucmp32_get(coll->mapping, ch);                             /* we'll go for slightly slower trie */
+          if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
+              order = getSpecialCE(coll, order, collationSource, status);    /* and try to get the special CE     */
+          }
+          if(order == UCOL_NOT_FOUND) {   /* We couldn't find a good CE in the tailoring */
+              order = ucol_getNextUCA(ch, collationSource, status);
+          }
      }
-    /* This means that contraction should spit back the last codepoint eaten! */
    return order; /* return the CE */
 }

+/* Out-of-line ucol_getNextCE for callers in other files; forwards to the inline ucol_IGetNextCE. */
+U_CAPI uint32_t ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
+    return ucol_IGetNextCE(coll, collationSource, status);
+}
+
+
 /**
 * Incremental previous normalization happens here. Pick up the range of chars
 * identifed by FCD, normalize it into the collIterate's writable buffer,
@ -803,7 +851,7 @@ inline void collPrevIterFCD(collIterate *data)
 * @param data collation iterator struct
 * @param status error status
 */
-uint32_t ucol_getPrevCE(const UCollator *coll, collIterate *data, 
+inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                               UErrorCode *status)
 {
    uint32_t result = UCOL_NULLORDER;
@ -919,6 +967,14 @@ uint32_t ucol_getPrevCE(const UCollator *coll, collIterate *data,
 }


+/*   ucol_getPrevCE, out-of-line version for use from other files; forwards to the inline ucol_IGetPrevCE.  */
+U_CAPI uint32_t ucol_getPrevCE(const UCollator *coll, collIterate *data,
+                        UErrorCode *status) {
+    return ucol_IGetPrevCE(coll, data, status);
+}
+
+
+
 /*    collIterNormalize     Incremental Normalization happens here.                       */
 /*                          pick up the range of chars identifed by FCD,                  */
 /*                          normalize it into the collIterate's writable buffer,          */
@ -1040,7 +1096,7 @@ uint32_t ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) {
  collIterate colIt;
  uint32_t order;
  IInit_collIterate(coll, &u, 1, &colIt);
-  order = ucol_getNextCE(coll, &colIt, status);
+  order = ucol_IGetNextCE(coll, &colIt, status);
  /*UCOL_GETNEXTCE(order, coll, colIt, status);*/
  return order;
 }
@ -1165,11 +1221,11 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
              IInit_collIterate(collator, jamoString, 2, &jamos);
            }

-            CE = ucol_getNextCE(collator, &jamos, status);
+            CE = ucol_IGetNextCE(collator, &jamos, status);

            while(CE != UCOL_NO_MORE_CES) {
              *(collationSource->CEpos++) = CE;
-              CE = ucol_getNextCE(collator, &jamos, status);
+              CE = ucol_IGetNextCE(collator, &jamos, status);
            }
            return *(collationSource->toReturn++);

@ -1318,11 +1374,11 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
          IInit_collIterate(collator, jamoString, 2, &jamos);
        }

-        CE = ucol_getNextCE(collator, &jamos, status);
+        CE = ucol_IGetNextCE(collator, &jamos, status);

        while(CE != UCOL_NO_MORE_CES) {
          *(collationSource->CEpos++) = CE;
-          CE = ucol_getNextCE(collator, &jamos, status);
+          CE = ucol_IGetNextCE(collator, &jamos, status);
        }
        collationSource->toReturn = collationSource->CEpos - 1;
        return *(collationSource->toReturn);
@ -1459,8 +1515,8 @@ uint32_t getSpecialCE(const UCollator *coll, uint32_t CE, collIterate *source, U
        const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);

        if ((source->flags & UCOL_ITER_HASLEN) && source->pos>=source->endp) {
-        /* this is the end of string.  (Null terminated handled later,
-           when the null doesn't match the contraction sequence.)     */
+                                           /* this is the end of string.  (Null terminated handled later,
+                                            when the null doesn't match the contraction sequence.)     */
          {
            CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); /* So we'll pick whatever we have at the point... */
            if (CE == UCOL_NOT_FOUND) {
@ -1854,7 +1910,7 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre


    for(;;) {
-          order = ucol_getNextCE(coll, s, &status);
+          order = ucol_IGetNextCE(coll, s, &status);
          //UCOL_GETNEXTCE(order, coll, *s, &status);

          if(order == UCOL_NO_MORE_CES) {
@ -2134,7 +2190,7 @@ ucol_calcSortKey(const    UCollator    *coll,
    for(;;) {
        for(i=prevBuffSize; i<minBufferSize; ++i) {

-            order = ucol_getNextCE(coll, &s, status);
+            order = ucol_IGetNextCE(coll, &s, status);
            // UCOL_GETNEXTCE(order, coll, s, status);

            if(order == UCOL_NO_MORE_CES) {
@ -2634,7 +2690,7 @@ ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
    for(;;) {
        for(i=prevBuffSize; i<minBufferSize; ++i) {

-            order = ucol_getNextCE(coll, &s, status);
+            order = ucol_IGetNextCE(coll, &s, status);
            // UCOL_GETNEXTCE(order, coll, s, status);

            if(isCEIgnorable(order)) {
@ -3236,33 +3292,6 @@ ucol_getVersion(const UCollator* coll,
 }


-inline UBool ucol_unsafeCP(UChar c, const UCollator *coll) {
-    int32_t  hash = c;
-    uint8_t  htbyte;
-
-    if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
-        if (hash >= 0xd800 && hash <= 0xf8ff) {
-            /*  Part of a surrogate, or in private use area.            */
-            /*   These are always considered unsafe.                    */
-            return TRUE;
-        }
-        hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
-    }
-    htbyte = coll->unsafeCP[hash>>3];
-    if (((htbyte >> (hash & 7)) & 1) == 1) {
-        return TRUE;   
-    }
-
-    /*  TODO:  main UCA table data needs to be merged into tailoring tables,   */
-    /*         and this second level of test removed from here.                */
-    if (coll == UCA) {
-        return FALSE;
-    }
-    
-    htbyte = UCA->unsafeCP[hash>>3];
-    return ((htbyte >> (hash & 7)) & 1) == 1;
-}
-
 /* This internal API checks whether a character is tailored or not */
 U_CAPI UBool isTailored(const UCollator *coll, const UChar u, UErrorCode *status) {
  uint32_t CE = UCOL_NOT_FOUND;
@ -3393,18 +3422,29 @@ UCollationResult    ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
    return result;
 }

+/*  CEBuf - A struct and some inline functions to handle the saving    */
+/*          of CEs in a buffer within ucol_strcoll                     */

-/*                                                                       */
-/* ucol_CEBuf_Expand     Make an expanded CE Buffer on the heap.  Called */
-/*                       when the original stack based buffer overflows  */
-/*                       CEBuffers are used in ucol_strcoll to hold      */
-/*                       the CEs for the strings being compared.         */
-/*                                                                       */
-void ucol_CEBuf_Expand(ucol_CEBuf *b) {
+#define UCOL_CEBUF_SIZE 512                      /* number of CEs held by the in-struct array     */
+typedef struct ucol_CEBuf {
+    uint32_t    *buf;                            /* start of the storage currently in use         */
+    uint32_t    *endp;                           /* limit; UCOL_INIT_CEBUF sets it to buf + size  */
+    uint32_t    *pos;                            /* slot the next UCOL_CEBUF_PUT will write       */
+    uint32_t     localArray[UCOL_CEBUF_SIZE];    /* initial storage, embedded in the struct       */
+} ucol_CEBuf;
+
+
+inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
+    (b)->buf = (b)->pos = (b)->localArray;     /* start empty, using the in-struct array       */
+    (b)->endp = (b)->buf + UCOL_CEBUF_SIZE;    /* limit is one past the array's last element   */
+}
+
+void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
    uint32_t  oldSize;
    uint32_t  newSize;
    uint32_t  *newBuf;

+    ci->flags |= UCOL_ITER_ALLOCATED;
    oldSize = b->pos - b->buf;
    newSize = oldSize * 2;
    newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t));
@ -3417,64 +3457,94 @@ void ucol_CEBuf_Expand(ucol_CEBuf *b) {
    b->pos  = b->buf + oldSize;
 }

+inline void UCOL_CEBUF_CHECK(ucol_CEBuf *b, collIterate *ci) {
+    if (b->pos == b->endp) { ucol_CEBuf_Expand(b, ci); }   /* buffer is full: expand before the next put */
+}
+
+inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce) {
+    *(b)->pos++ = ce;    /* no capacity check here: callers must run UCOL_CEBUF_CHECK first */
+}
+


 /*                                                                      */
 /* ucol_strcoll     Main public API string comparison function          */
 /*                                                                      */
 U_CAPI UCollationResult
-ucol_strcoll(    const    UCollator    *coll,
-        const    UChar        *source,
-        int32_t            sourceLength,
-        const    UChar        *target,
-        int32_t            targetLength)
+ucol_strcoll( const UCollator    *coll,
+              const UChar        *source,
+              int32_t            sourceLength,
+              const UChar        *target,
+              int32_t            targetLength)
 {
-    /* check if source and target are same strings */
-    if (source==target  && sourceLength==targetLength)
-    {
-        return UCOL_EQUAL;
-    }
+#ifdef _MSC_VER
+        /* TODO:  this really does speed thing up significantly on MSVC builds on P6 processors.  */
+        /*        What's the best way to ifdef it in?                                             */
+//       __asm         align 16
+#endif

    /* Scan the strings.  Find:                                                             */
    /*    The length of any leading portion that is equal                                   */
    /*    Whether they are exactly equal.  (in which case we just return)                   */
    const UChar    *pSrc    = source;
    const UChar    *pTarg   = target;
-    
-    const UChar    *pSrcEnd = source + sourceLength;
-    const UChar    *pTargEnd = target + targetLength;
-    
    int32_t        equalLength;

-    // Scan while the strings are bitwise ==, or until one is exhausted.
-#ifdef _MSC_VER
-    /* TODO:  this really does speed thing up significantly on MSVC builds on P6 processors.  */
-    /*        What's the best way to ifdef it in?                                             */                                              
-    __asm         align 16
-#endif
+    if (sourceLength == -1 && targetLength == -1) {
+        // Both strings are null terminated.
+        //    Check for them being the same string, and scan through
+        //    any leading equal portion.
+        if (source==target) {
+            return UCOL_EQUAL;
+        }

-    for (;;) {
-        if (pSrc == pSrcEnd || pTarg == pTargEnd) {
-            break;
+        for (;;) {
+            if ( *pSrc != *pTarg || *pSrc == 0) {
+                break;
+            }
+            pSrc++;
+            pTarg++;
        }
-        if (*pSrc == 0 && (sourceLength == -1 || targetLength == -1)) {
-            break;
+        if (*pSrc == 0 && *pTarg == 0) {
+            return UCOL_EQUAL;
        }
-        if (*pSrc != *pTarg) {
-            break;
-        }
-        pSrc++;
-        pTarg++;
+        equalLength = pSrc - source;
    }
-    equalLength = pSrc - source;
+    else
+    {
+        // One or both strings has an explicit length.
+        /* check if source and target are same strings */

-    // If we made it all the way through both strings, we are done.  They are ==
-    if ((pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0))  &&   /* At end of src string, however it was specified. */
-        (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)))  {  /* and also at end of dest string                  */
-        return UCOL_EQUAL;
+        if (source==target  && sourceLength==targetLength) {
+            return UCOL_EQUAL;
+        }
+        const UChar    *pSrcEnd = source + sourceLength;
+        const UChar    *pTargEnd = target + targetLength;
+
+
+        // Scan while the strings are bitwise ==, or until one is exhausted.
+            for (;;) {
+                if (pSrc == pSrcEnd || pTarg == pTargEnd) {
+                    break;
+                }
+                if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLength == -1)) {
+                    break;
+                }
+                if (*pSrc != *pTarg) {
+                    break;
+                }
+                pSrc++;
+                pTarg++;
+            }
+            equalLength = pSrc - source;
+
+            // If we made it all the way through both strings, we are done.  They are ==
+            if ((pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0))  &&   /* At end of src string, however it was specified. */
+                (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)))  {  /* and also at end of dest string                  */
+                return UCOL_EQUAL;
+            }
    }
-    
-    if (equalLength > 1) {
+    if (equalLength > 0) {
        /* There is an identical portion at the beginning of the two strings.        */
        /*   If the identical portion ends within a contraction or a comibining      */
        /*   character sequence, back up to the start of that sequence.              */
@ -3548,12 +3618,15 @@ ucol_strcoll(    const    UCollator    *coll,
    uint32_t sOrder=0, tOrder=0;
    if(!shifted) {
      for(;;) {
+        // TODO: one check per pass is only enough if each while loop below puts a single CE - verify.
+        UCOL_CEBUF_CHECK(&sCEs , &sColl);
+        UCOL_CEBUF_CHECK(&tCEs , &tColl);

        /* Get the next collation element in each of the strings, unless */
        /* we've been requested to skip it. */
        while(sOrder == 0) {
          // UCOL_GETNEXTCE(sOrder, coll, sColl, &status);
-          sOrder = ucol_getNextCE(coll, &sColl, &status);
+          sOrder = ucol_IGetNextCE(coll, &sColl, &status);
          sOrder ^= caseSwitch;
          // *(sCEs++) = sOrder;
          UCOL_CEBUF_PUT(&sCEs, sOrder);
@ -3562,7 +3635,7 @@ ucol_strcoll(    const    UCollator    *coll,

        while(tOrder == 0) {
          // UCOL_GETNEXTCE(tOrder, coll, tColl, &status);
-          tOrder = ucol_getNextCE(coll, &tColl, &status);
+          tOrder = ucol_IGetNextCE(coll, &tColl, &status);
          tOrder ^= caseSwitch;
          UCOL_CEBUF_PUT(&tCEs, tOrder);
          // *(tCEs++) = tOrder;
@ -3590,7 +3663,7 @@ ucol_strcoll(    const    UCollator    *coll,
 /* This is where abridged version for shifted should go */
        for(;;) {
          // UCOL_GETNEXTCE(sOrder, coll, sColl, &status);
-          sOrder = ucol_getNextCE(coll, &sColl, &status);
+          sOrder = ucol_IGetNextCE(coll, &sColl, &status);
          if(sOrder == UCOL_NO_MORE_CES) {
            UCOL_CEBUF_PUT(&sCEs, sOrder);
            break;
@ -3645,7 +3718,7 @@ ucol_strcoll(    const    UCollator    *coll,

        for(;;) {
          // UCOL_GETNEXTCE(tOrder, coll, tColl, &status);
-          tOrder = ucol_getNextCE(coll, &tColl, &status);
+          tOrder = ucol_IGetNextCE(coll, &tColl, &status);
          if(tOrder == UCOL_NO_MORE_CES) {
            UCOL_CEBUF_PUT(&tCEs, tOrder);
            // *(tCEs++) = tOrder;
@ -3933,18 +4006,20 @@ ucol_strcoll(    const    UCollator    *coll,
    }

 commonReturn:
-    if (sColl.writableBuffer != sColl.stackWritableBuffer) {
-        uprv_free(sColl.writableBuffer);
-    }
-    if (tColl.writableBuffer != tColl.stackWritableBuffer) {
-        uprv_free(tColl.writableBuffer);
-    }
+    if ((sColl.flags | tColl.flags) & UCOL_ITER_ALLOCATED) {
+        if (sColl.writableBuffer != sColl.stackWritableBuffer) {
+            uprv_free(sColl.writableBuffer);
+        }
+        if (tColl.writableBuffer != tColl.stackWritableBuffer) {
+            uprv_free(tColl.writableBuffer);
+        }

-    if (sCEs.buf != sCEs.localArray ) {
-        uprv_free(sCEs.buf);
-    }
-    if (tCEs.buf != tCEs.localArray ) {
-        uprv_free(tCEs.buf);
+        if (sCEs.buf != sCEs.localArray ) {
+            uprv_free(sCEs.buf);
+        }
+        if (tCEs.buf != tCEs.localArray ) {
+            uprv_free(tCEs.buf);
+        }
    }

    return result;
@ -4641,11 +4716,11 @@ uint32_t ucol_getIncrementalUCA(UChar ch, incrementalContext *collationSource, U
              IInit_collIterate(collator, jamoString, 2, &jamos);
            }

-            CE = ucol_getNextCE(collator, &jamos, status);
+            CE = ucol_IGetNextCE(collator, &jamos, status);

            while(CE != UCOL_NO_MORE_CES) {
              *(collationSource->CEpos++) = CE;
-              CE = ucol_getNextCE(collator, &jamos, status);
+              CE = ucol_IGetNextCE(collator, &jamos, status);
            }
            return *(collationSource->toReturn++);

--- a/icu4c/source/i18n/ucol_imp.h
+++ b/icu4c/source/i18n/ucol_imp.h
@ -81,11 +81,15 @@ Note 3 is the minimum value for Thai collation to work correctly.

 #define UCOL_ITER_HASLEN 2

-/* UCOL_ITER_INNORMBUF - set if the "pos" is in       */
-/*               the writable side buffer, handling   */
-/*               incrementally normalized characters. */
+                              /* UCOL_ITER_INNORMBUF - set if the "pos" is in          */
+                              /*               the writable side buffer, handling      */
+                              /*               incrementally normalized characters.    */
 #define UCOL_ITER_INNORMBUF 4

+                              /* UCOL_ITER_ALLOCATED - set if this iterator has        */
+                              /*    malloced storage to expand a buffer.               */
+#define UCOL_ITER_ALLOCATED 8
+
 #define NFC_ZERO_CC_BLOCK_LIMIT_  0x300

 struct collIterate {
@ -223,27 +227,6 @@ struct incrementalContext {
 #endif


-/* CEBuf - a growable buffer for holding CEs during strcoll            */
-#define UCOL_CEBUF_SIZE 512
-typedef struct ucol_CEBuf {
-    uint32_t    *buf;
-    uint32_t    *endp;
-    uint32_t    *pos;
-    uint32_t     localArray[UCOL_CEBUF_SIZE];
-} ucol_CEBuf;
-
-
-#define UCOL_INIT_CEBUF(b) {                 \
-    (b)->buf = (b)->pos = (b)->localArray;   \
-    (b)->endp = (b)->buf + UCOL_CEBUF_SIZE;  \
-}
-    
-void ucol_CEBuf_Expand(ucol_CEBuf *b);
-
-#define UCOL_CEBUF_PUT(b, ce) {                       \
-    if ((b)->pos == (b)->endp) ucol_CEBuf_Expand(b);  \
-    *(b)->pos++ = ce;                                 \
-}

 /* a macro that gets a simple CE */
 /* for more complicated CEs it resorts to getComplicatedCE (what else) */
@ -552,7 +535,8 @@ struct UCollator {
                                         expansion ce with the last ce
                                         corresponding to endExpansionCE,
                                         terminated with a null */
-    const uint8_t *unsafeCP;          /* unsafe code points hashtable */
+    const uint8_t *unsafeCP;           /* unsafe code points hashtable */
+    UChar          minUnsafeCP;        /* Smallest unsafe Code Point. */
 };

 /* various internal functions */
--- a/icu4c/source/test/intltest/intltest.dsp
+++ b/icu4c/source/test/intltest/intltest.dsp
@ -42,7 +42,7 @@ RSC=rc.exe
 # PROP Ignore_Export_Lib 0
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /MD /W3 /GX /Ox /Op /I "..\..\..\include" /I "..\..\..\source\common" /I "..\..\tools\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /MD /W3 /GX /Zi /Ox /Op /Ob0 /I "..\..\..\include" /I "..\..\..\source\common" /I "..\..\tools\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
 # ADD BASE RSC /l 0x409 /d "NDEBUG"
 # ADD RSC /l 0x409 /d "NDEBUG"
 BSC32=bscmake.exe