ICU-861

Updated backwards collation element iterator codes. X-SVN-Rev: 3760
2001-02-23 23:36:42 +00:00 · 2001-02-23 23:36:42 +00:00 · ec4c07eeb0
commit ec4c07eeb0
parent 6d5b35e584
3 changed files with 123 additions and 147 deletions
--- a/icu4c/source/i18n/ucol.cpp
+++ b/icu4c/source/i18n/ucol.cpp
@ -7,7 +7,6 @@
 * Date        Name      Comments
 * 02/16/2001  synwee    Added internal method getPrevSpecialCE 
 */
 #include "ucolimp.h"
 #include "ucoltok.h"
@ -1068,12 +1067,13 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
          T += TBase;
          // return the first CE, but first put the rest into the expansion buffer
          if (!collationSource->JamoSpecial) { // FAST PATH
            *(collationSource->CEpos++) = ucmp32_get(UCA->mapping, V);
            if (T != TBase) {
                *(collationSource->CEpos++) = ucmp32_get(UCA->mapping, T);
            }
            return ucmp32_get(UCA->mapping, L); // return first one
          } else { // Jamo is Special
@ -1103,6 +1103,7 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
          }
          /* This is a code point minus 0x10000, that's what algorithm requires */
          order = 0xE0010303 | (cp & 0xFFE00) << 8;
          *(collationSource->CEpos++) = 0x80200080 | (cp & 0x001FF) << 22;
        } else {
          return 0; /* completely ignorable */
@ -1144,7 +1145,7 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
    We have to check if ch is possibly a first surrogate - then we need to 
    take the next code unit and make a bigger CE 
    */
-    UChar nextChar;
+    UChar prevChar;
    const int 
      SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
      LCount = 19, VCount = 21, TCount = 28,
@ -1184,11 +1185,13 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
      */
      if (!collationSource->JamoSpecial) 
      { 
        *(collationSource->CEpos ++) = ucmp32_get(UCA->mapping, L); 
        *(collationSource->CEpos ++) = ucmp32_get(UCA->mapping, V);
        if (T != TBase)
-          *(collationSource->CEpos++) = ucmp32_get(UCA->mapping, T);
+          *(collationSource->CEpos ++) = ucmp32_get(UCA->mapping, T);
-        /* return first one */
+        
-        return ucmp32_get(UCA->mapping, L); 
+        collationSource->toReturn = collationSource->CEpos - 1;
        return *(collationSource->toReturn);
      } else { 
        /* 
        Jamo is Special
@ -1213,28 +1216,23 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
    if (UTF_IS_SECOND_SURROGATE(ch)) 
    {
-      if ((collationSource->len - collationSource->pos != length) &&
+      UChar *temp = collationSource->pos;
-                  (UTF_IS_FIRST_SURROGATE(nextChar = *collationSource->pos))) 
+      if (((collationSource->string < temp) ||
          (collationSource->writableBuffer < temp)) &&
          (UTF_IS_FIRST_SURROGATE(prevChar = *(collationSource->pos - 1)))) 
      {
-        uint32_t cp = ((ch << 10UL) + nextChar - ((0xd800 << 10UL) + 0xdc00));
+        uint32_t cp = ((prevChar << 10UL) + ch - ((0xd800 << 10UL) + 0xdc00));
-        if (collationSource->pos != collationSource->writableBuffer)
+        collationSource->pos --;
          collationSource->pos --;
        else
        {
          collationSource->pos = collationSource->string + 
           (length - (collationSource->len - collationSource->writableBuffer));
          collationSource->len = collationSource->string + length;
          collationSource->isThai = TRUE;
        }
        if ((cp & 0xFFFE) == 0xFFFE || (0xD800 <= cp && cp <= 0xDC00))
          return 0;  /* illegal code value, use completely ignorable! */
        /* 
        This is a code point minus 0x10000, that's what algorithm requires 
        */
-        order = 0xE0010303 | (cp & 0xFFE00) << 8;
+        *(collationSource->CEpos ++) = 0xE0010303 | (cp & 0xFFE00) << 8;
-        *(collationSource->CEpos ++) = 0x80200080 | (cp & 0x001FF) << 22;
+        order = 0x80200080 | (cp & 0x001FF) << 22;
-        collationSource->toReturn ++;
+        collationSource->toReturn = collationSource->CEpos;
        *(collationSource->CEpos ++) = order;
      } 
      else
        return 0; /* completely ignorable */
@ -1246,9 +1244,11 @@ uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
        return 0; /* completely ignorable */
      /* Make up an artificial CE from code point as per UCA */
-      order = 0xD08003C3 | (ch & 0xF000) << 12 | (ch & 0x0FE0) << 11;
+      *(collationSource->CEpos ++) = 0xD08003C3 | (ch & 0xF000) << 12 | 
-      *(collationSource->CEpos ++) = 0x04000080 | (ch & 0x001F) << 27;
+                                     (ch & 0x0FE0) << 11;
-      collationSource->toReturn ++;
+      collationSource->toReturn = collationSource->CEpos;
      order = 0x04000080 | (ch & 0x001F) << 27;
      *(collationSource->CEpos ++) = order;
    }
  }
  return order; /* return the CE */
@ -1397,12 +1397,7 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
      if (source->isThai == TRUE) 
      { /* if we encountered Thai prevowel & the string is not yet touched */
        source->isThai = FALSE;
-        /*
+        strend =  source->pos;
        sigh... to cater for getNextCE, we'll have to modify and store the 
        whole string instead of a substring as in getSpecialCE
        */
        UCharOffset = source->pos;
        strend =  source->len;
        size = strend - source->string;
        if (size > UCOL_WRITABLE_BUFFER_SIZE) 
        {
@ -1417,22 +1412,21 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
        } 
        UChar *sourceCopy = source->string;
        UChar *targetCopy = source->writableBuffer;
-        while (sourceCopy < strend)
+        while (sourceCopy <= strend)
        {
 	        if (UCOL_ISTHAIPREVOWEL(*sourceCopy) &&      
            /* This is the combination that needs to be swapped */
 		        UCOL_ISTHAIBASECONSONANT(*(sourceCopy + 1))) 
          {
-		        *(targetCopy) = *(sourceCopy + count + 1);
+		        *(targetCopy)     = *(sourceCopy + 1);
-		        *(targetCopy+1) = *(sourceCopy + count);
+		        *(targetCopy + 1) = *(sourceCopy);
-		        targetCopy+=2;
+		        targetCopy += 2;
-		        sourceCopy+=2;
+		        sourceCopy += 2;
 	        } 
          else
-		        *(targetCopy++) = *(sourceCopy++);
+		        *(targetCopy ++) = *(sourceCopy ++);
        }
-        source->pos   = source->writableBuffer + 
+        source->pos   = targetCopy;
                                               (UCharOffset - source->string);
        source->len   = targetCopy;
        source->CEpos = source->toReturn = source->CEs;
        CE = UCOL_IGNORABLE;
@ -1470,32 +1464,22 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
        */
        UCharOffset += *UCharOffset; 
-        schar = *source->pos;
+        schar = *(source->pos - 1);
        while (schar > (tchar = *UCharOffset)) 
          UCharOffset ++;
        if (schar != tchar) 
-        { 
+        {
          /* 
          we didn't find the correct codepoint. We can use either the first or 
          the last CE 
          */
-          if (tchar != 0xFFFF)
+          /* testing if (tchar != 0xFFFF) */
-            UCharOffset = constart; 
+          UCharOffset = constart; 
        } 
        else
        {
          /* Move up one character */
-          if (source->pos != source->writableBuffer)
+          source->pos --;
            source->pos --;
          else
          {
            source->pos = source->string + 
                          (length - (source->len - source->writableBuffer));
            source->len = source->string + length;
            source->isThai = TRUE;
          }
        }
        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
        if (!isContraction(CE))
          break;  
@ -1521,7 +1505,7 @@ uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
        while (*CEOffset != 0) 
          *(source->CEpos ++) = *CEOffset ++;
      source->toReturn = source->CEpos - 1;
-      return *(source->toReturn --);
+      return *(source->toReturn);
    case CHARSET_TAG:
      /* probably after 1.8 */
      return UCOL_NOT_FOUND;
--- a/icu4c/source/i18n/ucoleitr.cpp
+++ b/icu4c/source/i18n/ucoleitr.cpp
@ -57,6 +57,7 @@ ucol_openElements(const UCollator  *coll,
    textLength = u_strlen(text);
  result->length_ = textLength;
  result->reset_   = TRUE;
  init_collIterate(text, textLength, &result->iteratordata_, FALSE);
  return result;
@ -77,6 +78,7 @@ U_CAPI void
 ucol_reset(UCollationElements *elems)
 {
  collIterate *ci = &(elems->iteratordata_);
  elems->reset_   = TRUE;
  ci->pos         = ci->string;
  ci->len         = ci->string + elems->length_;
  ci->CEpos       = ci->toReturn = ci->CEs;
@ -97,40 +99,42 @@ U_CAPI int32_t
 ucol_next(UCollationElements *elems,
          UErrorCode         *status)
 {
  int32_t result;
  if (U_FAILURE(*status)) 
    return UCOL_NULLORDER;
-  int32_t result;
+  elems->reset_ = FALSE;
  UCOL_GETNEXTCE(result, elems->collator_, elems->iteratordata_, status);
  /*
  if ((elems->iteratordata_).CEpos > (elems->iteratordata_).toReturn) 
-  {                       
+    {                       
-    result = *((elems->iteratordata_).toReturn++);                                      
+      result = *((elems->iteratordata_).toReturn++);                                      
-    if ((elems->iteratordata_).CEpos == (elems->iteratordata_).toReturn)
+      if ((elems->iteratordata_).CEpos == (elems->iteratordata_).toReturn)
-      (elems->iteratordata_).CEpos = (elems->iteratordata_).toReturn = 
+        (elems->iteratordata_).CEpos = (elems->iteratordata_).toReturn = 
-      (elems->iteratordata_).CEs; 
+        (elems->iteratordata_).CEs; 
  } 
  else 
    if ((elems->iteratordata_).pos < (elems->iteratordata_).len) 
    {                        
      UChar ch = *(elems->iteratordata_).pos++;     
      if (ch <= 0xFF)
        (result) = (elems->collator_)->latinOneMapping[ch];                                          
      else
        (result) = ucmp32_get((elems->collator_)->mapping, ch);                                      
      if((result) >= UCOL_NOT_FOUND) 
      {
        (result) = getSpecialCE((elems->collator_), (result), 
                                &(elems->iteratordata_), (status));        
        if ((result) == UCOL_NOT_FOUND)
          (result) = ucol_getNextUCA(ch, &(elems->iteratordata_), (status));                                                                            
      }                                                                               
    } 
-    else
+    else 
-      (result) = UCOL_NO_MORE_CES;                                                     
+      if ((elems->iteratordata_).pos < (elems->iteratordata_).len) 
      {                        
        UChar ch = *(elems->iteratordata_).pos++;     
        if (ch <= 0xFF)
          (result) = (elems->collator_)->latinOneMapping[ch];                                          
        else
          (result) = ucmp32_get((elems->collator_)->mapping, ch);                                      
        if((result) >= UCOL_NOT_FOUND) 
        {
          (result) = getSpecialCE((elems->collator_), (result), 
                                  &(elems->iteratordata_), (status));        
          if ((result) == UCOL_NOT_FOUND)
            (result) = ucol_getNextUCA(ch, &(elems->iteratordata_), (status));                                                                            
        }                                                                               
      } 
      else
        (result) = UCOL_NO_MORE_CES;
  */
-    
+  
  if (result == UCOL_NO_MORE_CES)
    result = UCOL_NULLORDER;
  return result;
@ -142,62 +146,61 @@ ucol_previous(UCollationElements *elems,
 {
  if(U_FAILURE(*status)) 
    return UCOL_NULLORDER;
  else
  {
    int32_t result;
-  int32_t result;
+    if (elems->reset_ && 
-  UCOL_GETPREVCE(result, elems->collator_, elems->iteratordata_, 
+        (elems->iteratordata_.pos == elems->iteratordata_.string))
-                 elems->length_, status);
+      elems->iteratordata_.pos = elems->iteratordata_.len;
-  /* synwee : to be removed, only for testing 
+    elems->reset_ = FALSE;
  const UCollator   *coll  = elems->collator_;
        collIterate *data  = &(elems->iteratordata_);
        int32_t     length = elems->length_;
-  if (data->CEpos > data->CEs) 
+    UCOL_GETPREVCE(result, elems->collator_, elems->iteratordata_, 
-  {              
+                   elems->length_, status);
-    data->toReturn --;
+
-    (result) = *(data->toReturn);                                           
+    /* synwee : to be removed, only for testing */
-    if (data->CEs == data->toReturn)                                
+    /*
-      data->CEpos = data->toReturn = data->CEs; 
+    const UCollator   *coll  = elems->collator_;
-  }                                                                          
+          collIterate *data  = &(elems->iteratordata_);
-  else 
+          int32_t     length = elems->length_;
-  {                    
+
-    /* pointers are always at the next position to be retrieved for getnextce 
+    if (data->CEpos > data->CEs) 
-    for every first previous step after a next, value returned will the same 
+    {              
-    as the last next value
+      data->toReturn --;
-    */
+      (result) = *(data->toReturn);                                           
-    /*if (data->len - data->pos == length)
+      if (data->CEs == data->toReturn)                                
-      (result) = UCOL_NO_MORE_CES;                                                                                                                    
+        data->CEpos = data->toReturn = data->CEs; 
    }                                                                          
    else 
-    {                  
+    {                    
-      if (data->pos != data->writableBuffer)
+      if (data->pos == data->string || data->pos == data->writableBuffer)
-        data->pos --;                                 
+        (result) = UCOL_NO_MORE_CES;                                                                                                                    
      else 
-      {                                                                 
+      {                  
-        data->pos = data->string +                                             
+        data->pos --;                                 
-                            (length - (data->len - data->writableBuffer));     
+      
-        data->len = data->string + length;                                     
+        UChar ch = *(data->pos);
-        data->isThai = TRUE;                                                  
+        if (ch <= 0xFF)                                                
-      }                
+          (result) = (coll)->latinOneMapping[ch];                                                                                       
-
+        else
-      UChar ch = *(data->pos);
+          (result) = ucmp32_get((coll)->mapping, ch);                           
      if (ch <= 0xFF)                                                
        (result) = (coll)->latinOneMapping[ch];                                                                                       
      else
        (result) = ucmp32_get((coll)->mapping, ch);                           
-      if ((result) >= UCOL_NOT_FOUND) 
+        if ((result) >= UCOL_NOT_FOUND) 
-      {
+        {
-        (result) = getSpecialPrevCE(coll, result, data, length, status);      
+          (result) = getSpecialPrevCE(coll, result, data, length, status);      
-        if ((result) == UCOL_NOT_FOUND)
+          if ((result) == UCOL_NOT_FOUND)
-          (result) = ucol_getPrevUCA(ch, data, length, status);                                      
+            (result) = ucol_getPrevUCA(ch, data, length, status);                                      
-      }                                                                      
+        }                                                                      
-    }                                                                        
+      }                                                                        
-  }   */
+    } 
    */
    if (result == UCOL_NO_MORE_CES)
      result = UCOL_NULLORDER;
-  if (result == UCOL_NO_MORE_CES)
+    return result;
-    result = UCOL_NULLORDER;
+  }
  return result;
 }
 U_CAPI int32_t
@ -240,14 +243,7 @@ ucol_getOffset(const UCollationElements *elems)
  if (ci->isThai == TRUE)
    return ci->pos - ci->string;
-  /* 
+  return ci->pos - ci->writableBuffer;
  if it is a thai string with reversed elements, since getNextCE does not 
  store only a substring in writeablebuffer, we'll have to do some calculation
  to get the offset out.
  need discussion to see if it is a better idea to store the whole string 
  instead.
  */
  return elems->length_ - (ci->len - ci->pos);
 }
 U_CAPI void
--- a/icu4c/source/i18n/ucolimp.h
+++ b/icu4c/source/i18n/ucolimp.h
@ -87,6 +87,10 @@ struct UCollationElements
  * Source text length
  */
        int32_t            length_;
  /**
  * Indicates if this data has been reset.
  */
        UBool              reset_;
 };
 struct incrementalContext {
@ -240,20 +244,12 @@ struct incrementalContext {
    }                                                                        \
  }                                                                          \
  else {                                                                     \
-    if ((data).len - (data).pos == length) {                                 \
+    if ((data).pos == (data).string || (data).pos == (data).writableBuffer) {\
      (order) = UCOL_NO_MORE_CES;                                            \
    }                                                                        \
    else {                                                                   \
      UChar ch;                                                              \
-      if ((data).pos != (data).writableBuffer) {                             \
+      (data).pos --;                                                         \
        (data).pos --;                                                       \
      }                                                                      \
      else {                                                                 \
        (data).pos = (data).string +                                         \
                            (length - ((data).len - (data).writableBuffer)); \
        (data).len = (data).string + length;                                 \
        (data).isThai = TRUE;                                                \
      }                                                                      \
      ch = *((data).pos);                                                    \
      if (ch <= 0xFF) {                                                      \
        (order) = (coll)->latinOneMapping[ch];                               \