ICU-1686 some UCharIterator improvements - more consistent constant names, uiter_next32() etc.

X-SVN-Rev: 7715
2002-02-20 02:04:23 +00:00 · 2002-02-20 02:04:23 +00:00 · 6bdadbb707
commit 6bdadbb707
parent dccc8aeafd
4 changed files with 149 additions and 29 deletions
--- a/icu4c/source/common/normlzr.cpp
+++ b/icu4c/source/common/normlzr.cpp
@ -306,13 +306,13 @@ UChar32 Normalizer::previous() {
 }

 void Normalizer::reset() {
-    currentIndex=nextIndex=text->move(text, 0, UITERATOR_START);
+    currentIndex=nextIndex=text->move(text, 0, UITER_START);
    clearBuffer();
 }

 void
 Normalizer::setIndexOnly(UTextOffset index) {
-    currentIndex=nextIndex=text->move(text, index, UITERATOR_START); // validates index
+    currentIndex=nextIndex=text->move(text, index, UITER_START); // validates index
    clearBuffer();
 }

@ -355,7 +355,7 @@ UChar32 Normalizer::first() {
 * the input text corresponding to that normalized character.
 */
 UChar32 Normalizer::last() {
-    currentIndex=nextIndex=text->move(text, 0, UITERATOR_LIMIT);
+    currentIndex=nextIndex=text->move(text, 0, UITER_LIMIT);
    clearBuffer();
    return previous();
 }
@ -388,7 +388,7 @@ UTextOffset Normalizer::getIndex() const {
 * over which this <tt>Normalizer</tt> is iterating
 */
 UTextOffset Normalizer::startIndex() const {
-    return text->move(text, 0, UITERATOR_START);
+    return text->move(text, 0, UITER_START);
 }

 /**
@ -397,7 +397,7 @@ UTextOffset Normalizer::startIndex() const {
 * over which this <tt>Normalizer</tt> is iterating
 */
 UTextOffset Normalizer::endIndex() const {
-    return text->move(text, 0, UITERATOR_LIMIT);
+    return text->move(text, 0, UITER_LIMIT);
 }

 //-------------------------------------------------------------------------
@ -520,7 +520,7 @@ Normalizer::nextNormalize() {

    clearBuffer();
    currentIndex=nextIndex;
-    text->move(text, nextIndex, UITERATOR_START);
+    text->move(text, nextIndex, UITER_START);
    if(!text->hasNext(text)) {
        return FALSE;
    }
@ -534,7 +534,7 @@ Normalizer::nextNormalize() {
    buffer.releaseBuffer(length);
    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        errorCode=U_ZERO_ERROR;
-        text->move(text, nextIndex, UITERATOR_START);
+        text->move(text, nextIndex, UITER_START);
        p=buffer.getBuffer(length);
        length=unorm_next(text, p, buffer.getCapacity(),
                          fUMode, fOptions!=0,
@ -543,7 +543,7 @@ Normalizer::nextNormalize() {
        buffer.releaseBuffer(length);
    }

-    nextIndex=text->move(text, 0, UITERATOR_CURRENT);
+    nextIndex=text->move(text, 0, UITER_CURRENT);
    return U_SUCCESS(errorCode) && !buffer.isEmpty();
 }

@ -555,7 +555,7 @@ Normalizer::previousNormalize() {

    clearBuffer();
    nextIndex=currentIndex;
-    text->move(text, currentIndex, UITERATOR_START);
+    text->move(text, currentIndex, UITER_START);
    if(!text->hasPrevious(text)) {
        return FALSE;
    }
@ -569,7 +569,7 @@ Normalizer::previousNormalize() {
    buffer.releaseBuffer(length);
    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        errorCode=U_ZERO_ERROR;
-        text->move(text, currentIndex, UITERATOR_START);
+        text->move(text, currentIndex, UITER_START);
        p=buffer.getBuffer(length);
        length=unorm_previous(text, p, buffer.getCapacity(),
                              fUMode, fOptions,
@ -579,7 +579,7 @@ Normalizer::previousNormalize() {
    }

    bufferPos=buffer.length();
-    currentIndex=text->move(text, 0, UITERATOR_CURRENT);
+    currentIndex=text->move(text, 0, UITER_CURRENT);
    return U_SUCCESS(errorCode) && !buffer.isEmpty();
 }

--- a/icu4c/source/common/uiter.cpp
+++ b/icu4c/source/common/uiter.cpp
@ -68,11 +68,11 @@ static const UCharIterator noopIterator={
 static int32_t U_CALLCONV
 stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
    switch(origin) {
-    case UITERATOR_START:
+    case UITER_START:
        return iter->start;
-    case UITERATOR_CURRENT:
+    case UITER_CURRENT:
        return iter->index;
-    case UITERATOR_LIMIT:
+    case UITER_LIMIT:
        return iter->limit;
    default:
        /* not a valid origin */
@ -86,13 +86,13 @@ stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origi
    int32_t pos;

    switch(origin) {
-    case UITERATOR_START:
+    case UITER_START:
        pos=iter->start+delta;
        break;
-    case UITERATOR_CURRENT:
+    case UITER_CURRENT:
        pos=iter->index+delta;
        break;
-    case UITERATOR_LIMIT:
+    case UITER_LIMIT:
        pos=iter->limit+delta;
        break;
    default:
@ -190,11 +190,11 @@ uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
 static int32_t U_CALLCONV
 characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
    switch(origin) {
-    case UITERATOR_START:
+    case UITER_START:
        return ((CharacterIterator *)(iter->context))->startIndex();
-    case UITERATOR_CURRENT:
+    case UITER_CURRENT:
        return ((CharacterIterator *)(iter->context))->getIndex();
-    case UITERATOR_LIMIT:
+    case UITER_LIMIT:
        return ((CharacterIterator *)(iter->context))->endIndex();
    default:
        /* not a valid origin */
@ -335,4 +335,69 @@ uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
    }
 }

+/* Helper functions --------------------------------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+uiter_current32(UCharIterator *iter) { /* return the code point that contains the code unit at the current index */
+    int32_t c, c2;
+
+    c=iter->current(iter); /* code unit at the current index; the comment below treats -1 as "at the limit" */
+    if(UTF_IS_SURROGATE(c)) {
+        if(UTF_IS_SURROGATE_FIRST(c)) { /* c is a surrogate here; this branch tries to pair it with the unit that follows */
+            /*
+             * go to the next code unit
+             * we know that we are not at the limit because c!=-1
+             */
+            iter->move(iter, 1, UITER_CURRENT);
+            if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) {
+                c=UTF16_GET_PAIR_VALUE(c, c2); /* c first, following unit c2 second */
+            }
+
+            /* undo index movement (done whether or not a pair was formed) */
+            iter->move(iter, -1, UITER_CURRENT);
+        } else { /* otherwise try to pair c with the unit that precedes it */
+            if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
+                c=UTF16_GET_PAIR_VALUE(c2, c); /* preceding unit c2 first, c second */
+            }
+            if(c2>=0) { /* previous() returned a code unit; presumably a negative result means the index did not move - TODO confirm */
+                /* undo index movement */
+                iter->move(iter, 1, UITER_CURRENT);
+            }
+        }
+    }
+    return c; /* the combined pair value, or the single code unit (possibly an unpaired surrogate or a negative value) as read */
+}
+
+U_CAPI int32_t U_EXPORT2
+uiter_next32(UCharIterator *iter) { /* read one code point forward, joining a first+second surrogate pair into one value */
+    int32_t c, c2;
+
+    c=iter->next(iter);
+    if(UTF_IS_FIRST_SURROGATE(c)) {
+        if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) { /* reads (and steps over) the unit after c */
+            c=UTF16_GET_PAIR_VALUE(c, c2);
+        } else if(c2>=0) { /* a unit was read but it does not complete the pair */
+            /* unmatched first surrogate, undo index movement so that c2 is read again by the next call */
+            iter->move(iter, -1, UITER_CURRENT);
+        }
+    }
+    return c; /* pair value, or the single unit returned by the first next() call */
+}
+
+U_CAPI int32_t U_EXPORT2
+uiter_previous32(UCharIterator *iter) { /* read one code point backward, joining a first+second surrogate pair into one value */
+    int32_t c, c2;
+
+    c=iter->previous(iter);
+    if(UTF_IS_SECOND_SURROGATE(c)) {
+        if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) { /* reads (and steps back over) the unit before c */
+            c=UTF16_GET_PAIR_VALUE(c2, c);
+        } else if(c2>=0) { /* a unit was read but it does not complete the pair */
+            /* unmatched second surrogate, undo index movement so that c2 is read again by the next call */
+            iter->move(iter, 1, UITER_CURRENT);
+        }
+    }
+    return c; /* pair value, or the single unit returned by the first previous() call */
+}
+
 U_CDECL_END
--- a/icu4c/source/common/unicode/uiter.h
+++ b/icu4c/source/common/unicode/uiter.h
@ -42,7 +42,7 @@ typedef struct UCharIterator UCharIterator;
 * @draft ICU 2.1
 */
 enum UCharIteratorOrigin {
-    UITERATOR_START, UITERATOR_CURRENT, UITERATOR_LIMIT
+    UITER_START, UITER_CURRENT, UITER_LIMIT
 };
 typedef enum UCharIteratorOrigin UCharIteratorOrigin;

@ -298,6 +298,61 @@ struct UCharIterator {
    UCharIteratorReserved *reservedFn;
 };

+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or -1 if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then a matching following or preceding surrogate, if there is one,
+ * is used to form the code point value; otherwise the surrogate itself is returned.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see UTF_GET_CHAR
+ * @see UnicodeString::char32At()
+ * @draft ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and move
+ * the index past it (post-increment, like s[i++]; a surrogate pair counts as one code point),
+ * or return -1 if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see UTF_NEXT_CHAR
+ * @draft ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Move the index back over the preceding code point and return it
+ * (pre-decrement, like s[--i]; a surrogate pair counts as one code point),
+ * or return -1 if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see UTF_PREV_CHAR
+ * @draft ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
 /**
 * Set up a UCharIterator to iterate over a string.
 *
--- a/icu4c/source/common/unorm.cpp
+++ b/icu4c/source/common/unorm.cpp
@ -2304,7 +2304,7 @@ _getPrevNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar
        }
    } else {
        /* unpaired second surrogate, undo the c2=src.previous() movement */
-        src.move(&src, 1, UITERATOR_CURRENT);
+        src.move(&src, 1, UITER_CURRENT);
        return 0;
    }
 }
@ -2364,7 +2364,7 @@ _findPreviousIterationBoundary(UCharIterator &src,

            if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) {
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-                src.move(&src, 0, UITERATOR_START);
+                src.move(&src, 0, UITER_START);
                return 0;
            }

@ -2456,7 +2456,7 @@ unorm_previous(UCharIterator *src,
                    }
                    c=c2; /* lead surrogate to be written below */
                } else {
-                    src->move(src, 1, UITERATOR_CURRENT);
+                    src->move(src, 1, UITER_CURRENT);
                }
            }

@ -2530,7 +2530,7 @@ _getNextNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar
    norm32=_getNorm32(c);
    if(UTF_IS_FIRST_SURROGATE(c)) {
        if(src.hasNext(&src) && UTF_IS_SECOND_SURROGATE(c2=(UChar)src.current(&src))) {
-            src.move(&src, 1, UITERATOR_CURRENT); /* skip the c2 surrogate */
+            src.move(&src, 1, UITER_CURRENT); /* skip the c2 surrogate */
            if((norm32&mask)==0) {
                /* irrelevant data */
                return 0;
@ -2601,7 +2601,7 @@ _findNextIterationBoundary(UCharIterator &src,
        if(UTF_IS_SECOND_SURROGATE(c2=(UChar)src.next(&src))) {
            buffer[bufferIndex++]=c2;
        } else {
-            src.move(&src, -1, UITERATOR_CURRENT); /* back out the non-trail-surrogate */
+            src.move(&src, -1, UITER_CURRENT); /* back out the non-trail-surrogate */
        }
    }

@ -2610,7 +2610,7 @@ _findNextIterationBoundary(UCharIterator &src,
    while(src.hasNext(&src)) {
        if(isNextBoundary(src, minC, mask, c, c2)) {
            /* back out the latest movement to stop at the boundary */
-            src.move(&src, c2==0 ? -1 : -2, UITERATOR_CURRENT);
+            src.move(&src, c2==0 ? -1 : -2, UITER_CURRENT);
            break;
        } else {
            if(bufferIndex+(c2==0 ? 1 : 2)<=bufferCapacity ||
@ -2625,7 +2625,7 @@ _findNextIterationBoundary(UCharIterator &src,
                }
            } else {
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-                src.move(&src, 0, UITERATOR_LIMIT);
+                src.move(&src, 0, UITER_LIMIT);
                return 0;
            }
        }
@ -2703,7 +2703,7 @@ unorm_next(UCharIterator *src,
                    }
                    /* lead surrogate to be written below */
                } else {
-                    src->move(src, -1, UITERATOR_CURRENT);
+                    src->move(src, -1, UITER_CURRENT);
                }
            }