From 6bdadbb7072e1c57ac49b563a8102d1bb48cdda1 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 20 Feb 2002 02:04:23 +0000 Subject: [PATCH] ICU-1686 some UCharIterator improvements - more consistent constant names, uiter_next32() etc. X-SVN-Rev: 7715 --- icu4c/source/common/normlzr.cpp | 22 ++++---- icu4c/source/common/uiter.cpp | 83 +++++++++++++++++++++++++---- icu4c/source/common/unicode/uiter.h | 57 +++++++++++++++++++- icu4c/source/common/unorm.cpp | 16 +++--- 4 files changed, 149 insertions(+), 29 deletions(-) diff --git a/icu4c/source/common/normlzr.cpp b/icu4c/source/common/normlzr.cpp index 4c57c50f47..f167dbdcfc 100644 --- a/icu4c/source/common/normlzr.cpp +++ b/icu4c/source/common/normlzr.cpp @@ -306,13 +306,13 @@ UChar32 Normalizer::previous() { } void Normalizer::reset() { - currentIndex=nextIndex=text->move(text, 0, UITERATOR_START); + currentIndex=nextIndex=text->move(text, 0, UITER_START); clearBuffer(); } void Normalizer::setIndexOnly(UTextOffset index) { - currentIndex=nextIndex=text->move(text, index, UITERATOR_START); // validates index + currentIndex=nextIndex=text->move(text, index, UITER_START); // validates index clearBuffer(); } @@ -355,7 +355,7 @@ UChar32 Normalizer::first() { * the input text corresponding to that normalized character. 
*/ UChar32 Normalizer::last() { - currentIndex=nextIndex=text->move(text, 0, UITERATOR_LIMIT); + currentIndex=nextIndex=text->move(text, 0, UITER_LIMIT); clearBuffer(); return previous(); } @@ -388,7 +388,7 @@ UTextOffset Normalizer::getIndex() const { * over which this Normalizer is iterating */ UTextOffset Normalizer::startIndex() const { - return text->move(text, 0, UITERATOR_START); + return text->move(text, 0, UITER_START); } /** @@ -397,7 +397,7 @@ UTextOffset Normalizer::startIndex() const { * over which this Normalizer is iterating */ UTextOffset Normalizer::endIndex() const { - return text->move(text, 0, UITERATOR_LIMIT); + return text->move(text, 0, UITER_LIMIT); } //------------------------------------------------------------------------- @@ -520,7 +520,7 @@ Normalizer::nextNormalize() { clearBuffer(); currentIndex=nextIndex; - text->move(text, nextIndex, UITERATOR_START); + text->move(text, nextIndex, UITER_START); if(!text->hasNext(text)) { return FALSE; } @@ -534,7 +534,7 @@ Normalizer::nextNormalize() { buffer.releaseBuffer(length); if(errorCode==U_BUFFER_OVERFLOW_ERROR) { errorCode=U_ZERO_ERROR; - text->move(text, nextIndex, UITERATOR_START); + text->move(text, nextIndex, UITER_START); p=buffer.getBuffer(length); length=unorm_next(text, p, buffer.getCapacity(), fUMode, fOptions!=0, @@ -543,7 +543,7 @@ Normalizer::nextNormalize() { buffer.releaseBuffer(length); } - nextIndex=text->move(text, 0, UITERATOR_CURRENT); + nextIndex=text->move(text, 0, UITER_CURRENT); return U_SUCCESS(errorCode) && !buffer.isEmpty(); } @@ -555,7 +555,7 @@ Normalizer::previousNormalize() { clearBuffer(); nextIndex=currentIndex; - text->move(text, currentIndex, UITERATOR_START); + text->move(text, currentIndex, UITER_START); if(!text->hasPrevious(text)) { return FALSE; } @@ -569,7 +569,7 @@ Normalizer::previousNormalize() { buffer.releaseBuffer(length); if(errorCode==U_BUFFER_OVERFLOW_ERROR) { errorCode=U_ZERO_ERROR; - text->move(text, currentIndex, UITERATOR_START); + 
text->move(text, currentIndex, UITER_START); p=buffer.getBuffer(length); length=unorm_previous(text, p, buffer.getCapacity(), fUMode, fOptions, @@ -579,7 +579,7 @@ Normalizer::previousNormalize() { } bufferPos=buffer.length(); - currentIndex=text->move(text, 0, UITERATOR_CURRENT); + currentIndex=text->move(text, 0, UITER_CURRENT); return U_SUCCESS(errorCode) && !buffer.isEmpty(); } diff --git a/icu4c/source/common/uiter.cpp b/icu4c/source/common/uiter.cpp index e77f12616c..d779cb9417 100644 --- a/icu4c/source/common/uiter.cpp +++ b/icu4c/source/common/uiter.cpp @@ -68,11 +68,11 @@ static const UCharIterator noopIterator={ static int32_t U_CALLCONV stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { switch(origin) { - case UITERATOR_START: + case UITER_START: return iter->start; - case UITERATOR_CURRENT: + case UITER_CURRENT: return iter->index; - case UITERATOR_LIMIT: + case UITER_LIMIT: return iter->limit; default: /* not a valid origin */ @@ -86,13 +86,13 @@ stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origi int32_t pos; switch(origin) { - case UITERATOR_START: + case UITER_START: pos=iter->start+delta; break; - case UITERATOR_CURRENT: + case UITER_CURRENT: pos=iter->index+delta; break; - case UITERATOR_LIMIT: + case UITER_LIMIT: pos=iter->limit+delta; break; default: @@ -190,11 +190,11 @@ uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) { static int32_t U_CALLCONV characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { switch(origin) { - case UITERATOR_START: + case UITER_START: return ((CharacterIterator *)(iter->context))->startIndex(); - case UITERATOR_CURRENT: + case UITER_CURRENT: return ((CharacterIterator *)(iter->context))->getIndex(); - case UITERATOR_LIMIT: + case UITER_LIMIT: return ((CharacterIterator *)(iter->context))->endIndex(); default: /* not a valid origin */ @@ -335,4 +335,69 @@ uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) { } } 
+/* Helper functions --------------------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +uiter_current32(UCharIterator *iter) { /* returns the code point that contains the code unit at the current index; each index movement made below is undone again before returning */ + int32_t c, c2; + + c=iter->current(iter); + if(UTF_IS_SURROGATE(c)) { /* only surrogate code units need a look at a neighbor; any other value of c is returned unchanged */ + if(UTF_IS_SURROGATE_FIRST(c)) { /* c is a first (lead) surrogate: inspect the following code unit */ + /* + * go to the next code unit + * we know that we are not at the limit because c!=-1 + */ + iter->move(iter, 1, UITER_CURRENT); + if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) { + c=UTF16_GET_PAIR_VALUE(c, c2); /* presumably combines first surrogate c and second surrogate c2 into one code point; confirm in the UTF macro header */ + } + + /* undo index movement */ + iter->move(iter, -1, UITER_CURRENT); + } else { /* c is a surrogate but not a first one: inspect the preceding code unit */ + if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) { + c=UTF16_GET_PAIR_VALUE(c2, c); /* here c2 is the first surrogate, so the argument order is swapped */ + } + if(c2>=0) { /* NOTE(review): assumes previous() leaves the index alone when it returns a negative value; confirm against the iterator implementations */ + /* undo index movement */ + iter->move(iter, 1, UITER_CURRENT); + } + } + } + return c; +} + +U_CAPI int32_t U_EXPORT2 +uiter_next32(UCharIterator *iter) { /* reads one code unit with next(); after a first surrogate it reads one more unit and either forms a pair or steps back over that unit */ + int32_t c, c2; + + c=iter->next(iter); + if(UTF_IS_FIRST_SURROGATE(c)) { + if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) { + c=UTF16_GET_PAIR_VALUE(c, c2); + } else if(c2>=0) { /* a negative c2 is not undone; NOTE(review): presumably next() did not advance in that case; confirm */ + /* unmatched first surrogate, undo index movement */ + iter->move(iter, -1, UITER_CURRENT); + } + } + return c; /* c is the value from the first next() call unless a pair was formed above */ +} + +U_CAPI int32_t U_EXPORT2 +uiter_previous32(UCharIterator *iter) { /* mirror image of uiter_next32: reads one code unit with previous(); after a second surrogate it reads one more unit and either forms a pair or steps forward over that unit */ + int32_t c, c2; + + c=iter->previous(iter); + if(UTF_IS_SECOND_SURROGATE(c)) { + if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) { + c=UTF16_GET_PAIR_VALUE(c2, c); /* c2 was read second but precedes c in the text, so it is passed first */ + } else if(c2>=0) { /* a negative c2 is not undone; NOTE(review): presumably previous() did not move in that case; confirm */ + /* unmatched second surrogate, undo index movement */ + iter->move(iter, 1, UITER_CURRENT); + } + } + return c; /* c is the value from the first previous() call unless a pair was formed above */ +} + U_CDECL_END diff --git a/icu4c/source/common/unicode/uiter.h b/icu4c/source/common/unicode/uiter.h index 9fb78dafea..4eaac29bef 100644 --- a/icu4c/source/common/unicode/uiter.h +++ b/icu4c/source/common/unicode/uiter.h @@ -42,7 +42,7 @@ typedef struct UCharIterator UCharIterator; * @draft ICU 2.1 */ enum UCharIteratorOrigin { - UITERATOR_START, UITERATOR_CURRENT, UITERATOR_LIMIT + UITER_START, UITER_CURRENT, UITER_LIMIT }; typedef enum UCharIteratorOrigin UCharIteratorOrigin; @@ -298,6
+298,61 @@ struct UCharIterator { UCharIteratorReserved *reservedFn; }; +/** + * Helper function for UCharIterator to get the code point + * at the current index. + * + * Return the code point that includes the code unit at the current position, + * or -1 if there is none (index is at the limit). + * If the current code unit is a lead or trail surrogate, + * then the following or preceding surrogate is used to form + * the code point value. + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point + * + * @see UCharIterator + * @see UTF_GET_CHAR + * @see UnicodeString::char32At() + * @draft ICU 2.1 + */ +U_CAPI int32_t U_EXPORT2 +uiter_current32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the next code point. + * + * Return the code point at the current index and increment + * the index (post-increment, like s[i++]), + * or return -1 if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point (and post-increment the current index) + * + * @see UCharIterator + * @see UTF_NEXT_CHAR + * @draft ICU 2.1 + */ +U_CAPI int32_t U_EXPORT2 +uiter_next32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the previous code point. + * + * Decrement the index and return the code point from there + * (pre-decrement, like s[--i]), + * or return -1 if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code point (after pre-decrementing the current index) + * + * @see UCharIterator + * @see UTF_PREV_CHAR + * @draft ICU 2.1 + */ +U_CAPI int32_t U_EXPORT2 +uiter_previous32(UCharIterator *iter); + /** * Set up a UCharIterator to iterate over a string. 
* diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp index e504d02dbe..81db64fdb4 100644 --- a/icu4c/source/common/unorm.cpp +++ b/icu4c/source/common/unorm.cpp @@ -2304,7 +2304,7 @@ _getPrevNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar } } else { /* unpaired second surrogate, undo the c2=src.previous() movement */ - src.move(&src, 1, UITERATOR_CURRENT); + src.move(&src, 1, UITER_CURRENT); return 0; } } @@ -2364,7 +2364,7 @@ _findPreviousIterationBoundary(UCharIterator &src, if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - src.move(&src, 0, UITERATOR_START); + src.move(&src, 0, UITER_START); return 0; } @@ -2456,7 +2456,7 @@ unorm_previous(UCharIterator *src, } c=c2; /* lead surrogate to be written below */ } else { - src->move(src, 1, UITERATOR_CURRENT); + src->move(src, 1, UITER_CURRENT); } } @@ -2530,7 +2530,7 @@ _getNextNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar norm32=_getNorm32(c); if(UTF_IS_FIRST_SURROGATE(c)) { if(src.hasNext(&src) && UTF_IS_SECOND_SURROGATE(c2=(UChar)src.current(&src))) { - src.move(&src, 1, UITERATOR_CURRENT); /* skip the c2 surrogate */ + src.move(&src, 1, UITER_CURRENT); /* skip the c2 surrogate */ if((norm32&mask)==0) { /* irrelevant data */ return 0; @@ -2601,7 +2601,7 @@ _findNextIterationBoundary(UCharIterator &src, if(UTF_IS_SECOND_SURROGATE(c2=(UChar)src.next(&src))) { buffer[bufferIndex++]=c2; } else { - src.move(&src, -1, UITERATOR_CURRENT); /* back out the non-trail-surrogate */ + src.move(&src, -1, UITER_CURRENT); /* back out the non-trail-surrogate */ } } @@ -2610,7 +2610,7 @@ _findNextIterationBoundary(UCharIterator &src, while(src.hasNext(&src)) { if(isNextBoundary(src, minC, mask, c, c2)) { /* back out the latest movement to stop at the boundary */ - src.move(&src, c2==0 ? -1 : -2, UITERATOR_CURRENT); + src.move(&src, c2==0 ? 
-1 : -2, UITER_CURRENT); break; } else { if(bufferIndex+(c2==0 ? 1 : 2)<=bufferCapacity || @@ -2625,7 +2625,7 @@ _findNextIterationBoundary(UCharIterator &src, } } else { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - src.move(&src, 0, UITERATOR_LIMIT); + src.move(&src, 0, UITER_LIMIT); return 0; } } @@ -2703,7 +2703,7 @@ unorm_next(UCharIterator *src, } /* lead surrogate to be written below */ } else { - src->move(src, -1, UITERATOR_CURRENT); + src->move(src, -1, UITER_CURRENT); } }