ICU-1686 some UCharIterator improvements - more consistent constant names, uiter_next32() etc.

X-SVN-Rev: 7715
This commit is contained in:
Markus Scherer 2002-02-20 02:04:23 +00:00
parent dccc8aeafd
commit 6bdadbb707
4 changed files with 149 additions and 29 deletions

View File

@ -306,13 +306,13 @@ UChar32 Normalizer::previous() {
} }
void Normalizer::reset() { void Normalizer::reset() {
currentIndex=nextIndex=text->move(text, 0, UITERATOR_START); currentIndex=nextIndex=text->move(text, 0, UITER_START);
clearBuffer(); clearBuffer();
} }
void void
Normalizer::setIndexOnly(UTextOffset index) { Normalizer::setIndexOnly(UTextOffset index) {
currentIndex=nextIndex=text->move(text, index, UITERATOR_START); // validates index currentIndex=nextIndex=text->move(text, index, UITER_START); // validates index
clearBuffer(); clearBuffer();
} }
@ -355,7 +355,7 @@ UChar32 Normalizer::first() {
* the input text corresponding to that normalized character. * the input text corresponding to that normalized character.
*/ */
UChar32 Normalizer::last() { UChar32 Normalizer::last() {
currentIndex=nextIndex=text->move(text, 0, UITERATOR_LIMIT); currentIndex=nextIndex=text->move(text, 0, UITER_LIMIT);
clearBuffer(); clearBuffer();
return previous(); return previous();
} }
@ -388,7 +388,7 @@ UTextOffset Normalizer::getIndex() const {
* over which this <tt>Normalizer</tt> is iterating * over which this <tt>Normalizer</tt> is iterating
*/ */
UTextOffset Normalizer::startIndex() const { UTextOffset Normalizer::startIndex() const {
return text->move(text, 0, UITERATOR_START); return text->move(text, 0, UITER_START);
} }
/** /**
@ -397,7 +397,7 @@ UTextOffset Normalizer::startIndex() const {
* over which this <tt>Normalizer</tt> is iterating * over which this <tt>Normalizer</tt> is iterating
*/ */
UTextOffset Normalizer::endIndex() const { UTextOffset Normalizer::endIndex() const {
return text->move(text, 0, UITERATOR_LIMIT); return text->move(text, 0, UITER_LIMIT);
} }
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
@ -520,7 +520,7 @@ Normalizer::nextNormalize() {
clearBuffer(); clearBuffer();
currentIndex=nextIndex; currentIndex=nextIndex;
text->move(text, nextIndex, UITERATOR_START); text->move(text, nextIndex, UITER_START);
if(!text->hasNext(text)) { if(!text->hasNext(text)) {
return FALSE; return FALSE;
} }
@ -534,7 +534,7 @@ Normalizer::nextNormalize() {
buffer.releaseBuffer(length); buffer.releaseBuffer(length);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) { if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR; errorCode=U_ZERO_ERROR;
text->move(text, nextIndex, UITERATOR_START); text->move(text, nextIndex, UITER_START);
p=buffer.getBuffer(length); p=buffer.getBuffer(length);
length=unorm_next(text, p, buffer.getCapacity(), length=unorm_next(text, p, buffer.getCapacity(),
fUMode, fOptions!=0, fUMode, fOptions!=0,
@ -543,7 +543,7 @@ Normalizer::nextNormalize() {
buffer.releaseBuffer(length); buffer.releaseBuffer(length);
} }
nextIndex=text->move(text, 0, UITERATOR_CURRENT); nextIndex=text->move(text, 0, UITER_CURRENT);
return U_SUCCESS(errorCode) && !buffer.isEmpty(); return U_SUCCESS(errorCode) && !buffer.isEmpty();
} }
@ -555,7 +555,7 @@ Normalizer::previousNormalize() {
clearBuffer(); clearBuffer();
nextIndex=currentIndex; nextIndex=currentIndex;
text->move(text, currentIndex, UITERATOR_START); text->move(text, currentIndex, UITER_START);
if(!text->hasPrevious(text)) { if(!text->hasPrevious(text)) {
return FALSE; return FALSE;
} }
@ -569,7 +569,7 @@ Normalizer::previousNormalize() {
buffer.releaseBuffer(length); buffer.releaseBuffer(length);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) { if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR; errorCode=U_ZERO_ERROR;
text->move(text, currentIndex, UITERATOR_START); text->move(text, currentIndex, UITER_START);
p=buffer.getBuffer(length); p=buffer.getBuffer(length);
length=unorm_previous(text, p, buffer.getCapacity(), length=unorm_previous(text, p, buffer.getCapacity(),
fUMode, fOptions, fUMode, fOptions,
@ -579,7 +579,7 @@ Normalizer::previousNormalize() {
} }
bufferPos=buffer.length(); bufferPos=buffer.length();
currentIndex=text->move(text, 0, UITERATOR_CURRENT); currentIndex=text->move(text, 0, UITER_CURRENT);
return U_SUCCESS(errorCode) && !buffer.isEmpty(); return U_SUCCESS(errorCode) && !buffer.isEmpty();
} }

View File

@ -68,11 +68,11 @@ static const UCharIterator noopIterator={
static int32_t U_CALLCONV static int32_t U_CALLCONV
stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
switch(origin) { switch(origin) {
case UITERATOR_START: case UITER_START:
return iter->start; return iter->start;
case UITERATOR_CURRENT: case UITER_CURRENT:
return iter->index; return iter->index;
case UITERATOR_LIMIT: case UITER_LIMIT:
return iter->limit; return iter->limit;
default: default:
/* not a valid origin */ /* not a valid origin */
@ -86,13 +86,13 @@ stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origi
int32_t pos; int32_t pos;
switch(origin) { switch(origin) {
case UITERATOR_START: case UITER_START:
pos=iter->start+delta; pos=iter->start+delta;
break; break;
case UITERATOR_CURRENT: case UITER_CURRENT:
pos=iter->index+delta; pos=iter->index+delta;
break; break;
case UITERATOR_LIMIT: case UITER_LIMIT:
pos=iter->limit+delta; pos=iter->limit+delta;
break; break;
default: default:
@ -190,11 +190,11 @@ uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
static int32_t U_CALLCONV static int32_t U_CALLCONV
characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
switch(origin) { switch(origin) {
case UITERATOR_START: case UITER_START:
return ((CharacterIterator *)(iter->context))->startIndex(); return ((CharacterIterator *)(iter->context))->startIndex();
case UITERATOR_CURRENT: case UITER_CURRENT:
return ((CharacterIterator *)(iter->context))->getIndex(); return ((CharacterIterator *)(iter->context))->getIndex();
case UITERATOR_LIMIT: case UITER_LIMIT:
return ((CharacterIterator *)(iter->context))->endIndex(); return ((CharacterIterator *)(iter->context))->endIndex();
default: default:
/* not a valid origin */ /* not a valid origin */
@ -335,4 +335,69 @@ uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
} }
} }
/* Helper functions --------------------------------------------------------- */
/*
 * Return the code point that contains the code unit at the current index,
 * without changing the index on return.
 * A non-surrogate code unit (including the negative "nothing here" value
 * returned by current()) is passed through unchanged.
 * A lead surrogate is combined with a following trail surrogate, and a
 * trail surrogate with a preceding lead surrogate, when such a partner exists;
 * otherwise the single surrogate is returned as is.
 */
U_CAPI int32_t U_EXPORT2
uiter_current32(UCharIterator *iter) {
    int32_t cp, neighbor;

    cp=iter->current(iter);
    if(!UTF_IS_SURROGATE(cp)) {
        /* plain BMP code unit, or nothing at this position */
        return cp;
    }

    if(UTF_IS_SURROGATE_FIRST(cp)) {
        /*
         * lead surrogate: peek at the following code unit
         * stepping forward is safe because a surrogate was just read,
         * so the index cannot be at the limit
         */
        iter->move(iter, 1, UITER_CURRENT);
        neighbor=iter->current(iter);
        if(UTF_IS_SECOND_SURROGATE(neighbor)) {
            cp=UTF16_GET_PAIR_VALUE(cp, neighbor);
        }

        /* step back to where we started */
        iter->move(iter, -1, UITER_CURRENT);
        return cp;
    }

    /* trail surrogate: look at the preceding code unit */
    neighbor=iter->previous(iter);
    if(UTF_IS_FIRST_SURROGATE(neighbor)) {
        cp=UTF16_GET_PAIR_VALUE(neighbor, cp);
    }
    if(neighbor>=0) {
        /* previous() actually returned a code unit, so undo its movement */
        iter->move(iter, 1, UITER_CURRENT);
    }
    return cp;
}
/*
 * Read the next code point and advance the index past it.
 * When the code unit read is a lead surrogate and the one after it is a
 * trail surrogate, both are consumed and the combined code point is returned.
 * An unmatched lead surrogate is returned by itself.
 */
U_CAPI int32_t U_EXPORT2
uiter_next32(UCharIterator *iter) {
    int32_t cp, trail;

    cp=iter->next(iter);
    if(!UTF_IS_FIRST_SURROGATE(cp)) {
        /* not a lead surrogate: nothing to combine */
        return cp;
    }

    trail=iter->next(iter);
    if(UTF_IS_SECOND_SURROGATE(trail)) {
        cp=UTF16_GET_PAIR_VALUE(cp, trail);
        return cp;
    }
    if(trail>=0) {
        /* a code unit was consumed but it is not a trail surrogate: give it back */
        iter->move(iter, -1, UITER_CURRENT);
    }
    return cp;
}
/*
 * Step the index back over the previous code point and return it.
 * When the code unit read is a trail surrogate and the one before it is a
 * lead surrogate, both are consumed and the combined code point is returned.
 * An unmatched trail surrogate is returned by itself.
 */
U_CAPI int32_t U_EXPORT2
uiter_previous32(UCharIterator *iter) {
    int32_t cp, lead;

    cp=iter->previous(iter);
    if(!UTF_IS_SECOND_SURROGATE(cp)) {
        /* not a trail surrogate: nothing to combine */
        return cp;
    }

    lead=iter->previous(iter);
    if(UTF_IS_FIRST_SURROGATE(lead)) {
        cp=UTF16_GET_PAIR_VALUE(lead, cp);
        return cp;
    }
    if(lead>=0) {
        /* a code unit was consumed but it is not a lead surrogate: give it back */
        iter->move(iter, 1, UITER_CURRENT);
    }
    return cp;
}
U_CDECL_END U_CDECL_END

View File

@ -42,7 +42,7 @@ typedef struct UCharIterator UCharIterator;
* @draft ICU 2.1 * @draft ICU 2.1
*/ */
enum UCharIteratorOrigin { enum UCharIteratorOrigin {
UITERATOR_START, UITERATOR_CURRENT, UITERATOR_LIMIT UITER_START, UITER_CURRENT, UITER_LIMIT
}; };
typedef enum UCharIteratorOrigin UCharIteratorOrigin; typedef enum UCharIteratorOrigin UCharIteratorOrigin;
@ -298,6 +298,61 @@ struct UCharIterator {
UCharIteratorReserved *reservedFn; UCharIteratorReserved *reservedFn;
}; };
/**
 * Helper function for UCharIterator to get the code point
 * at the current index.
 *
 * Return the code point that includes the code unit at the current position,
 * or -1 if there is none (index is at the limit).
 * If the current code unit is a lead or trail surrogate,
 * then the following or preceding surrogate is used to form
 * the code point value.
 * If there is no matching surrogate next to it, then the single
 * surrogate code unit is returned as is.
 *
 * The function may move the iterator temporarily to inspect the
 * neighboring code unit, but it moves the index back before it returns.
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return the current code point
 *
 * @see UCharIterator
 * @see UTF_GET_CHAR
 * @see UnicodeString::char32At()
 * @draft ICU 2.1
 */
U_CAPI int32_t U_EXPORT2
uiter_current32(UCharIterator *iter);
/**
 * Helper function for UCharIterator to get the next code point.
 *
 * Return the code point at the current index and increment
 * the index (post-increment, like s[i++]),
 * or return -1 if there is none (index is at the limit).
 *
 * If the code unit at the current index is a lead surrogate that is
 * followed by a trail surrogate, then both code units are consumed and
 * the combined code point is returned.
 * An unmatched lead surrogate is returned as is, and only that one
 * code unit is consumed.
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return the current code point (and post-increment the current index)
 *
 * @see UCharIterator
 * @see UTF_NEXT_CHAR
 * @draft ICU 2.1
 */
U_CAPI int32_t U_EXPORT2
uiter_next32(UCharIterator *iter);
/**
 * Helper function for UCharIterator to get the previous code point.
 *
 * Decrement the index and return the code point from there
 * (pre-decrement, like s[--i]),
 * or return -1 if there is none (index is at the start).
 *
 * If the code unit before the current index is a trail surrogate that is
 * preceded by a lead surrogate, then both code units are consumed and
 * the combined code point is returned.
 * An unmatched trail surrogate is returned as is, and only that one
 * code unit is consumed.
 *
 * @param iter the UCharIterator structure ("this pointer")
 * @return the previous code point (after pre-decrementing the current index)
 *
 * @see UCharIterator
 * @see UTF_PREV_CHAR
 * @draft ICU 2.1
 */
U_CAPI int32_t U_EXPORT2
uiter_previous32(UCharIterator *iter);
/** /**
* Set up a UCharIterator to iterate over a string. * Set up a UCharIterator to iterate over a string.
* *

View File

@ -2304,7 +2304,7 @@ _getPrevNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar
} }
} else { } else {
/* unpaired second surrogate, undo the c2=src.previous() movement */ /* unpaired second surrogate, undo the c2=src.previous() movement */
src.move(&src, 1, UITERATOR_CURRENT); src.move(&src, 1, UITER_CURRENT);
return 0; return 0;
} }
} }
@ -2364,7 +2364,7 @@ _findPreviousIterationBoundary(UCharIterator &src,
if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) { if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR; *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
src.move(&src, 0, UITERATOR_START); src.move(&src, 0, UITER_START);
return 0; return 0;
} }
@ -2456,7 +2456,7 @@ unorm_previous(UCharIterator *src,
} }
c=c2; /* lead surrogate to be written below */ c=c2; /* lead surrogate to be written below */
} else { } else {
src->move(src, 1, UITERATOR_CURRENT); src->move(src, 1, UITER_CURRENT);
} }
} }
@ -2530,7 +2530,7 @@ _getNextNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar
norm32=_getNorm32(c); norm32=_getNorm32(c);
if(UTF_IS_FIRST_SURROGATE(c)) { if(UTF_IS_FIRST_SURROGATE(c)) {
if(src.hasNext(&src) && UTF_IS_SECOND_SURROGATE(c2=(UChar)src.current(&src))) { if(src.hasNext(&src) && UTF_IS_SECOND_SURROGATE(c2=(UChar)src.current(&src))) {
src.move(&src, 1, UITERATOR_CURRENT); /* skip the c2 surrogate */ src.move(&src, 1, UITER_CURRENT); /* skip the c2 surrogate */
if((norm32&mask)==0) { if((norm32&mask)==0) {
/* irrelevant data */ /* irrelevant data */
return 0; return 0;
@ -2601,7 +2601,7 @@ _findNextIterationBoundary(UCharIterator &src,
if(UTF_IS_SECOND_SURROGATE(c2=(UChar)src.next(&src))) { if(UTF_IS_SECOND_SURROGATE(c2=(UChar)src.next(&src))) {
buffer[bufferIndex++]=c2; buffer[bufferIndex++]=c2;
} else { } else {
src.move(&src, -1, UITERATOR_CURRENT); /* back out the non-trail-surrogate */ src.move(&src, -1, UITER_CURRENT); /* back out the non-trail-surrogate */
} }
} }
@ -2610,7 +2610,7 @@ _findNextIterationBoundary(UCharIterator &src,
while(src.hasNext(&src)) { while(src.hasNext(&src)) {
if(isNextBoundary(src, minC, mask, c, c2)) { if(isNextBoundary(src, minC, mask, c, c2)) {
/* back out the latest movement to stop at the boundary */ /* back out the latest movement to stop at the boundary */
src.move(&src, c2==0 ? -1 : -2, UITERATOR_CURRENT); src.move(&src, c2==0 ? -1 : -2, UITER_CURRENT);
break; break;
} else { } else {
if(bufferIndex+(c2==0 ? 1 : 2)<=bufferCapacity || if(bufferIndex+(c2==0 ? 1 : 2)<=bufferCapacity ||
@ -2625,7 +2625,7 @@ _findNextIterationBoundary(UCharIterator &src,
} }
} else { } else {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR; *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
src.move(&src, 0, UITERATOR_LIMIT); src.move(&src, 0, UITER_LIMIT);
return 0; return 0;
} }
} }
@ -2703,7 +2703,7 @@ unorm_next(UCharIterator *src,
} }
/* lead surrogate to be written below */ /* lead surrogate to be written below */
} else { } else {
src->move(src, -1, UITERATOR_CURRENT); src->move(src, -1, UITER_CURRENT);
} }
} }