ICU-1686 some UCharIterator improvements - more consistent constant names, uiter_next32() etc.

X-SVN-Rev: 7715
This commit is contained in:
Markus Scherer 2002-02-20 02:04:23 +00:00
parent dccc8aeafd
commit 6bdadbb707
4 changed files with 149 additions and 29 deletions

View File

@ -306,13 +306,13 @@ UChar32 Normalizer::previous() {
}
/*
 * Rewind the iteration: move the text iterator to offset 0 from its start
 * origin, store the resulting index in both currentIndex and nextIndex,
 * and then call clearBuffer().
 * This diff view lists the statement twice: first with the old
 * UITERATOR_START spelling, then with the renamed UITER_START constant.
 */
void Normalizer::reset() {
currentIndex=nextIndex=text->move(text, 0, UITERATOR_START);
currentIndex=nextIndex=text->move(text, 0, UITER_START);
clearBuffer();
}
/*
 * Set the iteration position to `index`, given as an offset from the
 * iterator's start origin. The value returned by move() is what gets stored
 * in currentIndex and nextIndex, and clearBuffer() is called afterwards.
 * This diff view lists the statement twice: first with the old
 * UITERATOR_START spelling, then with the renamed UITER_START constant.
 */
void
Normalizer::setIndexOnly(UTextOffset index) {
currentIndex=nextIndex=text->move(text, index, UITERATOR_START); // validates index
currentIndex=nextIndex=text->move(text, index, UITER_START); // validates index
clearBuffer();
}
@ -355,7 +355,7 @@ UChar32 Normalizer::first() {
* the input text corresponding to that normalized character.
*/
UChar32 Normalizer::last() {
/*
 * Move to offset 0 from the limit origin, i.e. the end of the text.
 * The first line below is the old UITERATOR_LIMIT spelling from the diff,
 * the second one uses the renamed UITER_LIMIT constant.
 */
currentIndex=nextIndex=text->move(text, 0, UITERATOR_LIMIT);
currentIndex=nextIndex=text->move(text, 0, UITER_LIMIT);
clearBuffer();
/* the result is whatever previous() returns when stepping back from the end */
return previous();
}
@ -388,7 +388,7 @@ UTextOffset Normalizer::getIndex() const {
* over which this <tt>Normalizer</tt> is iterating
*/
UTextOffset Normalizer::startIndex() const {
/*
 * Returns the index that move() reports for offset 0 from the start origin.
 * The first line below is the old UITERATOR_START spelling from the diff,
 * the second one uses the renamed UITER_START constant.
 * NOTE(review): move() appears to reposition the iterator even though this
 * accessor is const; a getIndex-style callback taking an origin may be the
 * non-moving alternative - confirm against the UCharIterator struct.
 */
return text->move(text, 0, UITERATOR_START);
return text->move(text, 0, UITER_START);
}
/**
@ -397,7 +397,7 @@ UTextOffset Normalizer::startIndex() const {
* over which this <tt>Normalizer</tt> is iterating
*/
UTextOffset Normalizer::endIndex() const {
/*
 * Returns the index that move() reports for offset 0 from the limit origin.
 * The first line below is the old UITERATOR_LIMIT spelling from the diff,
 * the second one uses the renamed UITER_LIMIT constant.
 * NOTE(review): move() appears to reposition the iterator even though this
 * accessor is const; a getIndex-style callback taking an origin may be the
 * non-moving alternative - confirm against the UCharIterator struct.
 */
return text->move(text, 0, UITERATOR_LIMIT);
return text->move(text, 0, UITER_LIMIT);
}
//-------------------------------------------------------------------------
@ -520,7 +520,7 @@ Normalizer::nextNormalize() {
clearBuffer();
currentIndex=nextIndex;
text->move(text, nextIndex, UITERATOR_START);
text->move(text, nextIndex, UITER_START);
if(!text->hasNext(text)) {
return FALSE;
}
@ -534,7 +534,7 @@ Normalizer::nextNormalize() {
buffer.releaseBuffer(length);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
text->move(text, nextIndex, UITERATOR_START);
text->move(text, nextIndex, UITER_START);
p=buffer.getBuffer(length);
length=unorm_next(text, p, buffer.getCapacity(),
fUMode, fOptions!=0,
@ -543,7 +543,7 @@ Normalizer::nextNormalize() {
buffer.releaseBuffer(length);
}
nextIndex=text->move(text, 0, UITERATOR_CURRENT);
nextIndex=text->move(text, 0, UITER_CURRENT);
return U_SUCCESS(errorCode) && !buffer.isEmpty();
}
@ -555,7 +555,7 @@ Normalizer::previousNormalize() {
clearBuffer();
nextIndex=currentIndex;
text->move(text, currentIndex, UITERATOR_START);
text->move(text, currentIndex, UITER_START);
if(!text->hasPrevious(text)) {
return FALSE;
}
@ -569,7 +569,7 @@ Normalizer::previousNormalize() {
buffer.releaseBuffer(length);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
text->move(text, currentIndex, UITERATOR_START);
text->move(text, currentIndex, UITER_START);
p=buffer.getBuffer(length);
length=unorm_previous(text, p, buffer.getCapacity(),
fUMode, fOptions,
@ -579,7 +579,7 @@ Normalizer::previousNormalize() {
}
bufferPos=buffer.length();
currentIndex=text->move(text, 0, UITERATOR_CURRENT);
currentIndex=text->move(text, 0, UITER_CURRENT);
return U_SUCCESS(errorCode) && !buffer.isEmpty();
}

View File

@ -68,11 +68,11 @@ static const UCharIterator noopIterator={
static int32_t U_CALLCONV
stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
switch(origin) {
case UITERATOR_START:
case UITER_START:
return iter->start;
case UITERATOR_CURRENT:
case UITER_CURRENT:
return iter->index;
case UITERATOR_LIMIT:
case UITER_LIMIT:
return iter->limit;
default:
/* not a valid origin */
@ -86,13 +86,13 @@ stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origi
int32_t pos;
switch(origin) {
case UITERATOR_START:
case UITER_START:
pos=iter->start+delta;
break;
case UITERATOR_CURRENT:
case UITER_CURRENT:
pos=iter->index+delta;
break;
case UITERATOR_LIMIT:
case UITER_LIMIT:
pos=iter->limit+delta;
break;
default:
@ -190,11 +190,11 @@ uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
static int32_t U_CALLCONV
characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
switch(origin) {
case UITERATOR_START:
case UITER_START:
return ((CharacterIterator *)(iter->context))->startIndex();
case UITERATOR_CURRENT:
case UITER_CURRENT:
return ((CharacterIterator *)(iter->context))->getIndex();
case UITERATOR_LIMIT:
case UITER_LIMIT:
return ((CharacterIterator *)(iter->context))->endIndex();
default:
/* not a valid origin */
@ -335,4 +335,69 @@ uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
}
}
/* Helper functions --------------------------------------------------------- */
/*
 * Read the code point that contains the code unit at the current index.
 * A surrogate is combined with its neighbor (the following unit for a first
 * surrogate, the preceding unit for a second surrogate) when that neighbor is
 * the matching half; otherwise the single code unit is returned unchanged.
 * Every temporary index movement is undone before returning.
 */
U_CAPI int32_t U_EXPORT2
uiter_current32(UCharIterator *iter) {
    int32_t unit, neighbor;

    unit=iter->current(iter);
    if(!UTF_IS_SURROGATE(unit)) {
        /* nothing to combine */
        return unit;
    }

    if(UTF_IS_SURROGATE_FIRST(unit)) {
        /*
         * look at the following code unit;
         * a surrogate was read, so unit!=-1 and we are not at the limit
         */
        iter->move(iter, 1, UITER_CURRENT);
        neighbor=iter->current(iter);
        if(UTF_IS_SECOND_SURROGATE(neighbor)) {
            unit=UTF16_GET_PAIR_VALUE(unit, neighbor);
        }

        /* step back to where we started */
        iter->move(iter, -1, UITER_CURRENT);
        return unit;
    }

    /* second surrogate: look at the preceding code unit */
    neighbor=iter->previous(iter);
    if(UTF_IS_FIRST_SURROGATE(neighbor)) {
        unit=UTF16_GET_PAIR_VALUE(neighbor, unit);
    }
    if(neighbor>=0) {
        /* previous() delivered a code unit, so step forward again */
        iter->move(iter, 1, UITER_CURRENT);
    }
    return unit;
}
/*
 * Read the next code point and advance past it.
 * A first surrogate followed by a second surrogate is returned as one
 * combined value and both code units are consumed. An unmatched first
 * surrogate is returned by itself and only that one unit is consumed.
 */
U_CAPI int32_t U_EXPORT2
uiter_next32(UCharIterator *iter) {
    int32_t unit, following;

    unit=iter->next(iter);
    if(!UTF_IS_FIRST_SURROGATE(unit)) {
        /* not the start of a pair */
        return unit;
    }

    following=iter->next(iter);
    if(UTF_IS_SECOND_SURROGATE(following)) {
        unit=UTF16_GET_PAIR_VALUE(unit, following);
    } else if(following>=0) {
        /* a code unit was read but does not complete the pair: put it back */
        iter->move(iter, -1, UITER_CURRENT);
    }
    return unit;
}
/*
 * Step back and read the previous code point.
 * A second surrogate preceded by a first surrogate is returned as one
 * combined value and both code units are consumed. An unmatched second
 * surrogate is returned by itself and only that one unit is consumed.
 */
U_CAPI int32_t U_EXPORT2
uiter_previous32(UCharIterator *iter) {
    int32_t unit, preceding;

    unit=iter->previous(iter);
    if(!UTF_IS_SECOND_SURROGATE(unit)) {
        /* not the end of a pair */
        return unit;
    }

    preceding=iter->previous(iter);
    if(UTF_IS_FIRST_SURROGATE(preceding)) {
        unit=UTF16_GET_PAIR_VALUE(preceding, unit);
    } else if(preceding>=0) {
        /* a code unit was read but does not start the pair: step forward again */
        iter->move(iter, 1, UITER_CURRENT);
    }
    return unit;
}
U_CDECL_END

View File

@ -42,7 +42,7 @@ typedef struct UCharIterator UCharIterator;
* @draft ICU 2.1
*/
enum UCharIteratorOrigin {
/* old enumerator names, as listed by the diff before the rename */
UITERATOR_START, UITERATOR_CURRENT, UITERATOR_LIMIT
/*
 * renamed enumerators: they select whether an index or offset is taken
 * relative to the start, the current position, or the limit of the iteration
 */
UITER_START, UITER_CURRENT, UITER_LIMIT
};
typedef enum UCharIteratorOrigin UCharIteratorOrigin;
@ -298,6 +298,61 @@ struct UCharIterator {
UCharIteratorReserved *reservedFn;
};
/**
* Helper function for UCharIterator to get the code point
* at the current index.
*
* Return the code point that includes the code unit at the current position,
* or -1 if there is none (index is at the limit).
* If the current code unit is a lead or trail surrogate,
* then the following or preceding surrogate is used to form
* the code point value, provided that it is the matching half of a pair;
* an unpaired surrogate is returned as a single code unit value.
* The current index is the same after the call as before it.
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code point
*
* @see UCharIterator
* @see UTF_GET_CHAR
* @see UnicodeString::char32At()
* @draft ICU 2.1
*/
U_CAPI int32_t U_EXPORT2
uiter_current32(UCharIterator *iter);
/**
* Helper function for UCharIterator to get the next code point.
*
* Return the code point at the current index and increment
* the index (post-increment, like s[i++]),
* or return -1 if there is none (index is at the limit).
* A lead surrogate that is followed by a trail surrogate is returned as one
* code point and the index advances past both code units;
* an unpaired lead surrogate is returned by itself and the index
* advances by one code unit.
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code point (and post-increment the current index)
*
* @see UCharIterator
* @see UTF_NEXT_CHAR
* @draft ICU 2.1
*/
U_CAPI int32_t U_EXPORT2
uiter_next32(UCharIterator *iter);
/**
* Helper function for UCharIterator to get the previous code point.
*
* Decrement the index and return the code point from there
* (pre-decrement, like s[--i]),
* or return -1 if there is none (index is at the start).
* A trail surrogate that is preceded by a lead surrogate is returned as one
* code point and the index moves back over both code units;
* an unpaired trail surrogate is returned by itself and the index
* moves back by one code unit.
*
* @param iter the UCharIterator structure ("this pointer")
* @return the previous code point (after pre-decrementing the current index)
*
* @see UCharIterator
* @see UTF_PREV_CHAR
* @draft ICU 2.1
*/
U_CAPI int32_t U_EXPORT2
uiter_previous32(UCharIterator *iter);
/**
* Set up a UCharIterator to iterate over a string.
*

View File

@ -2304,7 +2304,7 @@ _getPrevNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar
}
} else {
/* unpaired second surrogate, undo the c2=src.previous() movement */
src.move(&src, 1, UITERATOR_CURRENT);
src.move(&src, 1, UITER_CURRENT);
return 0;
}
}
@ -2364,7 +2364,7 @@ _findPreviousIterationBoundary(UCharIterator &src,
if(!u_growBufferFromStatic(stackBuffer, &buffer, &bufferCapacity, 2*bufferCapacity, bufferLength)) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
src.move(&src, 0, UITERATOR_START);
src.move(&src, 0, UITER_START);
return 0;
}
@ -2456,7 +2456,7 @@ unorm_previous(UCharIterator *src,
}
c=c2; /* lead surrogate to be written below */
} else {
src->move(src, 1, UITERATOR_CURRENT);
src->move(src, 1, UITER_CURRENT);
}
}
@ -2530,7 +2530,7 @@ _getNextNorm32(UCharIterator &src, uint32_t minC, uint32_t mask, UChar &c, UChar
norm32=_getNorm32(c);
if(UTF_IS_FIRST_SURROGATE(c)) {
if(src.hasNext(&src) && UTF_IS_SECOND_SURROGATE(c2=(UChar)src.current(&src))) {
src.move(&src, 1, UITERATOR_CURRENT); /* skip the c2 surrogate */
src.move(&src, 1, UITER_CURRENT); /* skip the c2 surrogate */
if((norm32&mask)==0) {
/* irrelevant data */
return 0;
@ -2601,7 +2601,7 @@ _findNextIterationBoundary(UCharIterator &src,
if(UTF_IS_SECOND_SURROGATE(c2=(UChar)src.next(&src))) {
buffer[bufferIndex++]=c2;
} else {
src.move(&src, -1, UITERATOR_CURRENT); /* back out the non-trail-surrogate */
src.move(&src, -1, UITER_CURRENT); /* back out the non-trail-surrogate */
}
}
@ -2610,7 +2610,7 @@ _findNextIterationBoundary(UCharIterator &src,
while(src.hasNext(&src)) {
if(isNextBoundary(src, minC, mask, c, c2)) {
/* back out the latest movement to stop at the boundary */
src.move(&src, c2==0 ? -1 : -2, UITERATOR_CURRENT);
src.move(&src, c2==0 ? -1 : -2, UITER_CURRENT);
break;
} else {
if(bufferIndex+(c2==0 ? 1 : 2)<=bufferCapacity ||
@ -2625,7 +2625,7 @@ _findNextIterationBoundary(UCharIterator &src,
}
} else {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
src.move(&src, 0, UITERATOR_LIMIT);
src.move(&src, 0, UITER_LIMIT);
return 0;
}
}
@ -2703,7 +2703,7 @@ unorm_next(UCharIterator *src,
}
/* lead surrogate to be written below */
} else {
src->move(src, -1, UITERATOR_CURRENT);
src->move(src, -1, UITER_CURRENT);
}
}