2002-02-09 21:55:36 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
|
|
|
* Copyright (C) 2002, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: uiter.cpp
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2002jan18
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/ustring.h"
|
|
|
|
#include "unicode/chariter.h"
|
|
|
|
#include "unicode/rep.h"
|
|
|
|
#include "unicode/uiter.h"
|
|
|
|
|
|
|
|
U_CDECL_BEGIN
|
|
|
|
|
|
|
|
/* No-Op UCharIterator implementation for illegal input --------------------- */
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * getIndex callback of the no-op iterator.
 * Both arguments are ignored; every origin is reported as position 0.
 */
static int32_t U_CALLCONV
noopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) {
    return 0;
}
|
|
|
|
|
2002-02-09 21:55:36 +00:00
|
|
|
/*
 * move callback of the no-op iterator.
 * Nothing is moved; the arguments are ignored and the reported
 * resulting position is always 0.
 */
static int32_t U_CALLCONV
noopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) {
    return 0;
}
|
|
|
|
|
|
|
|
/*
 * Boolean callback of the no-op iterator.
 * Always answers FALSE; noopIterator installs this same function
 * in two consecutive slots of its function table.
 */
static UBool U_CALLCONV
noopHasNext(UCharIterator * /*iter*/) {
    return FALSE;
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * Code unit callback of the no-op iterator.
 * Always returns -1, the same value the other iterators in this file
 * return when there is no code unit to deliver. noopIterator installs
 * this function in three consecutive slots of its function table.
 */
static int32_t U_CALLCONV
noopCurrent(UCharIterator * /*iter*/) {
    return -1;
}
|
|
|
|
|
|
|
|
/*
 * Template for an iterator that does nothing.
 * The setter functions in this file copy it into the caller's UCharIterator
 * when they are handed illegal input (for example a NULL source).
 *
 * The struct layout is declared in unicode/uiter.h; the slot names in the
 * comments below are inferred from the parallel tables in this file.
 */
static const UCharIterator noopIterator={
    /* six data fields, all cleared */
    0, 0, 0, 0, 0, 0,

    noopGetIndex,   /* getIndex */
    noopMove,       /* move */
    noopHasNext,    /* hasNext */
    noopHasNext,    /* hasPrevious: same constant FALSE answer */
    noopCurrent,    /* current */
    noopCurrent,    /* next: same constant -1 answer */
    noopCurrent,    /* previous: same constant -1 answer */

    0               /* trailing slot, unused here */
};
|
|
|
|
|
|
|
|
/* UCharIterator implementation for simple strings -------------------------- */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is an implementation of a code unit (UChar) iterator
|
|
|
|
* for UChar * strings.
|
|
|
|
*
|
|
|
|
* The UCharIterator.context field holds a pointer to the string.
|
|
|
|
*/
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * getIndex callback for the UChar * string iterator.
 *
 * Returns iter->start, iter->index or iter->limit for the origins
 * UITER_START, UITER_CURRENT and UITER_LIMIT respectively.
 * Any other origin value is not valid and yields 0.
 */
static int32_t U_CALLCONV
stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
    /* answer for an origin that is not valid; should never be needed */
    int32_t result=0;

    if(origin==UITER_START) {
        result=iter->start;
    } else if(origin==UITER_CURRENT) {
        result=iter->index;
    } else if(origin==UITER_LIMIT) {
        result=iter->limit;
    }

    return result;
}
|
|
|
|
|
2002-02-09 21:55:36 +00:00
|
|
|
/*
 * move callback for the UChar * string iterator (also installed in the
 * Replaceable-based iterator table).
 *
 * Sets iter->index to (origin position + delta), pinned to the range
 * [iter->start, iter->limit], and returns the new index.
 *
 * iter   - the iterator whose index is updated
 * delta  - signed offset in code units relative to the origin
 * origin - UITER_START, UITER_CURRENT or UITER_LIMIT
 *
 * An origin that is not valid results in no move: the index is left
 * untouched and returned as is.
 *
 * The target is pinned before any addition is performed, so a very large
 * positive or negative delta cannot overflow int32_t.
 */
static int32_t U_CALLCONV
stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
    int32_t base, pos;

    switch(origin) {
    case UITER_START:
        base=iter->start;
        break;
    case UITER_CURRENT:
        base=iter->index;
        break;
    case UITER_LIMIT:
        base=iter->limit;
        break;
    default:
        /* not a valid origin, no move */
        /* Should never get here! */
        return iter->index;
    }

    /*
     * Compare delta against the remaining distance instead of adding first:
     * base+delta could exceed the int32_t range, the distances cannot
     * as long as start<=base<=limit.
     */
    if(delta>=0) {
        pos= delta>iter->limit-base ? iter->limit : base+delta;
    } else {
        pos= delta<iter->start-base ? iter->start : base+delta;
    }

    /*
     * Defensive final pinning; only has an effect if the fields were
     * modified from outside so that base lies outside [start, limit].
     */
    if(pos<iter->start) {
        pos=iter->start;
    } else if(pos>iter->limit) {
        pos=iter->limit;
    }

    return iter->index=pos;
}
|
|
|
|
|
|
|
|
/*
 * hasNext callback for the UChar * string iterator:
 * true while the current index is still below the limit.
 */
static UBool U_CALLCONV
stringIteratorHasNext(UCharIterator *iter) {
    return (UBool)(iter->limit>iter->index);
}
|
|
|
|
|
|
|
|
/*
 * hasPrevious callback for the UChar * string iterator:
 * true while the current index is still above the start.
 */
static UBool U_CALLCONV
stringIteratorHasPrevious(UCharIterator *iter) {
    return (UBool)(iter->start<iter->index);
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * current callback for the UChar * string iterator.
 *
 * Returns the code unit at iter->index without moving,
 * or -1 when the index has reached the limit.
 * iter->context is read as the const UChar * string.
 */
static int32_t U_CALLCONV
stringIteratorCurrent(UCharIterator *iter) {
    const UChar *text;

    if(iter->index>=iter->limit) {
        return -1;
    }

    text=(const UChar *)(iter->context);
    return text[iter->index];
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * next callback for the UChar * string iterator.
 *
 * Returns the code unit at iter->index and then advances the index by one,
 * or returns -1 (index unchanged) when the index has reached the limit.
 */
static int32_t U_CALLCONV
stringIteratorNext(UCharIterator *iter) {
    const UChar *text;
    int32_t at;

    if(iter->index>=iter->limit) {
        return -1;
    }

    text=(const UChar *)(iter->context);
    at=iter->index;
    iter->index=at+1;
    return text[at];
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * previous callback for the UChar * string iterator.
 *
 * Steps the index back by one and returns the code unit found there,
 * or returns -1 (index unchanged) when the index is already at the start.
 */
static int32_t U_CALLCONV
stringIteratorPrevious(UCharIterator *iter) {
    const UChar *text;

    if(iter->index<=iter->start) {
        return -1;
    }

    text=(const UChar *)(iter->context);
    iter->index-=1;
    return text[iter->index];
}
|
|
|
|
|
|
|
|
/*
 * Template for the UChar * string iterator.
 * uiter_setString() copies it into the caller's UCharIterator and then
 * fills in context, length and limit.
 *
 * The struct layout is declared in unicode/uiter.h; the slot names in the
 * comments below are inferred from the function names.
 */
static const UCharIterator stringIterator={
    /* six data fields, cleared here and set up by uiter_setString() */
    0, 0, 0, 0, 0, 0,

    stringIteratorGetIndex,     /* getIndex */
    stringIteratorMove,         /* move */
    stringIteratorHasNext,      /* hasNext */
    stringIteratorHasPrevious,  /* hasPrevious */
    stringIteratorCurrent,      /* current */
    stringIteratorNext,         /* next */
    stringIteratorPrevious,     /* previous */

    0                           /* trailing slot, unused here */
};
|
|
|
|
|
|
|
|
/*
 * Set up a UCharIterator to iterate over a UChar * string.
 *
 * iter   - the iterator to initialize; nothing happens if it is NULL
 * s      - the string; stored in iter->context, not copied
 * length - number of code units in s, or -1 to have it determined
 *          with u_strlen(s)
 *
 * If s is NULL or length is less than -1, the iterator is set to the
 * no-op iterator instead.
 */
U_CAPI void U_EXPORT2
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
    if(iter==0) {
        return;
    }

    if(s==0 || length<-1) {
        /* illegal input */
        *iter=noopIterator;
        return;
    }

    *iter=stringIterator;
    iter->context=s;
    iter->length= length>=0 ? length : u_strlen(s);
    iter->limit=iter->length;
}
|
|
|
|
|
|
|
|
/* UCharIterator wrapper around CharacterIterator --------------------------- */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is wrapper code around a C++ CharacterIterator to
|
|
|
|
* look like a C UCharIterator.
|
|
|
|
*
|
|
|
|
* The UCharIterator.context field holds a pointer to the CharacterIterator.
|
|
|
|
*/
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * getIndex callback for the CharacterIterator wrapper.
 *
 * Forwards to the wrapped CharacterIterator (held in iter->context):
 * startIndex() for UITER_START, getIndex() for UITER_CURRENT and
 * endIndex() for UITER_LIMIT. Any other origin is not valid and yields 0.
 */
static int32_t U_CALLCONV
characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    if(origin==UITER_START) {
        return wrapped->startIndex();
    }
    if(origin==UITER_CURRENT) {
        return wrapped->getIndex();
    }
    if(origin==UITER_LIMIT) {
        return wrapped->endIndex();
    }

    /* not a valid origin; should never get here */
    return 0;
}
|
|
|
|
|
2002-02-09 21:55:36 +00:00
|
|
|
/*
 * move callback for the CharacterIterator wrapper.
 *
 * Forwards delta and origin to CharacterIterator::move() on the wrapped
 * object (held in iter->context) and returns its result.
 *
 * NOTE(review): the origin is converted with a plain cast, which presumably
 * relies on UCharIteratorOrigin and CharacterIterator::EOrigin using
 * matching enumerator values - confirm against the two headers.
 */
static int32_t U_CALLCONV
characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    CharacterIterator::EOrigin wrappedOrigin=(CharacterIterator::EOrigin)origin;

    return wrapped->move(delta, wrappedOrigin);
}
|
|
|
|
|
|
|
|
/*
 * hasNext callback for the CharacterIterator wrapper:
 * returns the answer of hasNext() on the wrapped object in iter->context.
 */
static UBool U_CALLCONV
characterIteratorHasNext(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    return wrapped->hasNext();
}
|
|
|
|
|
|
|
|
/*
 * hasPrevious callback for the CharacterIterator wrapper:
 * returns the answer of hasPrevious() on the wrapped object in iter->context.
 */
static UBool U_CALLCONV
characterIteratorHasPrevious(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    return wrapped->hasPrevious();
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * current callback for the CharacterIterator wrapper.
 *
 * Returns the result of current() on the wrapped object, except that
 * a result of 0xffff combined with hasNext() being false is reported
 * as -1. A 0xffff result while hasNext() is true is passed through as is.
 */
static int32_t U_CALLCONV
characterIteratorCurrent(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    int32_t unit=wrapped->current();

    /* hasNext() is only consulted when the result is the ambiguous 0xffff */
    if(unit==0xffff && !wrapped->hasNext()) {
        return -1;
    }
    return unit;
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * next callback for the CharacterIterator wrapper.
 *
 * Returns nextPostInc() of the wrapped object while it reports hasNext(),
 * and -1 (without calling nextPostInc()) otherwise.
 */
static int32_t U_CALLCONV
characterIteratorNext(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    if(!wrapped->hasNext()) {
        return -1;
    }
    return wrapped->nextPostInc();
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * previous callback for the CharacterIterator wrapper.
 *
 * Returns previous() of the wrapped object while it reports hasPrevious(),
 * and -1 (without calling previous()) otherwise.
 */
static int32_t U_CALLCONV
characterIteratorPrevious(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    if(!wrapped->hasPrevious()) {
        return -1;
    }
    return wrapped->previous();
}
|
|
|
|
|
|
|
|
/*
 * Template for the CharacterIterator wrapper.
 * uiter_setCharacterIterator() copies it into the caller's UCharIterator
 * and then stores the CharacterIterator pointer in context.
 *
 * The struct layout is declared in unicode/uiter.h; the slot names in the
 * comments below are inferred from the function names.
 */
static const UCharIterator characterIteratorWrapper={
    /* six data fields; only context is set afterwards by the setter */
    0, 0, 0, 0, 0, 0,

    characterIteratorGetIndex,      /* getIndex */
    characterIteratorMove,          /* move */
    characterIteratorHasNext,       /* hasNext */
    characterIteratorHasPrevious,   /* hasPrevious */
    characterIteratorCurrent,       /* current */
    characterIteratorNext,          /* next */
    characterIteratorPrevious,      /* previous */

    0                               /* trailing slot, unused here */
};
|
|
|
|
|
|
|
|
/*
 * Set up a UCharIterator as a wrapper around a C++ CharacterIterator.
 *
 * iter     - the iterator to initialize; nothing happens if it is NULL
 * charIter - the CharacterIterator to wrap; its pointer is stored in
 *            iter->context
 *
 * If charIter is NULL, the iterator is set to the no-op iterator instead.
 */
U_CAPI void U_EXPORT2
uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) {
    if(iter==0) {
        return;
    }

    if(charIter==0) {
        /* illegal input */
        *iter=noopIterator;
        return;
    }

    *iter=characterIteratorWrapper;
    iter->context=charIter;
}
|
|
|
|
|
|
|
|
/* UCharIterator wrapper around Replaceable --------------------------------- */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is an implementation of a code unit (UChar) iterator
|
|
|
|
* based on a Replaceable object.
|
|
|
|
*
|
|
|
|
* The UCharIterator.context field holds a pointer to the Replaceable.
|
|
|
|
* UCharIterator.length and UCharIterator.index hold Replaceable.length()
|
|
|
|
* and the iteration index.
|
|
|
|
*/
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * current callback for the Replaceable-based iterator.
 *
 * Returns charAt(iter->index) of the Replaceable held in iter->context
 * without moving, or -1 when the index has reached the limit.
 */
static int32_t U_CALLCONV
replaceableIteratorCurrent(UCharIterator *iter) {
    Replaceable *rep;

    if(iter->index>=iter->limit) {
        return -1;
    }

    rep=(Replaceable *)(iter->context);
    return rep->charAt(iter->index);
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * next callback for the Replaceable-based iterator.
 *
 * Returns charAt() for the current index and advances the index by one,
 * or returns -1 (index unchanged) when the index has reached the limit.
 */
static int32_t U_CALLCONV
replaceableIteratorNext(UCharIterator *iter) {
    Replaceable *rep;
    int32_t at;

    if(iter->index>=iter->limit) {
        return -1;
    }

    rep=(Replaceable *)(iter->context);
    /* advance first, then read the position we started from */
    at=iter->index;
    iter->index=at+1;
    return rep->charAt(at);
}
|
|
|
|
|
2002-02-16 01:21:15 +00:00
|
|
|
/*
 * previous callback for the Replaceable-based iterator.
 *
 * Steps the index back by one and returns charAt() for the new index,
 * or returns -1 (index unchanged) when the index is already at the start.
 */
static int32_t U_CALLCONV
replaceableIteratorPrevious(UCharIterator *iter) {
    Replaceable *rep;

    if(iter->index<=iter->start) {
        return -1;
    }

    rep=(Replaceable *)(iter->context);
    iter->index-=1;
    return rep->charAt(iter->index);
}
|
|
|
|
|
|
|
|
/*
 * Template for the Replaceable-based iterator.
 * uiter_setReplaceable() copies it into the caller's UCharIterator and then
 * fills in context, length and limit.
 *
 * Index bookkeeping is done purely on the start/index/limit fields, so the
 * position-related callbacks of the string iterator are reused; only the
 * callbacks that fetch a code unit are specific to Replaceable.
 *
 * The struct layout is declared in unicode/uiter.h; the slot names in the
 * comments below are inferred from the function names.
 */
static const UCharIterator replaceableIterator={
    /* six data fields, cleared here and set up by uiter_setReplaceable() */
    0, 0, 0, 0, 0, 0,

    stringIteratorGetIndex,         /* getIndex (shared) */
    stringIteratorMove,             /* move (shared) */
    stringIteratorHasNext,          /* hasNext (shared) */
    stringIteratorHasPrevious,      /* hasPrevious (shared) */
    replaceableIteratorCurrent,     /* current */
    replaceableIteratorNext,        /* next */
    replaceableIteratorPrevious,    /* previous */

    0                               /* trailing slot, unused here */
};
|
|
|
|
|
|
|
|
/*
 * Set up a UCharIterator to iterate over a Replaceable object.
 *
 * iter - the iterator to initialize; nothing happens if it is NULL
 * rep  - the Replaceable; its pointer is stored in iter->context, and
 *        rep->length() is read once here into iter->length and iter->limit
 *
 * If rep is NULL, the iterator is set to the no-op iterator instead.
 */
U_CAPI void U_EXPORT2
uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
    if(iter==0) {
        return;
    }

    if(rep==0) {
        /* illegal input */
        *iter=noopIterator;
        return;
    }

    *iter=replaceableIterator;
    iter->context=rep;
    iter->length=rep->length();
    iter->limit=iter->length;
}
|
|
|
|
|
2002-02-20 02:04:23 +00:00
|
|
|
/* Helper functions --------------------------------------------------------- */
|
|
|
|
|
|
|
|
/*
 * Helper: read the value at the current position, combining surrogate
 * code units into one value where they form a pair.
 *
 * - If the current code unit is not a surrogate (this includes -1),
 *   it is returned as is.
 * - If it is a first surrogate followed by a second surrogate, or a second
 *   surrogate preceded by a first surrogate, the result of
 *   UTF16_GET_PAIR_VALUE for the pair is returned.
 * - An unpaired surrogate is returned as is.
 *
 * The iterator is moved temporarily to look at the neighbor and moved
 * back afterwards.
 */
U_CAPI int32_t U_EXPORT2
uiter_current32(UCharIterator *iter) {
    int32_t unit, neighbor;

    unit=iter->current(iter);
    if(!UTF_IS_SURROGATE(unit)) {
        return unit;
    }

    if(UTF_IS_SURROGATE_FIRST(unit)) {
        /*
         * look at the following code unit;
         * the step forward cannot hit the limit because unit!=-1
         */
        iter->move(iter, 1, UITER_CURRENT);
        neighbor=iter->current(iter);
        if(UTF_IS_SECOND_SURROGATE(neighbor)) {
            unit=UTF16_GET_PAIR_VALUE(unit, neighbor);
        }

        /* step back to where we started */
        iter->move(iter, -1, UITER_CURRENT);
    } else {
        /* look at the preceding code unit */
        neighbor=iter->previous(iter);
        if(UTF_IS_FIRST_SURROGATE(neighbor)) {
            unit=UTF16_GET_PAIR_VALUE(neighbor, unit);
        }

        /* previous() only moved if it delivered a code unit */
        if(neighbor>=0) {
            iter->move(iter, 1, UITER_CURRENT);
        }
    }

    return unit;
}
|
|
|
|
|
|
|
|
/*
 * Helper: read the value at the current position and move past it,
 * consuming two code units when they form a surrogate pair.
 *
 * Returns what iter->next() delivers (including -1), unless that is a
 * first surrogate immediately followed by a second surrogate; then both
 * are consumed and the result of UTF16_GET_PAIR_VALUE is returned.
 * For an unmatched first surrogate only that one code unit is consumed.
 */
U_CAPI int32_t U_EXPORT2
uiter_next32(UCharIterator *iter) {
    int32_t lead, trail;

    lead=iter->next(iter);
    if(!UTF_IS_FIRST_SURROGATE(lead)) {
        return lead;
    }

    trail=iter->next(iter);
    if(UTF_IS_SECOND_SURROGATE(trail)) {
        return UTF16_GET_PAIR_VALUE(lead, trail);
    }

    if(trail>=0) {
        /* unmatched first surrogate: un-read the code unit that followed it */
        iter->move(iter, -1, UITER_CURRENT);
    }
    return lead;
}
|
|
|
|
|
|
|
|
/*
 * Helper: move back and read the value there, consuming two code units
 * when they form a surrogate pair.
 *
 * Returns what iter->previous() delivers (including -1), unless that is a
 * second surrogate immediately preceded by a first surrogate; then both
 * are consumed and the result of UTF16_GET_PAIR_VALUE is returned.
 * For an unmatched second surrogate only that one code unit is consumed.
 */
U_CAPI int32_t U_EXPORT2
uiter_previous32(UCharIterator *iter) {
    int32_t trail, lead;

    trail=iter->previous(iter);
    if(!UTF_IS_SECOND_SURROGATE(trail)) {
        return trail;
    }

    lead=iter->previous(iter);
    if(UTF_IS_FIRST_SURROGATE(lead)) {
        return UTF16_GET_PAIR_VALUE(lead, trail);
    }

    if(lead>=0) {
        /* unmatched second surrogate: un-read the code unit that preceded it */
        iter->move(iter, 1, UITER_CURRENT);
    }
    return trail;
}
|
|
|
|
|
2002-02-09 21:55:36 +00:00
|
|
|
U_CDECL_END
|