1999-08-16 21:50:52 +00:00
|
|
|
/*
|
2001-03-21 20:44:20 +00:00
|
|
|
******************************************************************************
|
2010-05-19 17:29:33 +00:00
|
|
|
* Copyright (C) 1998-2010, International Business Machines Corporation and
|
|
|
|
* others. All Rights Reserved.
|
2001-03-21 20:44:20 +00:00
|
|
|
******************************************************************************
|
1999-08-16 21:50:52 +00:00
|
|
|
*/
|
|
|
|
|
2010-05-19 17:29:33 +00:00
|
|
|
#include <typeinfo> // for 'typeid' to work
|
|
|
|
|
1999-12-28 23:39:02 +00:00
|
|
|
#include "unicode/uchriter.h"
|
2001-11-11 00:52:59 +00:00
|
|
|
#include "unicode/ustring.h"
|
2000-04-12 19:36:30 +00:00
|
|
|
#include "uhash.h"
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2003-08-27 01:01:42 +00:00
|
|
|
// Expands ICU's UObject class-ID boilerplate for UCharCharacterIterator.
// NOTE(review): presumably this defines getStaticClassID()/getDynamicClassID();
// confirm against the macro definition in unicode/uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCharCharacterIterator)
|
2001-08-29 18:14:28 +00:00
|
|
|
|
2000-04-12 19:36:30 +00:00
|
|
|
// Default constructor: delegates to the base-class default constructor and
// leaves the text pointer null. Not meant to be used for real iteration.
UCharCharacterIterator::UCharCharacterIterator()
    : CharacterIterator(), text(0)
{
    // never default construct!
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-09-27 23:39:36 +00:00
|
|
|
// Constructs an iterator over textPtr.
// The length handed to the base class is:
//   - 0 when textPtr is null,
//   - u_strlen(textPtr) when length is negative,
//   - length otherwise.
// Only the pointer is stored; the characters are not copied.
UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr,
                                               int32_t length)
    : CharacterIterator(textPtr == 0 ? 0 : (length < 0 ? u_strlen(textPtr) : length)),
      text(textPtr)
{
}
|
|
|
|
|
2000-09-27 23:39:36 +00:00
|
|
|
// Constructs an iterator over textPtr with an initial position.
// The length handed to the base class is 0 for a null textPtr,
// u_strlen(textPtr) for a negative length, and length otherwise; position is
// forwarded to the base class unchanged.
// Only the pointer is stored; the characters are not copied.
UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr,
                                               int32_t length,
                                               int32_t position)
    : CharacterIterator(textPtr == 0 ? 0 : (length < 0 ? u_strlen(textPtr) : length),
                        position),
      text(textPtr)
{
}
|
|
|
|
|
2000-09-27 23:39:36 +00:00
|
|
|
// Constructs an iterator over a sub-range of textPtr.
// The length handed to the base class is 0 for a null textPtr,
// u_strlen(textPtr) for a negative length, and length otherwise; textBegin,
// textEnd and position are forwarded to the base class unchanged.
// Only the pointer is stored; the characters are not copied.
UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr,
                                               int32_t length,
                                               int32_t textBegin,
                                               int32_t textEnd,
                                               int32_t position)
    : CharacterIterator(textPtr == 0 ? 0 : (length < 0 ? u_strlen(textPtr) : length),
                        textBegin,
                        textEnd,
                        position),
      text(textPtr)
{
}
|
|
|
|
|
|
|
|
// Copy constructor: copy-constructs the base class from `that` and copies the
// text pointer (the pointed-to characters are shared, not duplicated).
UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& that)
    : CharacterIterator(that), text(that.text)
{
}
|
|
|
|
|
|
|
|
// Assignment: takes over `that`'s text pointer and lets the base class copy
// its own state. The characters themselves are shared, not duplicated.
UCharCharacterIterator&
UCharCharacterIterator::operator=(const UCharCharacterIterator& that) {
    text = that.text;
    CharacterIterator::operator=(that);
    return *this;
}
|
|
|
|
|
2000-04-12 19:36:30 +00:00
|
|
|
// Destructor: nothing to do here; in particular the text pointer is not
// released by this class.
UCharCharacterIterator::~UCharCharacterIterator()
{
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
// Equality: TRUE when `that` is this very object, or when it has exactly the
// same dynamic type and the same text pointer, text length, position and
// [begin, end) range. Text is compared by pointer, not by content.
UBool
UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
    if (this == &that) {
        return TRUE;
    }
    if (typeid(*this) != typeid(that)) {
        return FALSE;
    }

    // The typeid check above guarantees the dynamic type, so a static
    // downcast is safe. Keep the reference const: nothing here mutates
    // `that`, and a C-style cast would silently strip the qualifier.
    const UCharCharacterIterator& realThat =
        static_cast<const UCharCharacterIterator&>(that);

    return text == realThat.text
        && textLength == realThat.textLength
        && pos == realThat.pos
        && begin == realThat.begin
        && end == realThat.end;
}
|
|
|
|
|
|
|
|
// Hash code: the hash of the text (textLength UChars) XOR-combined with the
// current position and the range bounds.
int32_t
UCharCharacterIterator::hashCode() const {
    int32_t hash = uhash_hashUCharsN(text, textLength);
    hash ^= pos;
    hash ^= begin;
    hash ^= end;
    return hash;
}
|
|
|
|
|
|
|
|
// Returns a heap-allocated copy of this iterator made with the copy
// constructor. The result comes from plain `new`.
CharacterIterator*
UCharCharacterIterator::clone() const {
    CharacterIterator* duplicate = new UCharCharacterIterator(*this);
    return duplicate;
}
|
|
|
|
|
|
|
|
// Moves to the start of the range and returns the code unit there,
// or DONE when the range is empty.
UChar
UCharCharacterIterator::first() {
    pos = begin;
    if (pos >= end) {
        return DONE;
    }
    return text[pos];
}
|
|
|
|
|
2000-04-20 23:02:20 +00:00
|
|
|
// Moves to the start of the range, returns the code unit there and then
// advances past it. Returns DONE (position left at begin) for an empty range.
UChar
UCharCharacterIterator::firstPostInc() {
    pos = begin;
    if (pos >= end) {
        return DONE;
    }
    return text[pos++];
}
|
|
|
|
|
|
|
|
// Moves to the last code unit of the range and returns it.
// For an empty range the position is left at end and DONE is returned.
UChar
UCharCharacterIterator::last() {
    pos = end;
    if (pos <= begin) {
        return DONE;
    }
    return text[--pos];
}
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
// Sets the position, pinning it into [begin, end], and returns the code unit
// at the resulting position, or DONE when that position is end.
UChar
UCharCharacterIterator::setIndex(int32_t position) {
    int32_t pinned = position;
    if (pinned < begin) {
        pinned = begin;
    } else if (pinned > end) {
        pinned = end;
    }
    pos = pinned;

    if (pos >= end) {
        return DONE;
    }
    return text[pos];
}
|
|
|
|
|
|
|
|
// Returns the code unit at the current position without moving,
// or DONE when the position lies outside [begin, end).
UChar
UCharCharacterIterator::current() const {
    if (pos < begin || pos >= end) {
        return DONE;
    }
    return text[pos];
}
|
|
|
|
|
|
|
|
// Advances by one code unit and returns the unit at the new position.
// When there is no further unit, parks the position at end and returns DONE.
UChar
UCharCharacterIterator::next() {
    const int32_t following = pos + 1;
    if (following < end) {
        pos = following;
        return text[pos];
    }

    /* make current() return DONE */
    pos = end;
    return DONE;
}
|
|
|
|
|
|
|
|
// Returns the code unit at the current position and then advances past it.
// Returns DONE without moving when already at (or beyond) end.
UChar
UCharCharacterIterator::nextPostInc() {
    if (pos >= end) {
        return DONE;
    }
    return text[pos++];
}
|
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
// TRUE while the position is still before end.
UBool
UCharCharacterIterator::hasNext() {
    if (pos < end) {
        return TRUE;
    }
    return FALSE;
}
|
|
|
|
|
|
|
|
// Steps back one code unit and returns the unit at the new position.
// Returns DONE without moving when already at (or before) begin.
UChar
UCharCharacterIterator::previous() {
    if (pos <= begin) {
        return DONE;
    }
    return text[--pos];
}
|
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
// TRUE while the position is still after begin.
UBool
UCharCharacterIterator::hasPrevious() {
    if (pos > begin) {
        return TRUE;
    }
    return FALSE;
}
|
|
|
|
|
|
|
|
// Moves to the start of the range and returns the character read there with
// UTF_NEXT_CHAR, leaving the position at begin. Returns DONE when the range
// is empty.
UChar32
UCharCharacterIterator::first32() {
    pos = begin;
    if (pos >= end) {
        return DONE;
    }

    // Decode through a scratch index so that pos itself stays at begin.
    int32_t scratch = pos;
    UChar32 ch;
    UTF_NEXT_CHAR(text, scratch, end, ch);
    return ch;
}
|
|
|
|
|
2000-04-20 23:02:20 +00:00
|
|
|
// Moves to the start of the range, reads the character there with
// UTF_NEXT_CHAR and leaves the position after it. Returns DONE when the
// range is empty.
UChar32
UCharCharacterIterator::first32PostInc() {
    pos = begin;
    if (pos >= end) {
        return DONE;
    }

    UChar32 ch;
    UTF_NEXT_CHAR(text, pos, end, ch);   // advances pos past the character
    return ch;
}
|
|
|
|
|
2000-04-12 19:36:30 +00:00
|
|
|
// Moves to end, then steps back over the last character with UTF_PREV_CHAR
// and returns it. For an empty range the position stays at end and DONE is
// returned.
UChar32
UCharCharacterIterator::last32() {
    pos = end;
    if (pos <= begin) {
        return DONE;
    }

    UChar32 ch;
    UTF_PREV_CHAR(text, begin, pos, ch);   // moves pos back onto the character
    return ch;
}
|
|
|
|
|
|
|
|
// Sets the position, pinning it into [begin, end]. When the pinned position
// is before end it is first adjusted with UTF_SET_CHAR_START, and the
// character read there with UTF_NEXT_CHAR is returned (the position stays at
// the start of that character). Returns DONE when the pinned position is end.
UChar32
UCharCharacterIterator::setIndex32(int32_t position) {
    if (position < begin) {
        position = begin;
    } else if (position > end) {
        position = end;
    }

    if (position >= end) {
        this->pos = position;
        return DONE;
    }

    UTF_SET_CHAR_START(text, begin, position);
    this->pos = position;

    // Decode through a scratch index so that pos is not advanced.
    int32_t scratch = position;
    UChar32 ch;
    UTF_NEXT_CHAR(text, scratch, end, ch);
    return ch;
}
|
|
|
|
|
|
|
|
// Returns the character at the current position (read with UTF_GET_CHAR)
// without moving, or DONE when the position lies outside [begin, end).
UChar32
UCharCharacterIterator::current32() const {
    if (pos < begin || pos >= end) {
        return DONE;
    }

    UChar32 ch;
    UTF_GET_CHAR(text, begin, pos, end, ch);
    return ch;
}
|
|
|
|
|
|
|
|
// Advances past the current character (UTF_FWD_1) and returns the character
// at the new position, which stays at the start of that character. When
// nothing follows, parks the position at end and returns DONE.
UChar32
UCharCharacterIterator::next32() {
    if (pos >= end) {
        /* make current() return DONE */
        pos = end;
        return DONE;
    }

    UTF_FWD_1(text, pos, end);
    if (pos >= end) {
        /* make current() return DONE */
        pos = end;
        return DONE;
    }

    // Decode through a scratch index so that pos is not advanced again.
    int32_t scratch = pos;
    UChar32 ch;
    UTF_NEXT_CHAR(text, scratch, end, ch);
    return ch;
}
|
|
|
|
|
|
|
|
// Reads the character at the current position with UTF_NEXT_CHAR, leaving
// the position after it. Returns DONE without moving when already at (or
// beyond) end.
UChar32
UCharCharacterIterator::next32PostInc() {
    if (pos >= end) {
        return DONE;
    }

    UChar32 ch;
    UTF_NEXT_CHAR(text, pos, end, ch);   // advances pos past the character
    return ch;
}
|
|
|
|
|
|
|
|
// Steps back over the preceding character with UTF_PREV_CHAR and returns it.
// Returns DONE without moving when already at (or before) begin.
UChar32
UCharCharacterIterator::previous32() {
    if (pos <= begin) {
        return DONE;
    }

    UChar32 ch;
    UTF_PREV_CHAR(text, begin, pos, ch);   // moves pos back onto the character
    return ch;
}
|
|
|
|
|
2002-03-12 01:32:42 +00:00
|
|
|
// Moves the position by `delta` code units relative to `origin` (kStart,
// kCurrent or kEnd), pins the result into [begin, end] and returns it.
// An unrecognized origin leaves the position where it is (still pinned).
//
// The target is never computed as `base + delta` up front: for extreme
// deltas that sum overflows int32_t, which is undefined behavior. Instead
// delta is compared against the distance still available on each side.
// NOTE(review): this assumes begin, pos and end are non-negative so that the
// differences below cannot overflow themselves — confirm against the
// CharacterIterator base class.
int32_t
UCharCharacterIterator::move(int32_t delta, CharacterIterator::EOrigin origin) {
    int32_t base;
    switch(origin) {
    case kStart:
        base = begin;
        break;
    case kCurrent:
        base = pos;
        break;
    case kEnd:
        base = end;
        break;
    default:
        // unknown origin: do not move, only re-pin the current position
        base = pos;
        delta = 0;
        break;
    }

    if(delta < begin - base) {
        // base + delta would land before begin
        pos = begin;
    } else if(delta > end - base) {
        // base + delta would land after end
        pos = end;
    } else {
        pos = base + delta;
    }

    return pos;
}
|
|
|
|
|
2002-03-12 01:32:42 +00:00
|
|
|
// Moves the position by `delta` characters (as counted by UTF_FWD_N /
// UTF_BACK_N) relative to `origin` and returns the new position.
//   kStart:   start at begin; only a positive delta moves.
//   kCurrent: positive delta moves forward, otherwise backward.
//   kEnd:     start at end; only a negative delta moves.
// An unrecognized origin leaves the position untouched.
//
// Backward step counts are capped at (pos - begin) before negating delta:
// every backward step consumes at least one code unit, so more steps than
// that can never be taken, and the cap avoids the signed overflow (undefined
// behavior) of evaluating `-delta` when delta is the minimum int32_t value.
int32_t
UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin) {
    // this implementation relies on the "safe" version of the UTF macros
    // (or the trustworthiness of the caller)
    switch(origin) {
    case kStart:
        pos = begin;
        if(delta > 0) {
            UTF_FWD_N(text, pos, end, delta);
        }
        break;
    case kCurrent:
        if(delta > 0) {
            UTF_FWD_N(text, pos, end, delta);
        } else {
            const int32_t room = pos - begin;   // code units behind pos
            const int32_t steps = (delta < -room) ? room : -delta;
            UTF_BACK_N(text, begin, pos, steps);
        }
        break;
    case kEnd:
        pos = end;
        if(delta < 0) {
            const int32_t room = pos - begin;   // code units behind pos
            const int32_t steps = (delta < -room) ? room : -delta;
            UTF_BACK_N(text, begin, pos, steps);
        }
        break;
    default:
        break;
    }

    return pos;
}
|
|
|
|
|
2000-01-08 02:05:05 +00:00
|
|
|
void UCharCharacterIterator::setText(const UChar* newText,
|
2000-04-12 19:36:30 +00:00
|
|
|
int32_t newTextLength) {
|
2000-01-08 02:05:05 +00:00
|
|
|
text = newText;
|
2000-04-12 19:36:30 +00:00
|
|
|
if(newText == 0 || newTextLength < 0) {
|
|
|
|
newTextLength = 0;
|
|
|
|
}
|
|
|
|
end = textLength = newTextLength;
|
|
|
|
pos = begin = 0;
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
void
|
2004-03-30 22:39:45 +00:00
|
|
|
UCharCharacterIterator::getText(UnicodeString& result) {
|
2000-04-12 19:36:30 +00:00
|
|
|
result = UnicodeString(text, textLength);
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|