ICU-4152 UnicodeSetIterator::getString() works with code points

X-SVN-Rev: 18958
This commit is contained in:
Andy Heninger 2006-01-07 01:35:28 +00:00
parent 5cb171a524
commit e6b05fd11e
3 changed files with 48 additions and 10 deletions

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (c) 2002-2005, International Business Machines * Copyright (c) 2002-2006, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
*/ */
@ -28,6 +28,10 @@ class UnicodeString;
* code points or ranges have been returned, it returns the * code points or ranges have been returned, it returns the
* multicharacter strings of the UnicodeSet, if any. * multicharacter strings of the UnicodeSet, if any.
* *
* This class is not intended to be subclassed. Consider any fields
* or methods declared as "protected" to be private. The use of
* protected in this class is an artifact of history.
*
* <p>To iterate over code points, use a loop like this: * <p>To iterate over code points, use a loop like this:
* <pre> * <pre>
* UnicodeSetIterator it(set); * UnicodeSetIterator it(set);
@ -145,10 +149,16 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
/** /**
* Returns the current string, if <tt>isString()</tt> returned * Returns the current string, if <tt>isString()</tt> returned
* true. Otherwise returns an undefined result. * true. If the current iteration item is a code point, a UnicodeString
* containing that single code point is returned.
*
* Ownership of the returned string remains with the iterator.
* The string is guaranteed to remain valid only until the iterator is
* advanced to the next item, or until the iterator is deleted.
*
* @stable ICU 2.4 * @stable ICU 2.4
*/ */
inline const UnicodeString& getString() const; const UnicodeString& getString();
/** /**
* Returns the next element in the set, either a single code point * Returns the next element in the set, either a single code point
@ -259,6 +269,13 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
*/ */
int32_t stringCount; int32_t stringCount;
/**
* Points to the string to use when the caller asks for a
* string and the current iteration item is a code point, not a string.
* @internal
*/
UnicodeString *cpString;
/** Copy constructor. Disallowed. /** Copy constructor. Disallowed.
* @stable ICU 2.4 * @stable ICU 2.4
*/ */
@ -288,9 +305,6 @@ inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
return codepointEnd; return codepointEnd;
} }
inline const UnicodeString& UnicodeSetIterator::getString() const {
return *string;
}
U_NAMESPACE_END U_NAMESPACE_END

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (c) 2002-2003, International Business Machines * Copyright (c) 2002-2006, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
*/ */
@ -18,6 +18,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator)
* @param set set to iterate over * @param set set to iterate over
*/ */
UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) { UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) {
cpString = NULL;
reset(uSet); reset(uSet);
} }
@ -26,11 +27,12 @@ UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) {
*/ */
UnicodeSetIterator::UnicodeSetIterator() { UnicodeSetIterator::UnicodeSetIterator() {
this->set = NULL; this->set = NULL;
cpString = NULL;
reset(); reset();
} }
UnicodeSetIterator::~UnicodeSetIterator() { UnicodeSetIterator::~UnicodeSetIterator() {
// Nothing to do delete cpString;
} }
/** /**
@ -45,11 +47,13 @@ UnicodeSetIterator::~UnicodeSetIterator() {
UBool UnicodeSetIterator::next() { UBool UnicodeSetIterator::next() {
if (nextElement <= endElement) { if (nextElement <= endElement) {
codepoint = codepointEnd = nextElement++; codepoint = codepointEnd = nextElement++;
string = NULL;
return TRUE; return TRUE;
} }
if (range < endRange) { if (range < endRange) {
loadRange(++range); loadRange(++range);
codepoint = codepointEnd = nextElement++; codepoint = codepointEnd = nextElement++;
string = NULL;
return TRUE; return TRUE;
} }
@ -71,6 +75,7 @@ UBool UnicodeSetIterator::next() {
* <br>Note also that the codepointEnd is undefined after calling this method. * <br>Note also that the codepointEnd is undefined after calling this method.
*/ */
UBool UnicodeSetIterator::nextRange() { UBool UnicodeSetIterator::nextRange() {
string = NULL;
if (nextElement <= endElement) { if (nextElement <= endElement) {
codepointEnd = endElement; codepointEnd = endElement;
codepoint = nextElement; codepoint = nextElement;
@ -118,6 +123,7 @@ void UnicodeSetIterator::reset() {
loadRange(range); loadRange(range);
} }
nextString = 0; nextString = 0;
string = NULL;
} }
void UnicodeSetIterator::loadRange(int32_t iRange) { void UnicodeSetIterator::loadRange(int32_t iRange) {
@ -125,6 +131,20 @@ void UnicodeSetIterator::loadRange(int32_t iRange) {
endElement = set->getRangeEnd(iRange); endElement = set->getRangeEnd(iRange);
} }
/**
 * Returns the current iteration item as a string.
 *
 * When the current item is a code point rather than a string (string is NULL
 * and codepoint is not IS_STRING), the code point is written into cpString,
 * which is allocated on first use, and string is pointed at it so that
 * further calls for the same item skip the conversion.
 *
 * cpString is reused: a later call made while string is NULL overwrites its
 * contents, so the returned reference should not be held across such calls.
 *
 * NOTE(review): if the allocation of cpString fails, or if string is NULL
 * while codepoint equals IS_STRING, string is still NULL when it is
 * dereferenced below -- confirm that callers cannot reach that state.
 *
 * @return a reference to the string for the current item.
 */
const UnicodeString& UnicodeSetIterator::getString() {
    if (string==NULL && codepoint!=(UChar32)IS_STRING) {
        if (cpString == NULL) {
            // Allocate the scratch string the first time it is needed.
            cpString = new UnicodeString();
        }
        if (cpString != NULL) {
            cpString->setTo((UChar32)codepoint);
        }
        // Remember the result so the conversion is not repeated for this item.
        string = cpString;
    }
    return *string;
}
U_NAMESPACE_END U_NAMESPACE_END
//eof //eof

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (C) 2000-2005, International Business Machines * Copyright (C) 2000-2006, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* Date Name Description * Date Name Description
@ -271,6 +271,10 @@ UBool LegalGreek::isRho(UChar c) {
} }
// AbbreviatedUnicodeSetIterator Interface --------------------------------------------- // AbbreviatedUnicodeSetIterator Interface ---------------------------------------------
//
// Iterate over a UnicodeSet, only returning a sampling of the contained code points.
// density is the approximate total number of code points to be returned for the entire set.
//
class AbbreviatedUnicodeSetIterator : public UnicodeSetIterator { class AbbreviatedUnicodeSetIterator : public UnicodeSetIterator {
public : public :
@ -291,7 +295,7 @@ public :
private : private :
UBool abbreviated; UBool abbreviated;
int32_t perRange; int32_t perRange; // The maximum number of code points to be returned from each range
virtual void loadRange(int32_t range); virtual void loadRange(int32_t range);
/** /**