diff --git a/icu4c/source/common/unicode/usetiter.h b/icu4c/source/common/unicode/usetiter.h index 3e77ddde04..01a49c1140 100644 --- a/icu4c/source/common/unicode/usetiter.h +++ b/icu4c/source/common/unicode/usetiter.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2002-2005, International Business Machines +* Copyright (c) 2002-2006, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -28,6 +28,10 @@ class UnicodeString; * code points or ranges have been returned, it returns the * multicharacter strings of the UnicodSet, if any. * + * This class is not intended to be subclassed. Consider any fields + * or methods declared as "protected" to be private. The use of + * protected in this class is an artifact of history. + * *
To iterate over code points, use a loop like this: *
* UnicodeSetIterator it(set); @@ -145,10 +149,16 @@ class U_COMMON_API UnicodeSetIterator : public UObject { /** * Returns the current string, if isString() returned - * true. Otherwise returns an undefined result. + * true. If the current iteration item is a code point, a UnicodeString + * containing that single code point is returned. + * + * Ownership of the returned string remains with the iterator. + * The string is guaranteed to remain valid only until the iterator is + * advanced to the next item, or until the iterator is deleted. + * * @stable ICU 2.4 */ - inline const UnicodeString& getString() const; + const UnicodeString& getString(); /** * Returns the next element in the set, either a single code point @@ -259,6 +269,13 @@ class U_COMMON_API UnicodeSetIterator : public UObject { */ int32_t stringCount; + /** + * Points to the string to use when the caller asks for a + * string and the current iteration item is a code point, not a string. + * @internal + */ + UnicodeString *cpString; + /** Copy constructor. Disallowed. * @stable ICU 2.4 */ @@ -288,9 +305,6 @@ inline UChar32 UnicodeSetIterator::getCodepointEnd() const { return codepointEnd; } -inline const UnicodeString& UnicodeSetIterator::getString() const { - return *string; -} U_NAMESPACE_END diff --git a/icu4c/source/common/usetiter.cpp b/icu4c/source/common/usetiter.cpp index 75a75de4cc..ade6fde9bb 100644 --- a/icu4c/source/common/usetiter.cpp +++ b/icu4c/source/common/usetiter.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2002-2003, International Business Machines +* Copyright (c) 2002-2006, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -18,6 +18,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator) * @param set set to iterate over */ UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) { + cpString = NULL; reset(uSet); } @@ -26,11 +27,12 @@ UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) { */ UnicodeSetIterator::UnicodeSetIterator() { this->set = NULL; + cpString = NULL; reset(); } UnicodeSetIterator::~UnicodeSetIterator() { - // Nothing to do + delete cpString; } /** @@ -45,11 +47,13 @@ UnicodeSetIterator::~UnicodeSetIterator() { UBool UnicodeSetIterator::next() { if (nextElement <= endElement) { codepoint = codepointEnd = nextElement++; + string = NULL; return TRUE; } if (range < endRange) { loadRange(++range); codepoint = codepointEnd = nextElement++; + string = NULL; return TRUE; } @@ -71,6 +75,7 @@ UBool UnicodeSetIterator::next() { *
Note also that the codepointEnd is undefined after calling this method. */ UBool UnicodeSetIterator::nextRange() { + string = NULL; if (nextElement <= endElement) { codepointEnd = endElement; codepoint = nextElement; @@ -118,6 +123,7 @@ void UnicodeSetIterator::reset() { loadRange(range); } nextString = 0; + string = NULL; } void UnicodeSetIterator::loadRange(int32_t iRange) { @@ -125,6 +131,20 @@ void UnicodeSetIterator::loadRange(int32_t iRange) { endElement = set->getRangeEnd(iRange); } + +const UnicodeString& UnicodeSetIterator::UnicodeSetIterator::getString() { + if (string==NULL && codepoint!=(UChar32)IS_STRING) { + if (cpString == NULL) { + cpString = new UnicodeString(); + } + if (cpString != NULL) { + cpString->setTo((UChar32)codepoint); + } + string = cpString; + } + return *string; +} + U_NAMESPACE_END //eof diff --git a/icu4c/source/test/intltest/transrt.cpp b/icu4c/source/test/intltest/transrt.cpp index f36df47bab..7190a9d0c5 100644 --- a/icu4c/source/test/intltest/transrt.cpp +++ b/icu4c/source/test/intltest/transrt.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2005, International Business Machines +* Copyright (C) 2000-2006, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -271,6 +271,10 @@ UBool LegalGreek::isRho(UChar c) { } // AbbreviatedUnicodeSetIterator Interface --------------------------------------------- +// +// Iterate over a UnicodeSet, only returning a sampling of the contained code points. +// density is the approximate total number of code points to returned for the entire set. +// class AbbreviatedUnicodeSetIterator : public UnicodeSetIterator { public : @@ -291,7 +295,7 @@ public : private : UBool abbreviated; - int32_t perRange; + int32_t perRange; // The maximum number of code points to be returned from each range virtual void loadRange(int32_t range); /**