diff --git a/icu4c/source/common/unicode/usetiter.h b/icu4c/source/common/unicode/usetiter.h index 3e77ddde04..01a49c1140 100644 --- a/icu4c/source/common/unicode/usetiter.h +++ b/icu4c/source/common/unicode/usetiter.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2002-2005, International Business Machines +* Copyright (c) 2002-2006, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -28,6 +28,10 @@ class UnicodeString; * code points or ranges have been returned, it returns the * multicharacter strings of the UnicodSet, if any. * + * This class is not intended to be subclassed. Consider any fields + * or methods declared as "protected" to be private. The use of + * protected in this class is an artifact of history. + * *

To iterate over code points, use a loop like this: *

  * UnicodeSetIterator it(set);
@@ -145,10 +149,16 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
 
     /**
      * Returns the current string, if isString() returned
-     * true.  Otherwise returns an undefined result.
+     * true.  If the current iteration item is a code point, a UnicodeString
+     * containing that single code point is returned.
+     *
+     * Ownership of the returned string remains with the iterator; the caller
+     * must not delete it.  The string is guaranteed to remain valid only until
+     * the iterator is advanced to the next item, or until the iterator is deleted.
+     *
      * @stable ICU 2.4
      */
-    inline const UnicodeString& getString() const;
+    const UnicodeString& getString();
 
     /**
      * Returns the next element in the set, either a single code point
@@ -259,6 +269,13 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
      */
     int32_t stringCount;
 
+    /**
+     *  Scratch string that getString() fills in and returns when the current
+     *  iteration item is a code point, not a string.  Owned by the iterator; NULL until first needed.
+     *  @internal
+     */
+    UnicodeString *cpString;
+
     /** Copy constructor. Disallowed.
      * @stable ICU 2.4
      */
@@ -288,9 +305,6 @@ inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
     return codepointEnd;
 }
 
-inline const UnicodeString& UnicodeSetIterator::getString() const {
-    return *string;
-}
 
 U_NAMESPACE_END
 
diff --git a/icu4c/source/common/usetiter.cpp b/icu4c/source/common/usetiter.cpp
index 75a75de4cc..ade6fde9bb 100644
--- a/icu4c/source/common/usetiter.cpp
+++ b/icu4c/source/common/usetiter.cpp
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-* Copyright (c) 2002-2003, International Business Machines
+* Copyright (c) 2002-2006, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@@ -18,6 +18,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator)
  * @param set set to iterate over
  */
 UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) {
+    cpString  = NULL;  // allocated lazily, the first time getString() is asked for a code point
     reset(uSet);
 }
 
@@ -26,11 +27,12 @@ UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) {
  */
 UnicodeSetIterator::UnicodeSetIterator() {
     this->set = NULL;
+    cpString  = NULL;  // allocated lazily, the first time getString() is asked for a code point
     reset();
 }
 
 UnicodeSetIterator::~UnicodeSetIterator() {
-    // Nothing to do
+    delete cpString;  // owned by this iterator; still NULL if getString() never had to create it
 }
 
 /**
@@ -45,11 +47,13 @@ UnicodeSetIterator::~UnicodeSetIterator() {
 UBool UnicodeSetIterator::next() {
     if (nextElement <= endElement) {
         codepoint = codepointEnd = nextElement++;
+        string = NULL;
         return TRUE;
     }
     if (range < endRange) {
         loadRange(++range);
         codepoint = codepointEnd = nextElement++;
+        string = NULL;
         return TRUE;
     }
 
@@ -71,6 +75,7 @@ UBool UnicodeSetIterator::next() {
  * 
Note also that the codepointEnd is undefined after calling this method. */ UBool UnicodeSetIterator::nextRange() { + string = NULL; if (nextElement <= endElement) { codepointEnd = endElement; codepoint = nextElement; @@ -118,6 +123,7 @@ void UnicodeSetIterator::reset() { loadRange(range); } nextString = 0; + string = NULL; } void UnicodeSetIterator::loadRange(int32_t iRange) { @@ -125,6 +131,20 @@ void UnicodeSetIterator::loadRange(int32_t iRange) { endElement = set->getRangeEnd(iRange); } + +const UnicodeString& UnicodeSetIterator::getString() { + if (string==NULL && codepoint!=(UChar32)IS_STRING) { + if (cpString == NULL) { + cpString = new UnicodeString(); + } + if (cpString != NULL) { + cpString->setTo((UChar32)codepoint); + } + string = cpString; + } + return *string; +} + U_NAMESPACE_END //eof diff --git a/icu4c/source/test/intltest/transrt.cpp b/icu4c/source/test/intltest/transrt.cpp index f36df47bab..7190a9d0c5 100644 --- a/icu4c/source/test/intltest/transrt.cpp +++ b/icu4c/source/test/intltest/transrt.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2005, International Business Machines +* Copyright (C) 2000-2006, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -271,6 +271,10 @@ UBool LegalGreek::isRho(UChar c) { } // AbbreviatedUnicodeSetIterator Interface --------------------------------------------- +// +// Iterate over a UnicodeSet, only returning a sampling of the contained code points. +// density is the approximate total number of code points to be returned for the entire set. 
+// class AbbreviatedUnicodeSetIterator : public UnicodeSetIterator { public : @@ -291,7 +295,7 @@ public : private : UBool abbreviated; - int32_t perRange; + int32_t perRange; // The maximum number of code points to be returned from each range virtual void loadRange(int32_t range); /**