ICU-5445 Consolidate RuleWhiteSpace implementations, and reduce casting between USet and UnicodeSet.

X-SVN-Rev: 22135
2007-07-25 02:51:25 +00:00 · 2007-07-25 02:51:25 +00:00 · ab838a4310
commit ab838a4310
parent faab4a6efb
5 changed files with 30 additions and 28 deletions
--- a/icu4c/source/common/rbbiscan.cpp
+++ b/icu4c/source/common/rbbiscan.cpp
@@ -2,7 +2,7 @@
 //
 //  file:  rbbiscan.cpp
 //
-//  Copyright (C) 2002-2006, International Business Machines Corporation and others.
+//  Copyright (C) 2002-2007, International Business Machines Corporation and others.
 //  All Rights Reserved.
 //
 //  This file contains the Rule Based Break Iterator Rule Builder functions for
@@ -23,7 +23,7 @@
 #include "unicode/uchriter.h"
 #include "unicode/parsepos.h"
 #include "unicode/parseerr.h"
-#include "uprops.h"
+#include "util.h"
 #include "cmemory.h"
 #include "cstring.h"

@@ -127,7 +127,7 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
    //            and the time to build these few sets should be small compared to a
    //            full break iterator build.
    fRuleSets[kRuleSet_rule_char-128]       = new UnicodeSet(gRuleSet_rule_char_pattern,       *rb->fStatus);
-    fRuleSets[kRuleSet_white_space-128]     = (UnicodeSet*) uprv_openRuleWhiteSpaceSet(rb->fStatus);
+    fRuleSets[kRuleSet_white_space-128]     = uprv_openRuleWhiteSpaceSet(rb->fStatus);
    fRuleSets[kRuleSet_name_char-128]       = new UnicodeSet(gRuleSet_name_char_pattern,       *rb->fStatus);
    fRuleSets[kRuleSet_name_start_char-128] = new UnicodeSet(gRuleSet_name_start_char_pattern, *rb->fStatus);
    fRuleSets[kRuleSet_digit_char-128]      = new UnicodeSet(gRuleSet_digit_char_pattern,      *rb->fStatus);
--- a/icu4c/source/common/uset.cpp
+++ b/icu4c/source/common/uset.cpp
@@ -303,18 +303,6 @@ uset_getItem(const USet* uset, int32_t itemIndex,
 //    return TRUE;
 //}

-U_CAPI USet* U_EXPORT2
-uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
-    if(U_FAILURE(*ec)) {
-        return NULL;
-    }
-    // create a set with the Pattern_White_Space characters,
-    // without a pattern for fewer code dependencies
-    UnicodeSet *set=new UnicodeSet(9, 0xd);
-    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
-    return (USet *)set;
-}
-
 /*
 * Serialize a USet into 16-bit units.
 * Store BMP code points as themselves with one 16-bit unit each.
--- a/icu4c/source/common/uset_imp.h
+++ b/icu4c/source/common/uset_imp.h
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2004-2005, International Business Machines
+*   Copyright (C) 2004-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -52,16 +52,5 @@ typedef struct USetAdder USetAdder;

 U_CDECL_END

-/**
- * Get the set of "white space" characters in the sense of ICU rule
- * parsers.  Caller must close/delete result.
- * Equivalent to the set of characters with the Pattern_White_Space Unicode property.
- * Stable set of characters, won't change.
- * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
- * @internal
- */
-U_CAPI USet* U_EXPORT2
-uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
-
 #endif

--- a/icu4c/source/common/util.cpp
+++ b/icu4c/source/common/util.cpp
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (c) 2001-2006, International Business Machines
+*   Copyright (c) 2001-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@@ -10,6 +10,7 @@

 #include "util.h"
 #include "unicode/unimatch.h"
+#include "unicode/uniset.h"

 // Define UChar constants using hex for EBCDIC compatibility

@@ -427,4 +428,16 @@ uprv_isRuleWhiteSpace(UChar32 c) {
             c == 0x200E || c == 0x200F || c >= 0x2028));
 }

+U_CAPI UnicodeSet* U_EXPORT2
+uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
+    if(U_FAILURE(*ec)) {
+        return NULL;
+    }
+    // create a set with the Pattern_White_Space characters,
+    // without a pattern for fewer code dependencies
+    UnicodeSet *set=new UnicodeSet(9, 0xd);
+    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
+    return set;
+}
+
 //eof
--- a/icu4c/source/common/util.h
+++ b/icu4c/source/common/util.h
@@ -22,6 +22,7 @@
 U_NAMESPACE_BEGIN

 class UnicodeMatcher;
+class UnicodeSet;

 class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
 public:
@@ -234,6 +235,17 @@ private:

 U_NAMESPACE_END

+/**
+ * Get the set of "white space" characters in the sense of ICU rule
+ * parsers.  Caller must close/delete result.
+ * Equivalent to the set of characters with the Pattern_White_Space Unicode property.
+ * Stable set of characters, won't change.
+ * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
+ * @internal
+ */
+U_CAPI UnicodeSet* U_EXPORT2
+uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
+
 /**
 * Is this character a "white space" in the sense of ICU rule parsers?
 * Equivalent to test for Pattern_White_Space Unicode property.