ICU-5445 Consolidate RuleWhiteSpace implementations, and reduce casting between USet and UnicodeSet.

X-SVN-Rev: 22135
This commit is contained in:
George Rhoten 2007-07-25 02:51:25 +00:00
parent faab4a6efb
commit ab838a4310
5 changed files with 30 additions and 28 deletions

View File

@ -2,7 +2,7 @@
//
// file: rbbiscan.cpp
//
// Copyright (C) 2002-2006, International Business Machines Corporation and others.
// Copyright (C) 2002-2007, International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains the Rule Based Break Iterator Rule Builder functions for
@ -23,7 +23,7 @@
#include "unicode/uchriter.h"
#include "unicode/parsepos.h"
#include "unicode/parseerr.h"
#include "uprops.h"
#include "util.h"
#include "cmemory.h"
#include "cstring.h"
@ -127,7 +127,7 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
// and the time to build these few sets should be small compared to a
// full break iterator build.
fRuleSets[kRuleSet_rule_char-128] = new UnicodeSet(gRuleSet_rule_char_pattern, *rb->fStatus);
fRuleSets[kRuleSet_white_space-128] = (UnicodeSet*) uprv_openRuleWhiteSpaceSet(rb->fStatus);
fRuleSets[kRuleSet_white_space-128] = uprv_openRuleWhiteSpaceSet(rb->fStatus);
fRuleSets[kRuleSet_name_char-128] = new UnicodeSet(gRuleSet_name_char_pattern, *rb->fStatus);
fRuleSets[kRuleSet_name_start_char-128] = new UnicodeSet(gRuleSet_name_start_char_pattern, *rb->fStatus);
fRuleSets[kRuleSet_digit_char-128] = new UnicodeSet(gRuleSet_digit_char_pattern, *rb->fStatus);

View File

@ -303,18 +303,6 @@ uset_getItem(const USet* uset, int32_t itemIndex,
// return TRUE;
//}
/*
 * Create the set of Pattern_White_Space characters used by ICU rule parsers:
 * U+0009..U+000D, U+0020, U+0085, U+200E..U+200F and U+2028..U+2029.
 *
 * @param ec  In/out error code. If it already indicates a failure on entry,
 *            nothing is allocated and NULL is returned. Set to
 *            U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated.
 * @return    A newly allocated set owned by the caller, or NULL on failure.
 */
U_CAPI USet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
    if(U_FAILURE(*ec)) {
        return NULL;
    }
    // create a set with the Pattern_White_Space characters,
    // without a pattern for fewer code dependencies
    UnicodeSet *set=new UnicodeSet(9, 0xd);
    // ICU's operator new reports failure by returning NULL rather than
    // throwing, so the result must be checked before it is dereferenced.
    if(set==NULL) {
        *ec=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
    return (USet *)set;
}
/*
* Serialize a USet into 16-bit units.
* Store BMP code points as themselves with one 16-bit unit each.

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2005, International Business Machines
* Copyright (C) 2004-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -52,16 +52,5 @@ typedef struct USetAdder USetAdder;
U_CDECL_END
/**
* Get the set of "white space" characters in the sense of ICU rule
* parsers. Caller must close/delete result.
* Equivalent to the set of characters with the Pattern_White_Space Unicode property.
* Stable set of characters, won't change.
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
* @internal
*/
U_CAPI USet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
#endif

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2001-2006, International Business Machines
* Copyright (c) 2001-2007, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -10,6 +10,7 @@
#include "util.h"
#include "unicode/unimatch.h"
#include "unicode/uniset.h"
// Define UChar constants using hex for EBCDIC compatibility
@ -427,4 +428,16 @@ uprv_isRuleWhiteSpace(UChar32 c) {
c == 0x200E || c == 0x200F || c >= 0x2028));
}
/*
 * Create the set of Pattern_White_Space characters used by ICU rule parsers:
 * U+0009..U+000D, U+0020, U+0085, U+200E..U+200F and U+2028..U+2029.
 *
 * @param ec  In/out error code. If it already indicates a failure on entry,
 *            nothing is allocated and NULL is returned. Set to
 *            U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated.
 * @return    A newly allocated UnicodeSet owned by the caller (delete it when
 *            done), or NULL on failure.
 */
U_CAPI UnicodeSet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
    if(U_FAILURE(*ec)) {
        return NULL;
    }
    // create a set with the Pattern_White_Space characters,
    // without a pattern for fewer code dependencies
    UnicodeSet *set=new UnicodeSet(9, 0xd);
    // ICU's operator new reports failure by returning NULL rather than
    // throwing, so the result must be checked before it is dereferenced.
    if(set==NULL) {
        *ec=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
    return set;
}
//eof

View File

@ -22,6 +22,7 @@
U_NAMESPACE_BEGIN
class UnicodeMatcher;
class UnicodeSet;
class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
public:
@ -234,6 +235,17 @@ private:
U_NAMESPACE_END
/**
* Get the set of "white space" characters in the sense of ICU rule
* parsers. The caller owns the returned set and must close/delete it.
* Returns NULL if *ec already indicates a failure on entry.
* Equivalent to the set of characters with the Pattern_White_Space Unicode property.
* This is a stable set of characters; it will not change.
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
* @internal
*/
U_CAPI UnicodeSet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
/**
* Is this character a "white space" in the sense of ICU rule parsers?
* Equivalent to test for Pattern_White_Space Unicode property.