ICU-5445 Consolidate RuleWhiteSpace implementations, and reduce casting between USet and UnicodeSet.
X-SVN-Rev: 22135
This commit is contained in:
parent
faab4a6efb
commit
ab838a4310
@ -2,7 +2,7 @@
|
||||
//
|
||||
// file: rbbiscan.cpp
|
||||
//
|
||||
// Copyright (C) 2002-2006, International Business Machines Corporation and others.
|
||||
// Copyright (C) 2002-2007, International Business Machines Corporation and others.
|
||||
// All Rights Reserved.
|
||||
//
|
||||
// This file contains the Rule Based Break Iterator Rule Builder functions for
|
||||
@ -23,7 +23,7 @@
|
||||
#include "unicode/uchriter.h"
|
||||
#include "unicode/parsepos.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "uprops.h"
|
||||
#include "util.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
@ -127,7 +127,7 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
|
||||
// and the time to build these few sets should be small compared to a
|
||||
// full break iterator build.
|
||||
fRuleSets[kRuleSet_rule_char-128] = new UnicodeSet(gRuleSet_rule_char_pattern, *rb->fStatus);
|
||||
fRuleSets[kRuleSet_white_space-128] = (UnicodeSet*) uprv_openRuleWhiteSpaceSet(rb->fStatus);
|
||||
fRuleSets[kRuleSet_white_space-128] = uprv_openRuleWhiteSpaceSet(rb->fStatus);
|
||||
fRuleSets[kRuleSet_name_char-128] = new UnicodeSet(gRuleSet_name_char_pattern, *rb->fStatus);
|
||||
fRuleSets[kRuleSet_name_start_char-128] = new UnicodeSet(gRuleSet_name_start_char_pattern, *rb->fStatus);
|
||||
fRuleSets[kRuleSet_digit_char-128] = new UnicodeSet(gRuleSet_digit_char_pattern, *rb->fStatus);
|
||||
|
@ -303,18 +303,6 @@ uset_getItem(const USet* uset, int32_t itemIndex,
|
||||
// return TRUE;
|
||||
//}
|
||||
|
||||
/*
 * Build the set of ICU rule "white space" characters
 * (U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, U+2028..U+2029).
 *
 * @param ec in/out error code. If it already indicates a failure on entry,
 *           nothing is created. Set to U_MEMORY_ALLOCATION_ERROR when the
 *           set cannot be allocated.
 * @return a newly allocated set that the caller must close/delete,
 *         or NULL on failure.
 */
U_CAPI USet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
    if(U_FAILURE(*ec)) {
        return NULL;
    }
    // create a set with the Pattern_White_Space characters,
    // without a pattern for fewer code dependencies
    UnicodeSet *set=new UnicodeSet(9, 0xd);
    // NOTE(review): ICU objects are presumably allocated through UMemory's
    // operator new, which reports failure by returning NULL rather than
    // throwing; guard before dereferencing.
    if(set==NULL) {
        *ec=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
    return (USet *)set;
}
|
||||
|
||||
/*
|
||||
* Serialize a USet into 16-bit units.
|
||||
* Store BMP code points as themselves with one 16-bit unit each.
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004-2005, International Business Machines
|
||||
* Copyright (C) 2004-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -52,16 +52,5 @@ typedef struct USetAdder USetAdder;
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
/**
|
||||
* Get the set of "white space" characters in the sense of ICU rule
|
||||
* parsers. Caller must close/delete result.
|
||||
* Equivalent to the set of characters with the Pattern_White_Space Unicode property.
|
||||
* Stable set of characters, won't change.
|
||||
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI USet* U_EXPORT2
|
||||
uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2006, International Business Machines
|
||||
* Copyright (c) 2001-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -10,6 +10,7 @@
|
||||
|
||||
#include "util.h"
|
||||
#include "unicode/unimatch.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
|
||||
@ -427,4 +428,16 @@ uprv_isRuleWhiteSpace(UChar32 c) {
|
||||
c == 0x200E || c == 0x200F || c >= 0x2028));
|
||||
}
|
||||
|
||||
/*
 * Build the set of ICU rule "white space" characters
 * (U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, U+2028..U+2029).
 *
 * @param ec in/out error code. If it already indicates a failure on entry,
 *           nothing is created. Set to U_MEMORY_ALLOCATION_ERROR when the
 *           set cannot be allocated.
 * @return a newly allocated UnicodeSet owned by the caller,
 *         or NULL on failure.
 */
U_CAPI UnicodeSet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
    if(U_FAILURE(*ec)) {
        return NULL;
    }
    // create a set with the Pattern_White_Space characters,
    // without a pattern for fewer code dependencies
    UnicodeSet *set=new UnicodeSet(9, 0xd);
    // NOTE(review): ICU objects are presumably allocated through UMemory's
    // operator new, which reports failure by returning NULL rather than
    // throwing; guard before dereferencing.
    if(set==NULL) {
        *ec=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
    return set;
}
|
||||
|
||||
//eof
|
||||
|
@ -22,6 +22,7 @@
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeMatcher;
|
||||
class UnicodeSet;
|
||||
|
||||
class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
|
||||
public:
|
||||
@ -234,6 +235,17 @@ private:
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/**
|
||||
* Get the set of "white space" characters in the sense of ICU rule
|
||||
* parsers. Caller must close/delete result.
* Returns NULL if *ec already indicates a failure.
|
||||
* Equivalent to the set of characters with the Pattern_White_Space Unicode property.
|
||||
* Stable set of characters, won't change.
|
||||
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UnicodeSet* U_EXPORT2
|
||||
uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Is this character a "white space" in the sense of ICU rule parsers?
|
||||
* Equivalent to test for Pattern_White_Space Unicode property.
|
||||
|
Loading…
Reference in New Issue
Block a user