ICU-4078 Fix for AIX with Visual Age 5 compiler, and make sure that the
disentanglement is correct by putting all the UnicodeSet virtual functions in one file. Also move some of the rule whitespace handling into better locations. X-SVN-Rev: 16519
This commit is contained in:
parent
3ac97089c4
commit
e69fca9d5f
@ -8,6 +8,9 @@
|
|||||||
**********************************************************************
|
**********************************************************************
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifndef CHARSTRING_H
|
||||||
|
#define CHARSTRING_H
|
||||||
|
|
||||||
#include "unicode/utypes.h"
|
#include "unicode/utypes.h"
|
||||||
#include "unicode/uobject.h"
|
#include "unicode/uobject.h"
|
||||||
#include "unicode/unistr.h"
|
#include "unicode/unistr.h"
|
||||||
@ -78,4 +81,5 @@ inline CharString::~CharString() {
|
|||||||
|
|
||||||
U_NAMESPACE_END
|
U_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif
|
||||||
//eof
|
//eof
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
#include "unicode/parsepos.h"
|
#include "unicode/parsepos.h"
|
||||||
#include "unicode/unistr.h"
|
#include "unicode/unistr.h"
|
||||||
#include "unicode/symtable.h"
|
#include "unicode/symtable.h"
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
|
|
||||||
U_NAMESPACE_BEGIN
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
@ -534,7 +534,7 @@ public:
|
|||||||
* U+000A, U+0020..U+007E.
|
* U+000A, U+0020..U+007E.
|
||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
UnicodeString& toPattern(UnicodeString& result,
|
virtual UnicodeString& toPattern(UnicodeString& result,
|
||||||
UBool escapeUnprintable = FALSE) const;
|
UBool escapeUnprintable = FALSE) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -23,6 +23,24 @@
|
|||||||
#include "uassert.h"
|
#include "uassert.h"
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
|
|
||||||
|
// Define UChar constants using hex for EBCDIC compatibility
|
||||||
|
// Used #define to reduce private static exports and memory access time.
|
||||||
|
#define SET_OPEN ((UChar)0x005B) /*[*/
|
||||||
|
#define SET_CLOSE ((UChar)0x005D) /*]*/
|
||||||
|
#define HYPHEN ((UChar)0x002D) /*-*/
|
||||||
|
#define COMPLEMENT ((UChar)0x005E) /*^*/
|
||||||
|
#define COLON ((UChar)0x003A) /*:*/
|
||||||
|
#define BACKSLASH ((UChar)0x005C) /*\*/
|
||||||
|
#define INTERSECTION ((UChar)0x0026) /*&*/
|
||||||
|
#define UPPER_U ((UChar)0x0055) /*U*/
|
||||||
|
#define LOWER_U ((UChar)0x0075) /*u*/
|
||||||
|
#define OPEN_BRACE ((UChar)123) /*{*/
|
||||||
|
#define CLOSE_BRACE ((UChar)125) /*}*/
|
||||||
|
#define UPPER_P ((UChar)0x0050) /*P*/
|
||||||
|
#define LOWER_P ((UChar)0x0070) /*p*/
|
||||||
|
#define UPPER_N ((UChar)78) /*N*/
|
||||||
|
#define EQUALS ((UChar)0x003D) /*=*/
|
||||||
|
|
||||||
// HIGH_VALUE > all valid values. 110000 for codepoints
|
// HIGH_VALUE > all valid values. 110000 for codepoints
|
||||||
#define UNICODESET_HIGH 0x0110000
|
#define UNICODESET_HIGH 0x0110000
|
||||||
|
|
||||||
@ -1645,4 +1663,172 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity)
|
|||||||
pat.truncate(0);
|
pat.truncate(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append the <code>toPattern()</code> representation of a
|
||||||
|
* string to the given <code>StringBuffer</code>.
|
||||||
|
*/
|
||||||
|
void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool
|
||||||
|
escapeUnprintable) {
|
||||||
|
UChar32 cp;
|
||||||
|
for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
|
||||||
|
_appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append the <code>toPattern()</code> representation of a
|
||||||
|
* character to the given <code>StringBuffer</code>.
|
||||||
|
*/
|
||||||
|
void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool
|
||||||
|
escapeUnprintable) {
|
||||||
|
if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
|
||||||
|
// Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
|
||||||
|
// unprintable
|
||||||
|
if (ICU_Utility::escapeUnprintable(buf, c)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Okay to let ':' pass through
|
||||||
|
switch (c) {
|
||||||
|
case SET_OPEN:
|
||||||
|
case SET_CLOSE:
|
||||||
|
case HYPHEN:
|
||||||
|
case COMPLEMENT:
|
||||||
|
case INTERSECTION:
|
||||||
|
case BACKSLASH:
|
||||||
|
case OPEN_BRACE:
|
||||||
|
case CLOSE_BRACE:
|
||||||
|
case COLON:
|
||||||
|
case SymbolTable::SYMBOL_REF:
|
||||||
|
buf.append(BACKSLASH);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// Escape whitespace
|
||||||
|
if (uprv_isRuleWhiteSpace(c)) {
|
||||||
|
buf.append(BACKSLASH);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
buf.append(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append a string representation of this set to result. This will be
|
||||||
|
* a cleaned version of the string passed to applyPattern(), if there
|
||||||
|
* is one. Otherwise it will be generated.
|
||||||
|
*/
|
||||||
|
UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
||||||
|
UBool escapeUnprintable) const {
|
||||||
|
if (pat.length() > 0) {
|
||||||
|
int32_t i;
|
||||||
|
int32_t backslashCount = 0;
|
||||||
|
for (i=0; i<pat.length(); ) {
|
||||||
|
UChar32 c = pat.char32At(i);
|
||||||
|
i += UTF_CHAR_LENGTH(c);
|
||||||
|
if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
|
||||||
|
// If the unprintable character is preceded by an odd
|
||||||
|
// number of backslashes, then it has been escaped.
|
||||||
|
// Before unescaping it, we delete the final
|
||||||
|
// backslash.
|
||||||
|
if ((backslashCount % 2) == 1) {
|
||||||
|
result.truncate(result.length() - 1);
|
||||||
|
}
|
||||||
|
ICU_Utility::escapeUnprintable(result, c);
|
||||||
|
backslashCount = 0;
|
||||||
|
} else {
|
||||||
|
result.append(c);
|
||||||
|
if (c == BACKSLASH) {
|
||||||
|
++backslashCount;
|
||||||
|
} else {
|
||||||
|
backslashCount = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
return _generatePattern(result, escapeUnprintable);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string representation of this set. If the result of
|
||||||
|
* calling this function is passed to a UnicodeSet constructor, it
|
||||||
|
* will produce another set that is equal to this one.
|
||||||
|
*/
|
||||||
|
UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
|
||||||
|
UBool escapeUnprintable) const {
|
||||||
|
result.truncate(0);
|
||||||
|
return _toPattern(result, escapeUnprintable);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate and append a string representation of this set to result.
|
||||||
|
* This does not use this.pat, the cleaned up copy of the string
|
||||||
|
* passed to applyPattern().
|
||||||
|
*/
|
||||||
|
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||||
|
UBool escapeUnprintable) const {
|
||||||
|
result.append(SET_OPEN);
|
||||||
|
|
||||||
|
// // Check against the predefined categories. We implicitly build
|
||||||
|
// // up ALL category sets the first time toPattern() is called.
|
||||||
|
// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
|
||||||
|
// if (*this == getCategorySet(cat)) {
|
||||||
|
// result.append(COLON);
|
||||||
|
// result.append(CATEGORY_NAMES, cat*2, 2);
|
||||||
|
// return result.append(CATEGORY_CLOSE);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
int32_t count = getRangeCount();
|
||||||
|
|
||||||
|
// If the set contains at least 2 intervals and includes both
|
||||||
|
// MIN_VALUE and MAX_VALUE, then the inverse representation will
|
||||||
|
// be more economical.
|
||||||
|
if (count > 1 &&
|
||||||
|
getRangeStart(0) == MIN_VALUE &&
|
||||||
|
getRangeEnd(count-1) == MAX_VALUE) {
|
||||||
|
|
||||||
|
// Emit the inverse
|
||||||
|
result.append(COMPLEMENT);
|
||||||
|
|
||||||
|
for (int32_t i = 1; i < count; ++i) {
|
||||||
|
UChar32 start = getRangeEnd(i-1)+1;
|
||||||
|
UChar32 end = getRangeStart(i)-1;
|
||||||
|
_appendToPat(result, start, escapeUnprintable);
|
||||||
|
if (start != end) {
|
||||||
|
if ((start+1) != end) {
|
||||||
|
result.append(HYPHEN);
|
||||||
|
}
|
||||||
|
_appendToPat(result, end, escapeUnprintable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default; emit the ranges as pairs
|
||||||
|
else {
|
||||||
|
for (int32_t i = 0; i < count; ++i) {
|
||||||
|
UChar32 start = getRangeStart(i);
|
||||||
|
UChar32 end = getRangeEnd(i);
|
||||||
|
_appendToPat(result, start, escapeUnprintable);
|
||||||
|
if (start != end) {
|
||||||
|
if ((start+1) != end) {
|
||||||
|
result.append(HYPHEN);
|
||||||
|
}
|
||||||
|
_appendToPat(result, end, escapeUnprintable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i<strings->size(); ++i) {
|
||||||
|
result.append(OPEN_BRACE);
|
||||||
|
_appendToPat(result,
|
||||||
|
*(const UnicodeString*) strings->elementAt(i),
|
||||||
|
escapeUnprintable);
|
||||||
|
result.append(CLOSE_BRACE);
|
||||||
|
}
|
||||||
|
return result.append(SET_CLOSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
U_NAMESPACE_END
|
U_NAMESPACE_END
|
||||||
|
@ -397,171 +397,6 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
|
|||||||
resemblesPropertyPattern(pattern, pos);
|
resemblesPropertyPattern(pattern, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Append the <code>toPattern()</code> representation of a
|
|
||||||
* string to the given <code>StringBuffer</code>.
|
|
||||||
*/
|
|
||||||
void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable) {
|
|
||||||
UChar32 cp;
|
|
||||||
for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
|
|
||||||
_appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Append the <code>toPattern()</code> representation of a
|
|
||||||
* character to the given <code>StringBuffer</code>.
|
|
||||||
*/
|
|
||||||
void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable) {
|
|
||||||
if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
|
|
||||||
// Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
|
|
||||||
// unprintable
|
|
||||||
if (ICU_Utility::escapeUnprintable(buf, c)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Okay to let ':' pass through
|
|
||||||
switch (c) {
|
|
||||||
case SET_OPEN:
|
|
||||||
case SET_CLOSE:
|
|
||||||
case HYPHEN:
|
|
||||||
case COMPLEMENT:
|
|
||||||
case INTERSECTION:
|
|
||||||
case BACKSLASH:
|
|
||||||
case 123/*{*/:
|
|
||||||
case 125/*}*/:
|
|
||||||
case SymbolTable::SYMBOL_REF:
|
|
||||||
case COLON:
|
|
||||||
buf.append(BACKSLASH);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// Escape whitespace
|
|
||||||
if (uprv_isRuleWhiteSpace(c)) {
|
|
||||||
buf.append(BACKSLASH);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
buf.append(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a string representation of this set. If the result of
|
|
||||||
* calling this function is passed to a UnicodeSet constructor, it
|
|
||||||
* will produce another set that is equal to this one.
|
|
||||||
*/
|
|
||||||
UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
|
|
||||||
UBool escapeUnprintable) const {
|
|
||||||
result.truncate(0);
|
|
||||||
return _toPattern(result, escapeUnprintable);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Append a string representation of this set to result. This will be
|
|
||||||
* a cleaned version of the string passed to applyPattern(), if there
|
|
||||||
* is one. Otherwise it will be generated.
|
|
||||||
*/
|
|
||||||
UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
|
||||||
UBool escapeUnprintable) const {
|
|
||||||
if (pat.length() > 0) {
|
|
||||||
int32_t i;
|
|
||||||
int32_t backslashCount = 0;
|
|
||||||
for (i=0; i<pat.length(); ) {
|
|
||||||
UChar32 c = pat.char32At(i);
|
|
||||||
i += UTF_CHAR_LENGTH(c);
|
|
||||||
if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
|
|
||||||
// If the unprintable character is preceded by an odd
|
|
||||||
// number of backslashes, then it has been escaped.
|
|
||||||
// Before unescaping it, we delete the final
|
|
||||||
// backslash.
|
|
||||||
if ((backslashCount % 2) == 1) {
|
|
||||||
result.truncate(result.length() - 1);
|
|
||||||
}
|
|
||||||
ICU_Utility::escapeUnprintable(result, c);
|
|
||||||
backslashCount = 0;
|
|
||||||
} else {
|
|
||||||
result.append(c);
|
|
||||||
if (c == BACKSLASH) {
|
|
||||||
++backslashCount;
|
|
||||||
} else {
|
|
||||||
backslashCount = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
return _generatePattern(result, escapeUnprintable);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate and append a string representation of this set to result.
|
|
||||||
* This does not use this.pat, the cleaned up copy of the string
|
|
||||||
* passed to applyPattern().
|
|
||||||
*/
|
|
||||||
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|
||||||
UBool escapeUnprintable) const {
|
|
||||||
result.append(SET_OPEN);
|
|
||||||
|
|
||||||
// // Check against the predefined categories. We implicitly build
|
|
||||||
// // up ALL category sets the first time toPattern() is called.
|
|
||||||
// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
|
|
||||||
// if (*this == getCategorySet(cat)) {
|
|
||||||
// result.append(COLON);
|
|
||||||
// result.append(CATEGORY_NAMES, cat*2, 2);
|
|
||||||
// return result.append(CATEGORY_CLOSE);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
int32_t count = getRangeCount();
|
|
||||||
|
|
||||||
// If the set contains at least 2 intervals and includes both
|
|
||||||
// MIN_VALUE and MAX_VALUE, then the inverse representation will
|
|
||||||
// be more economical.
|
|
||||||
if (count > 1 &&
|
|
||||||
getRangeStart(0) == MIN_VALUE &&
|
|
||||||
getRangeEnd(count-1) == MAX_VALUE) {
|
|
||||||
|
|
||||||
// Emit the inverse
|
|
||||||
result.append(COMPLEMENT);
|
|
||||||
|
|
||||||
for (int32_t i = 1; i < count; ++i) {
|
|
||||||
UChar32 start = getRangeEnd(i-1)+1;
|
|
||||||
UChar32 end = getRangeStart(i)-1;
|
|
||||||
_appendToPat(result, start, escapeUnprintable);
|
|
||||||
if (start != end) {
|
|
||||||
if ((start+1) != end) {
|
|
||||||
result.append(HYPHEN);
|
|
||||||
}
|
|
||||||
_appendToPat(result, end, escapeUnprintable);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Default; emit the ranges as pairs
|
|
||||||
else {
|
|
||||||
for (int32_t i = 0; i < count; ++i) {
|
|
||||||
UChar32 start = getRangeStart(i);
|
|
||||||
UChar32 end = getRangeEnd(i);
|
|
||||||
_appendToPat(result, start, escapeUnprintable);
|
|
||||||
if (start != end) {
|
|
||||||
if ((start+1) != end) {
|
|
||||||
result.append(HYPHEN);
|
|
||||||
}
|
|
||||||
_appendToPat(result, end, escapeUnprintable);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int32_t i = 0; i<strings->size(); ++i) {
|
|
||||||
result.append(OPEN_BRACE);
|
|
||||||
_appendToPat(result,
|
|
||||||
*(const UnicodeString*) strings->elementAt(i),
|
|
||||||
escapeUnprintable);
|
|
||||||
result.append(CLOSE_BRACE);
|
|
||||||
}
|
|
||||||
return result.append(SET_CLOSE);
|
|
||||||
}
|
|
||||||
|
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
// Implementation: Pattern parsing
|
// Implementation: Pattern parsing
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
|
@ -142,30 +142,6 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
|
|||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
U_CAPI UBool U_EXPORT2
|
|
||||||
uprv_isRuleWhiteSpace(UChar32 c) {
|
|
||||||
/* "white space" in the sense of ICU rule parsers
|
|
||||||
This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES.
|
|
||||||
See UTR #31: http://www.unicode.org/reports/tr31/.
|
|
||||||
U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029
|
|
||||||
*/
|
|
||||||
return (c >= 0x0009 && c <= 0x2029 &&
|
|
||||||
(c <= 0x000D || c == 0x0020 || c == 0x0085 ||
|
|
||||||
c == 0x200E || c == 0x200F || c >= 0x2028));
|
|
||||||
}
|
|
||||||
|
|
||||||
static const UChar _PATTERN[] = {
|
|
||||||
/* "[[:Cf:][:WSpace:]]" */
|
|
||||||
91, 91, 58, 67, 102, 58, 93, 91, 58, 87,
|
|
||||||
83, 112, 97, 99, 101, 58, 93, 93, 0
|
|
||||||
};
|
|
||||||
|
|
||||||
U_CAPI USet* U_EXPORT2
|
|
||||||
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
|
|
||||||
return uset_openPattern(_PATTERN,
|
|
||||||
sizeof(_PATTERN)/sizeof(_PATTERN[0])-1, ec);
|
|
||||||
}
|
|
||||||
|
|
||||||
U_CAPI int32_t U_EXPORT2
|
U_CAPI int32_t U_EXPORT2
|
||||||
u_getIntPropertyValue(UChar32 c, UProperty which) {
|
u_getIntPropertyValue(UChar32 c, UProperty which) {
|
||||||
UErrorCode errorCode;
|
UErrorCode errorCode;
|
||||||
|
@ -278,21 +278,6 @@ enum {
|
|||||||
ZWNBSP =0xfeff
|
ZWNBSP =0xfeff
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Is this character a "white space" in the sense of ICU rule parsers?
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
U_CAPI UBool U_EXPORT2
|
|
||||||
uprv_isRuleWhiteSpace(UChar32 c);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the set of "white space" characters in the sense of ICU rule
|
|
||||||
* parsers. Caller must close/delete result.
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
U_CAPI USet* U_EXPORT2
|
|
||||||
uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the maximum length of a (regular/1.0/extended) character name.
|
* Get the maximum length of a (regular/1.0/extended) character name.
|
||||||
* @return 0 if no character names available.
|
* @return 0 if no character names available.
|
||||||
|
@ -48,4 +48,13 @@ typedef struct USetAdder USetAdder;
|
|||||||
|
|
||||||
U_CDECL_END
|
U_CDECL_END
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the set of "white space" characters in the sense of ICU rule
|
||||||
|
* parsers. Caller must close/delete result.
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
U_CAPI USet* U_EXPORT2
|
||||||
|
uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -132,3 +132,15 @@ uset_toPattern(const USet* set,
|
|||||||
((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable);
|
((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable);
|
||||||
return pat.extract(result, resultCapacity, *ec);
|
return pat.extract(result, resultCapacity, *ec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
U_CAPI USet* U_EXPORT2
|
||||||
|
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
|
||||||
|
static const UChar _PATTERN[] = {
|
||||||
|
/* "[[:Cf:][:WSpace:]]" */
|
||||||
|
91, 91, 58, 67, 102, 58, 93, 91, 58, 87,
|
||||||
|
83, 112, 97, 99, 101, 58, 93, 93, 0
|
||||||
|
};
|
||||||
|
return uset_openPattern(_PATTERN,
|
||||||
|
sizeof(_PATTERN)/sizeof(_PATTERN[0])-1, ec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -606,4 +606,16 @@ void ICU_Utility::appendToRule(UnicodeString& rule,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
U_CAPI UBool U_EXPORT2
|
||||||
|
uprv_isRuleWhiteSpace(UChar32 c) {
|
||||||
|
/* "white space" in the sense of ICU rule parsers
|
||||||
|
This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES.
|
||||||
|
See UTR #31: http://www.unicode.org/reports/tr31/.
|
||||||
|
U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029
|
||||||
|
*/
|
||||||
|
return (c >= 0x0009 && c <= 0x2029 &&
|
||||||
|
(c <= 0x000D || c == 0x0020 || c == 0x0085 ||
|
||||||
|
c == 0x200E || c == 0x200F || c >= 0x2028));
|
||||||
|
}
|
||||||
|
|
||||||
//eof
|
//eof
|
||||||
|
@ -231,6 +231,13 @@ private:
|
|||||||
ICU_Utility();
|
ICU_Utility();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this character a "white space" in the sense of ICU rule parsers?
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
U_CAPI UBool U_EXPORT2
|
||||||
|
uprv_isRuleWhiteSpace(UChar32 c);
|
||||||
|
|
||||||
U_NAMESPACE_END
|
U_NAMESPACE_END
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -50,7 +50,7 @@
|
|||||||
#include "unicode/uchar.h"
|
#include "unicode/uchar.h"
|
||||||
#include "unicode/curramt.h"
|
#include "unicode/curramt.h"
|
||||||
#include "ucurrimp.h"
|
#include "ucurrimp.h"
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
#include "digitlst.h"
|
#include "digitlst.h"
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
#include "cstring.h"
|
#include "cstring.h"
|
||||||
|
@ -34,7 +34,7 @@
|
|||||||
#include "unicode/rbnf.h"
|
#include "unicode/rbnf.h"
|
||||||
#include "ustrfmt.h"
|
#include "ustrfmt.h"
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
#include "uassert.h"
|
#include "uassert.h"
|
||||||
|
|
||||||
// *****************************************************************************
|
// *****************************************************************************
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
|
|
||||||
U_NAMESPACE_BEGIN
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
#include "nfrlist.h"
|
#include "nfrlist.h"
|
||||||
#include "nfsubs.h"
|
#include "nfsubs.h"
|
||||||
|
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
|
|
||||||
U_NAMESPACE_BEGIN
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
|
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
#include "cstring.h"
|
#include "cstring.h"
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
|
|
||||||
// debugging
|
// debugging
|
||||||
// #define DEBUG
|
// #define DEBUG
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
#include "unicode/parsepos.h"
|
#include "unicode/parsepos.h"
|
||||||
#include "unicode/parseerr.h"
|
#include "unicode/parseerr.h"
|
||||||
#include "unicode/regex.h"
|
#include "unicode/regex.h"
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
#include "cstring.h"
|
#include "cstring.h"
|
||||||
#include "uvectr32.h"
|
#include "uvectr32.h"
|
||||||
|
@ -39,7 +39,7 @@
|
|||||||
#include "unicode/dcfmtsym.h"
|
#include "unicode/dcfmtsym.h"
|
||||||
#include "unicode/uchar.h"
|
#include "unicode/uchar.h"
|
||||||
#include "unicode/ustring.h"
|
#include "unicode/ustring.h"
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
#include "gregoimp.h"
|
#include "gregoimp.h"
|
||||||
#include "cstring.h"
|
#include "cstring.h"
|
||||||
#include "uassert.h"
|
#include "uassert.h"
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
#include "ucol_tok.h"
|
#include "ucol_tok.h"
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
#include "uprops.h"
|
#include "util.h"
|
||||||
|
|
||||||
U_CDECL_BEGIN
|
U_CDECL_BEGIN
|
||||||
static int32_t U_EXPORT2 U_CALLCONV
|
static int32_t U_EXPORT2 U_CALLCONV
|
||||||
|
Loading…
Reference in New Issue
Block a user