ICU-2657 add/change properties functions to match Java
X-SVN-Rev: 10999
This commit is contained in:
parent
e87f9e1104
commit
77a600126a
@ -316,10 +316,7 @@ U_CAPI UBool U_EXPORT2
|
||||
u_isdigit(UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)(((1UL<<GET_CATEGORY(props))&
|
||||
(1UL<<U_DECIMAL_DIGIT_NUMBER|1UL<<U_OTHER_NUMBER|1UL<<U_LETTER_NUMBER)
|
||||
)!=0);
|
||||
/* ### TODO: should this not check only U_DECIMAL_DIGIT_NUMBER?! */
|
||||
return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
|
||||
}
|
||||
|
||||
/* Checks if the Unicode character is a letter.*/
|
||||
@ -327,9 +324,7 @@ U_CAPI UBool U_EXPORT2
|
||||
u_isalpha(UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)(((1UL<<GET_CATEGORY(props))&
|
||||
(1UL<<U_UPPERCASE_LETTER|1UL<<U_LOWERCASE_LETTER|1UL<<U_TITLECASE_LETTER|1UL<<U_MODIFIER_LETTER|1UL<<U_OTHER_LETTER)
|
||||
)!=0);
|
||||
return (UBool)((U_MASK(GET_CATEGORY(props))&U_GC_L_MASK)!=0);
|
||||
}
|
||||
|
||||
/* Checks if ch is a letter or a decimal digit */
|
||||
@ -337,10 +332,7 @@ U_CAPI UBool U_EXPORT2
|
||||
u_isalnum(UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)(((1UL<<GET_CATEGORY(props))&
|
||||
(1UL<<U_DECIMAL_DIGIT_NUMBER|1UL<<U_OTHER_NUMBER|1UL<<U_LETTER_NUMBER|
|
||||
1UL<<U_UPPERCASE_LETTER|1UL<<U_LOWERCASE_LETTER|1UL<<U_TITLECASE_LETTER|1UL<<U_MODIFIER_LETTER|1UL<<U_OTHER_LETTER)
|
||||
)!=0);
|
||||
return (UBool)((U_MASK(GET_CATEGORY(props))&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
|
||||
}
|
||||
|
||||
/* Checks if ch is a unicode character with assigned character type.*/
|
||||
@ -374,6 +366,11 @@ u_iscntrl(UChar32 c) {
|
||||
)!=0);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isISOControl(UChar32 c) {
|
||||
return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f);
|
||||
}
|
||||
|
||||
/* Some control characters that are used as space. */
|
||||
#define IS_THAT_CONTROL_SPACE(c) \
|
||||
((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)
|
||||
@ -388,16 +385,23 @@ u_isspace(UChar32 c) {
|
||||
)!=0) || IS_THAT_CONTROL_SPACE(c));
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isJavaSpaceChar(UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)((U_MASK(GET_CATEGORY(props))&U_GC_Z_MASK)!=0);
|
||||
}
|
||||
|
||||
/* Checks if the Unicode character is a whitespace character.*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isWhitespace(UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)((((1UL<<GET_CATEGORY(props))&
|
||||
(1UL<<U_SPACE_SEPARATOR|1UL<<U_LINE_SEPARATOR|1UL<<U_PARAGRAPH_SEPARATOR)
|
||||
)!=0 &&
|
||||
c!=NBSP && c!=NNBSP && c!=ZWNBSP) || /* exclude no-break spaces */
|
||||
IS_THAT_CONTROL_SPACE(c));
|
||||
return (UBool)(
|
||||
((U_MASK(GET_CATEGORY(props))&U_GC_Z_MASK)!=0 &&
|
||||
c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */
|
||||
IS_THAT_CONTROL_SPACE(c)
|
||||
);
|
||||
}
|
||||
|
||||
/* Checks if the Unicode character is printable.*/
|
||||
@ -419,9 +423,7 @@ u_isIDStart(UChar32 c) {
|
||||
/* same as u_isalpha() plus letter numbers (general category Nl) */
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)(((1UL<<GET_CATEGORY(props))&
|
||||
(1UL<<U_UPPERCASE_LETTER|1UL<<U_LOWERCASE_LETTER|1UL<<U_TITLECASE_LETTER|1UL<<U_MODIFIER_LETTER|1UL<<U_OTHER_LETTER)
|
||||
)!=0);
|
||||
return (UBool)((U_MASK(GET_CATEGORY(props))&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
|
||||
}
|
||||
|
||||
/* Checks if the Unicode character can be a Unicode identifier part other than starting the
|
||||
@ -442,12 +444,13 @@ u_isIDPart(UChar32 c) {
|
||||
/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isIDIgnorable(UChar32 c) {
|
||||
return (UBool)((uint32_t)c<=8 ||
|
||||
(uint32_t)(c-0xe)<=(0x1b-0xe) ||
|
||||
(uint32_t)(c-0x7f)<=(0x9f-0x7f) ||
|
||||
(uint32_t)(c-0x200a)<=(0x200f-0x200a) ||
|
||||
(uint32_t)(c-0x206a)<=(0x206f-0x206a) ||
|
||||
c==0xfeff);
|
||||
if(c<=0x9f) {
|
||||
return u_isISOControl(c) && !IS_THAT_CONTROL_SPACE(c);
|
||||
} else {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
|
||||
}
|
||||
}
|
||||
|
||||
/*Checks if the Unicode character can start a Java identifier.*/
|
||||
@ -916,6 +919,7 @@ uchar_addPropertyStarts(USet *set) {
|
||||
|
||||
/* add no-break spaces for u_isWhitespace() that were not added above */
|
||||
USET_ADD_CP_AND_NEXT(set, NBSP);
|
||||
USET_ADD_CP_AND_NEXT(set, FIGURESP);
|
||||
USET_ADD_CP_AND_NEXT(set, NNBSP);
|
||||
|
||||
/* add for u_charDigitValue() */
|
||||
|
@ -55,6 +55,11 @@ U_CDECL_BEGIN
|
||||
* as well as semantic information such as whether a character is a digit or
|
||||
* uppercase, lowercase, or uncased.
|
||||
* <P>
|
||||
*
|
||||
* Many functions are designed to match java.lang.Character functions.
|
||||
* See the individual function documentation,
|
||||
* and see the JDK 1.4.1 java.lang.Character documentation
|
||||
* at http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -1445,6 +1450,7 @@ u_isUUppercase(UChar32 c);
|
||||
* @see UCHAR_WHITE_SPACE
|
||||
* @see u_isWhitespace
|
||||
* @see u_isspace
|
||||
* @see u_isJavaSpaceChar
|
||||
* @see u_hasBinaryProperty
|
||||
* @draft ICU 2.1
|
||||
*/
|
||||
@ -1618,21 +1624,30 @@ U_CAPI UBool U_EXPORT2
|
||||
u_istitle(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a digit according to UnicodeData.txt.
|
||||
* Determines whether the specified code point is a digit character according to Java.
|
||||
* True for characters with general category "Nd" (decimal digit numbers).
|
||||
*
|
||||
* Same as java.lang.Character.isDigit().
|
||||
*
|
||||
* @param c the code point to be tested
|
||||
* @return TRUE if the code point is a digit character according to Character.isDigit()
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a digit; false otherwise.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isdigit(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is an alphanumeric character
|
||||
* (letter or digit)according to UnicodeData.txt.
|
||||
* Determines whether the specified code point is an alphanumeric character
|
||||
* (letter or digit) according to Java.
|
||||
* True for characters with general categories
|
||||
* "L" (letters) and "Nd" (decimal digit numbers).
|
||||
*
|
||||
* Same as java.lang.Character.isLetterOrDigit().
|
||||
*
|
||||
* @param c the code point to be tested
|
||||
* @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit()
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a letter or a digit; false otherwise.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
@ -1672,40 +1687,63 @@ u_isalpha(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines if the specified character is a space character or not.
|
||||
* ### same as u_isJavaSpaceChar except... (ISO control codes)
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a space character; false otherwise.
|
||||
*
|
||||
* @see u_isJavaSpaceChar
|
||||
* @see u_isWhitespace
|
||||
* @see u_isUWhiteSpace
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isspace(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines if the specified character is white space according to ICU.
|
||||
* A character is considered to be an ICU whitespace character if and only
|
||||
* if it satisfies one of the following criteria:
|
||||
* <ul>
|
||||
* <li> It is a Unicode space separator (category "Zs"), but is not
|
||||
* a no-break space (\u00A0 or \uFEFF).
|
||||
* <li> It is a Unicode line separator (category "Zl").
|
||||
* <li> It is a Unicode paragraph separator (category "Zp").
|
||||
* <li> It is \u0009, HORIZONTAL TABULATION.
|
||||
* <li> It is \u000A, LINE FEED.
|
||||
* <li> It is \u000B, VERTICAL TABULATION.
|
||||
* <li> It is \u000C, FORM FEED.
|
||||
* <li> It is \u000D, CARRIAGE RETURN.
|
||||
* <li> It is \u001C, FILE SEPARATOR.
|
||||
* <li> It is \u001D, GROUP SEPARATOR.
|
||||
* <li> It is \u001E, RECORD SEPARATOR.
|
||||
* <li> It is \u001F, UNIT SEPARATOR.
|
||||
* </ul>
|
||||
* Note: This method corresponds to the Java method
|
||||
* <tt>java.lang.Character.isWhitespace()</tt>.
|
||||
* Determines if the specified code point is a space character according to Java.
|
||||
* True for characters with general categories "Z" (separators),
|
||||
* which does not include control codes (e.g., TAB or Line Feed).
|
||||
*
|
||||
* @param ch the character to be tested.
|
||||
* @return true if the character is an ICU whitespace character;
|
||||
* false otherwise.
|
||||
* @see #u_isspace
|
||||
* Same as java.lang.Character.isSpaceChar().
|
||||
*
|
||||
* @param c the code point to be tested
|
||||
* @return TRUE if the code point is a space character according to Character.isSpaceChar()
|
||||
*
|
||||
* @see u_isspace
|
||||
* @see u_isWhitespace
|
||||
* @see u_isUWhiteSpace
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isJavaSpaceChar(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines if the specified code point is a whitespace character according to Java/ICU.
|
||||
* A character is considered to be a Java whitespace character if and only
|
||||
* if it satisfies one of the following criteria:
|
||||
*
|
||||
* - It is a Unicode separator (categories "Z"), but is not
|
||||
* a no-break space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
|
||||
* - It is U+0009 HORIZONTAL TABULATION.
|
||||
* - It is U+000A LINE FEED.
|
||||
* - It is U+000B VERTICAL TABULATION.
|
||||
* - It is U+000C FORM FEED.
|
||||
* - It is U+000D CARRIAGE RETURN.
|
||||
* - It is U+001C FILE SEPARATOR.
|
||||
* - It is U+001D GROUP SEPARATOR.
|
||||
* - It is U+001E RECORD SEPARATOR.
|
||||
* - It is U+001F UNIT SEPARATOR.
|
||||
* - It is U+0085 NEXT LINE.
|
||||
*
|
||||
* Same as java.lang.Character.isWhitespace() except that Java omits U+0085.
|
||||
*
|
||||
* @param c the code point to be tested
|
||||
* @return TRUE if the code point is a whitespace character according to Java/ICU
|
||||
*
|
||||
* @see u_isspace
|
||||
* @see u_isJavaSpaceChar
|
||||
* @see u_isUWhiteSpace
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
@ -1729,6 +1767,20 @@ u_isWhitespace(UChar32 c);
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_iscntrl(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified code point is an ISO control code.
|
||||
* True for U+0000..U+001f and U+007f..U+009f.
|
||||
*
|
||||
* Same as java.lang.Character.isISOControl().
|
||||
*
|
||||
* @param c the code point to be tested
|
||||
* @return TRUE if the code point is an ISO control code
|
||||
*
|
||||
* @see u_iscntrl
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isISOControl(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a printable character according
|
||||
@ -2248,20 +2300,19 @@ U_CAPI int32_t U_EXPORT2
|
||||
u_getPropertyValueEnum(UProperty property,
|
||||
const char* alias);
|
||||
|
||||
/**
|
||||
* The following functions are java specific.
|
||||
*/
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* is allowed to start in a Unicode identifier.
|
||||
* A character may start a Unicode identifier if and only if
|
||||
* it is a letter.
|
||||
* Determines if the specified character is permissible as the
|
||||
* first character in a Unicode identifier according to Java.
|
||||
* ### according to Unicode as well?!
|
||||
* True for characters with general categories "L" (letters) and "Nl" (letter numbers).
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may start a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isalpha
|
||||
* @see u_isIDPart
|
||||
* Same as java.lang.Character.isUnicodeIdentifierStart().
|
||||
*
|
||||
* @param c the code point to be tested
|
||||
* @return TRUE if the code point may start an identifier according to Java
|
||||
*
|
||||
* @see u_isalpha
|
||||
* @see u_isIDPart
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
@ -2287,35 +2338,32 @@ u_isIDStart(UChar32 c);
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may be part of a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isIDIgnorable
|
||||
*
|
||||
* @see u_isIDStart
|
||||
* @see u_isIDIgnorable
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isIDPart(UChar32 c);
|
||||
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* should be regarded as an ignorable character
|
||||
* in a Unicode identifier.
|
||||
* <P>
|
||||
* The following Unicode characters are ignorable in a
|
||||
* Unicode identifier:
|
||||
* <table>
|
||||
* <tr><td>0x0000 through 0x0008,</td>
|
||||
* <td>ISO control characters that</td></tr>
|
||||
* <tr><td>0x000E through 0x001B,</td> <td>are not whitespace</td></tr>
|
||||
* <tr><td>and 0x007F through 0x009F</td></tr>
|
||||
* <tr><td>0x200C through 0x200F</td> <td>join controls</td></tr>
|
||||
* <tr><td>0x200A through 0x200E</td> <td>bidirectional controls</td></tr>
|
||||
* <tr><td>0x206A through 0x206F</td> <td>format controls</td></tr>
|
||||
* <tr><td>0xFEFF</td> <td>zero-width no-break space</td></tr>
|
||||
* </table>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may be part of a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isIDPart
|
||||
* Determines if the specified character should be regarded
|
||||
* as an ignorable character in an identifier,
|
||||
* according to Java.
|
||||
* True for characters with general category "Cf" (format controls) as well as
|
||||
* non-whitespace ISO controls
|
||||
* (U+0000..U+0008, U+000E..U+001B, U+007F..U+0084, U+0086..U+009F).
|
||||
*
|
||||
* Same as java.lang.Character.isIdentifierIgnorable()
|
||||
* except that Java also returns TRUE for U+0085 Next Line
|
||||
* (it omits U+0085 from whitespace ISO controls).
|
||||
*
|
||||
* @param c the code point to be tested
|
||||
* @return TRUE if the code point is ignorable in identifiers according to Java
|
||||
*
|
||||
* @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
|
||||
* @see u_isIDStart
|
||||
* @see u_isIDPart
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
@ -247,6 +247,7 @@ enum {
|
||||
NL =0x0085,
|
||||
NBSP =0x00a0,
|
||||
CGJ =0x034f,
|
||||
FIGURESP=0x2007,
|
||||
HAIRSP =0x200a,
|
||||
ZWNJ =0x200c,
|
||||
ZWJ =0x200d,
|
||||
|
Loading…
Reference in New Issue
Block a user