ICU-221 move isWhitespace() implementation to C
X-SVN-Rev: 1386
This commit is contained in:
parent
0fa8946325
commit
4c2b7dfd0e
@ -513,6 +513,37 @@ u_isalpha(UChar32 c);
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isspace(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines if the specified character is white space according to ICU.
|
||||
* A character is considered to be an ICU whitespace character if and only
|
||||
* if it satisfies one of the following criteria:
|
||||
* <ul>
|
||||
* <li> It is a Unicode space separator (category "Zs"), but is not
|
||||
* a no-break space (\u00A0 or \uFEFF).
|
||||
* <li> It is a Unicode line separator (category "Zl").
|
||||
* <li> It is a Unicode paragraph separator (category "Zp").
|
||||
* <li> It is \u0009, HORIZONTAL TABULATION.
|
||||
* <li> It is \u000A, LINE FEED.
|
||||
* <li> It is \u000B, VERTICAL TABULATION.
|
||||
* <li> It is \u000C, FORM FEED.
|
||||
* <li> It is \u000D, CARRIAGE RETURN.
|
||||
* <li> It is \u001C, FILE SEPARATOR.
|
||||
* <li> It is \u001D, GROUP SEPARATOR.
|
||||
* <li> It is \u001E, RECORD SEPARATOR.
|
||||
* <li> It is \u001F, UNIT SEPARATOR.
|
||||
* </ul>
|
||||
* Note: This method corresponds to the Java method
|
||||
* <tt>java.lang.Character.isWhitespace()</tt>.
|
||||
*
|
||||
* @param c the character to be tested.
|
||||
* @return true if the character is an ICU whitespace character;
|
||||
* false otherwise.
|
||||
* @see u_isspace
|
||||
* @draft
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isWhitespace(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a control character or not.
|
||||
*
|
||||
|
@ -1141,46 +1141,7 @@ Unicode::isSpaceChar(UChar32 ch) {
|
||||
// Determines if the specified character is white space according to ICU.
|
||||
inline bool_t
|
||||
Unicode::isWhitespace(UChar32 ch) {
|
||||
// ### TODO Move this implementation to C, and make this call the C
|
||||
// implementation.
|
||||
// TODO Optional -- reimplement in terms of modified category
|
||||
// code -- see Mark Davis's note (below). If this is done,
|
||||
// the implementation still must conform to the specified
|
||||
// semantics. That is, U+00A0 and U+FEFF must return false,
|
||||
// and the ranges U+0009 - U+000D and U+001C - U+001F must
|
||||
// return true. Characters other than these in Zs, Zl, or Zp
|
||||
// must return true.
|
||||
|
||||
int8_t cat = Unicode::getType(ch);
|
||||
return
|
||||
(cat == SPACE_SEPARATOR && ch != 0x00A0 && ch != 0xFEFF) ||
|
||||
(((((int32_t(1) << LINE_SEPARATOR) |
|
||||
(int32_t(1) << PARAGRAPH_SEPARATOR)) >> cat) & int32_t(1)) != 0) ||
|
||||
(ch <= 0x1F && ((((int32_t(1) << 0x0009) |
|
||||
(int32_t(1) << 0x000A) |
|
||||
(int32_t(1) << 0x000B) |
|
||||
(int32_t(1) << 0x000C) |
|
||||
(int32_t(1) << 0x000D) |
|
||||
(int32_t(1) << 0x001C) |
|
||||
(int32_t(1) << 0x001D) |
|
||||
(int32_t(1) << 0x001E) |
|
||||
(int32_t(1) << 0x001F)) >> ch) & int32_t(1)) != 0);
|
||||
|
||||
// From Mark Davis:
|
||||
//| What we should do is to make sure that the special Cc characters like CR
|
||||
//| have either Zs, Zl, or Zp in the property database. We can then just call
|
||||
//| the equivalent of:
|
||||
//|
|
||||
//| public static boolean isWhiteSpace(char ch) {
|
||||
//| return ((1 << Character.getType(ch)) & WHITESPACE_MASK) != 0; }
|
||||
//|
|
||||
//| where WHITESPACE_MASK = (1 << Zs) | (1 << Zl) | (1 << Zp);
|
||||
//|
|
||||
//| This is much faster code, since it just looks up the property value and does
|
||||
//| a couple of arithmetic operations to get the right answer.
|
||||
//
|
||||
// (We still have to make sure U+00A0 and U+FEFF are excluded, so the code
|
||||
// might not be as simple as this. - aliu)
|
||||
return u_isWhitespace(ch);
|
||||
}
|
||||
|
||||
// Gets the Unicode character's character property.
|
||||
|
Loading…
Reference in New Issue
Block a user