ICU-221 move isWhitespace() implementation to C

X-SVN-Rev: 1386
This commit is contained in:
Markus Scherer 2000-05-18 17:40:19 +00:00
parent 0fa8946325
commit 4c2b7dfd0e
2 changed files with 32 additions and 40 deletions

View File

@ -513,6 +513,37 @@ u_isalpha(UChar32 c);
U_CAPI bool_t U_EXPORT2
u_isspace(UChar32 c);
/**
* Determines if the specified character is white space according to ICU.
* A character is considered to be an ICU whitespace character if and only
* if it satisfies one of the following criteria:
* <ul>
* <li> It is a Unicode space separator (category "Zs"), but is not
* a no-break space (&#92;u00A0 or &#92;uFEFF).
* <li> It is a Unicode line separator (category "Zl").
* <li> It is a Unicode paragraph separator (category "Zp").
* <li> It is &#92;u0009, HORIZONTAL TABULATION.
* <li> It is &#92;u000A, LINE FEED.
* <li> It is &#92;u000B, VERTICAL TABULATION.
* <li> It is &#92;u000C, FORM FEED.
* <li> It is &#92;u000D, CARRIAGE RETURN.
* <li> It is &#92;u001C, FILE SEPARATOR.
* <li> It is &#92;u001D, GROUP SEPARATOR.
* <li> It is &#92;u001E, RECORD SEPARATOR.
* <li> It is &#92;u001F, UNIT SEPARATOR.
* </ul>
* Note: This function corresponds to the Java method
* <tt>java.lang.Character.isWhitespace()</tt>.
*
* @param c the character to be tested.
* @return true if the character is an ICU whitespace character;
* false otherwise.
* @see u_isspace
* @draft
*/
U_CAPI bool_t U_EXPORT2
u_isWhitespace(UChar32 c);
/**
* Determines whether the specified character is a control character or not.
*

View File

@ -1141,46 +1141,7 @@ Unicode::isSpaceChar(UChar32 ch) {
// Determines if the specified character is white space according to ICU.
//
// This is a thin inline wrapper: the classification itself is performed by
// the C API function u_isWhitespace(), so that the C and C++ entry points
// share a single implementation.
//
// Required semantics (the C implementation must conform to these):
//   - U+00A0 and U+FEFF (no-break spaces) are NOT whitespace.
//   - U+0009..U+000D and U+001C..U+001F ARE whitespace.
//   - Any other character in category Zs, Zl, or Zp IS whitespace.
//
// ch: the character to be tested.
// Returns the result of u_isWhitespace(ch).
inline bool_t
Unicode::isWhitespace(UChar32 ch) {
    return u_isWhitespace(ch);
}
// Gets the Unicode character's character property.