ICU-221 move isWhitespace() implementation to C

X-SVN-Rev: 1386
2000-05-18 17:40:19 +00:00 · 2000-05-18 17:40:19 +00:00 · 4c2b7dfd0e
commit 4c2b7dfd0e
parent 0fa8946325
2 changed files with 32 additions and 40 deletions
--- a/icu4c/source/common/unicode/uchar.h
+++ b/icu4c/source/common/unicode/uchar.h
@ -513,6 +513,37 @@ u_isalpha(UChar32 c);
 U_CAPI bool_t U_EXPORT2
 u_isspace(UChar32 c);

+/**
+ * Determines if the specified character is white space according to ICU.
+ * A character is considered to be an ICU whitespace character if and only
+ * if it satisfies one of the following criteria:
+ * <ul>
+ * <li> It is a Unicode space separator (category "Zs"), but is not
+ *      a no-break space (&#92;u00A0 or &#92;uFEFF).
+ * <li> It is a Unicode line separator (category "Zl").
+ * <li> It is a Unicode paragraph separator (category "Zp").
+ * <li> It is &#92;u0009, HORIZONTAL TABULATION.
+ * <li> It is &#92;u000A, LINE FEED.
+ * <li> It is &#92;u000B, VERTICAL TABULATION.
+ * <li> It is &#92;u000C, FORM FEED.
+ * <li> It is &#92;u000D, CARRIAGE RETURN.
+ * <li> It is &#92;u001C, FILE SEPARATOR.
+ * <li> It is &#92;u001D, GROUP SEPARATOR.
+ * <li> It is &#92;u001E, RECORD SEPARATOR.
+ * <li> It is &#92;u001F, UNIT SEPARATOR.
+ * </ul>
+ * Note: This function corresponds to the Java method
+ * <tt>java.lang.Character.isWhitespace()</tt>.
+ *
+ * @param   c	the character to be tested.
+ * @return  true if the character is an ICU whitespace character;
+ *          false otherwise.
+ * @see     u_isspace
+ * @draft
+ */
+U_CAPI bool_t U_EXPORT2
+u_isWhitespace(UChar32 c);
+
 /**
 * Determines whether the specified character is a control character or not.
 *
--- a/icu4c/source/common/unicode/unicode.h
+++ b/icu4c/source/common/unicode/unicode.h
@ -1141,46 +1141,7 @@ Unicode::isSpaceChar(UChar32 ch) {
 // Determines if the specified character is white space according to ICU.
 inline bool_t
 Unicode::isWhitespace(UChar32 ch) {
-    // ### TODO Move this implementation to C, and make this call the C
-    //      implementation.
-    // TODO Optional -- reimplement in terms of modified category
-    //      code -- see Mark Davis's note (below).  If this is done,
-    //      the implementation still must conform to the specified
-    //      semantics.  That is, U+00A0 and U+FEFF must return false,
-    //      and the ranges U+0009 - U+000D and U+001C - U+001F must
-    //      return true.  Characters other than these in Zs, Zl, or Zp
-    //      must return true.
-
-    int8_t cat = Unicode::getType(ch);
-    return
-        (cat == SPACE_SEPARATOR && ch != 0x00A0 && ch != 0xFEFF) ||
-        (((((int32_t(1) << LINE_SEPARATOR) |
-            (int32_t(1) << PARAGRAPH_SEPARATOR)) >> cat) & int32_t(1)) != 0) ||
-        (ch <= 0x1F && ((((int32_t(1) << 0x0009) |
-                          (int32_t(1) << 0x000A) |
-                          (int32_t(1) << 0x000B) |
-                          (int32_t(1) << 0x000C) |
-                          (int32_t(1) << 0x000D) |
-                          (int32_t(1) << 0x001C) |
-                          (int32_t(1) << 0x001D) |
-                          (int32_t(1) << 0x001E) |
-                          (int32_t(1) << 0x001F)) >> ch) & int32_t(1)) != 0);
-
-    // From Mark Davis:
-    //| What we should do is to make sure that the special Cc characters like CR
-    //| have either Zs, Zl, or Zp in the property database. We can then just call
-    //| the equivalent of:
-    //| 
-    //|  public static boolean isWhileSpace(char ch) {
-    //|   return ((1 << Character.getType(c)) & WHITESPACE_MASK) != 0; }
-    //| 
-    //| where WHITESPACE_MASK = (1 << Zs) | (1 << Zl) | (1 << Zp);
-    //| 
-    //| This is much faster code, since it just looksup the property value and does
-    //| a couple of arithmetics to get the right answer.
-    //
-    // (We still have to make sure U+00A0 and U+FEFF are excluded, so the code
-    //  might not be as simple as this. - aliu)
+    return u_isWhitespace(ch);
 }

 // Gets if the Unicode character's character property.