ICU-6689 Updated isWhitespace to return false for FIGURE SPACE (U+2007).

X-SVN-Rev: 25846
This commit is contained in:
Yoshito Umaoka 2009-04-20 20:08:08 +00:00
parent d555b5f267
commit b480173cab
2 changed files with 25 additions and 26 deletions

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* Copyright (C) 1996-2009, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -139,10 +139,10 @@ public final class UCharacterTest extends TestFmwk
*/
public void TestSpaces()
{
int spaces[] = {0x0020, 0x0000a0, 0x002000, 0x002001, 0x002005};
int nonspaces[] = {0x61, 0x0062, 0x0063, 0x0064, 0x0074};
int whitespaces[] = {0x2008, 0x002009, 0x00200a, 0x00001c, 0x00000c};
int nonwhitespaces[] = {0x61, 0x0062, 0x003c, 0x0028, 0x003f};
int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
int nonspaces[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0074};
int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c /* ,0x200b */}; // 0x200b was "Zs" in Unicode 4.0, but it is "Cf" in Unicode 4.1
int nonwhitespaces[] = {0x0061, 0x0062, 0x003c, 0x0028, 0x003f, 0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b};
int size = spaces.length;
for (int i = 0; i < size; i ++)

View File

@ -3411,10 +3411,8 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* A code point is considered to be a whitespace character if and only
* if it satisfies one of the following criteria:
* <ul>
* <li> It is a Unicode space separator (category "Zs"), but is not
* a no-break space (&#92u00A0 or &#92u202F or &#92uFEFF).
* <li> It is a Unicode line separator (category "Zl").
* <li> It is a Unicode paragraph separator (category "Zp").
* <li> It is a Unicode space character (categories "Zs" or "Zl" or "Zp"), but is not
* also a no-break space (&#92u00A0 or &#92u2007 or &#92u202F).
* <li> It is &#92u0009, HORIZONTAL TABULATION.
* <li> It is &#92u000A, LINE FEED.
* <li> It is &#92u000B, VERTICAL TABULATION.
@ -3427,7 +3425,9 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* </ul>
*
* This API tries to synch to the semantics of the Java API,
* java.lang.Character.isWhitespace().
* java.lang.Character.isWhitespace(), but it may not return
* exactly the same results because the two implementations may
* be based on different Unicode versions.
* @param ch code point to determine if it is a white space
* @return true if the specified code point is a white space character
* @stable ICU 2.1
@ -3440,8 +3440,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
((1 << UCharacterCategory.SPACE_SEPARATOR)
| (1 << UCharacterCategory.LINE_SEPARATOR)
| (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
&& (ch != NO_BREAK_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
&& (ch != ZERO_WIDTH_NO_BREAK_SPACE_)
&& (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
// TAB VT LF FF CR FS GS RS US NL are all control characters
// that are white spaces.
|| (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
@ -6241,52 +6240,52 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* No break space code point
*/
private static final int NO_BREAK_SPACE_ = 0xA0;
/**
* Figure space code point (U+2007), documented in this class as one of
* the no-break spaces
*/
private static final int FIGURE_SPACE_ = 0x2007;
/**
* Narrow no-break space code point (U+202F)
*/
private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
/**
* Zero width no-break space code point (U+FEFF)
*/
private static final int ZERO_WIDTH_NO_BREAK_SPACE_ = 0xFEFF;
/**
* Ideographic number zero code point (U+3007)
*/
private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
/**
* CJK Ideograph, First code point (U+4E00, the ideograph for one)
*/
private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
/**
* CJK Ideograph, Second code point (U+4E8C, the ideograph for two)
*/
private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
/**
* CJK Ideograph, Third code point (U+4E09, the ideograph for three)
*/
private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
/**
* CJK Ideograph, Fourth code point.
* NOTE(review): the neighbouring constants hold the ideographs for
* one, two, three, five and six, but the ideograph for four is U+56DB,
* not U+56D8 - confirm whether 0x56d8 is intended or a typo.
*/
private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8;
/**
* CJK Ideograph, Fifth code point (U+4E94, the ideograph for five)
*/
private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
/**
* CJK Ideograph, Sixth code point (U+516D, the ideograph for six)
*/
private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
/**
* CJK Ideograph, Seventh code point
*/