ICU-6689 Updated isWhitespace to return false for FIGURE SPACE (U+2007).
X-SVN-Rev: 25846
This commit is contained in:
parent
d555b5f267
commit
b480173cab
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -139,10 +139,10 @@ public final class UCharacterTest extends TestFmwk
|
||||
*/
|
||||
public void TestSpaces()
|
||||
{
|
||||
int spaces[] = {0x0020, 0x0000a0, 0x002000, 0x002001, 0x002005};
|
||||
int nonspaces[] = {0x61, 0x0062, 0x0063, 0x0064, 0x0074};
|
||||
int whitespaces[] = {0x2008, 0x002009, 0x00200a, 0x00001c, 0x00000c};
|
||||
int nonwhitespaces[] = {0x61, 0x0062, 0x003c, 0x0028, 0x003f};
|
||||
int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
|
||||
int nonspaces[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0074};
|
||||
int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c /* ,0x200b */}; // 0x200b was "Zs" in Unicode 4.0, but it is "Cf" in Unicode 4.1
|
||||
int nonwhitespaces[] = {0x0061, 0x0062, 0x003c, 0x0028, 0x003f, 0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b};
|
||||
|
||||
int size = spaces.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
|
@ -3411,10 +3411,8 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
* A code point is considered to be a whitespace character if and only
|
||||
* if it satisfies one of the following criteria:
|
||||
* <ul>
|
||||
* <li> It is a Unicode space separator (category "Zs"), but is not
|
||||
* a no-break space (\u00A0 or \u202F or \uFEFF).
|
||||
* <li> It is a Unicode line separator (category "Zl").
|
||||
* <li> It is a Unicode paragraph separator (category "Zp").
|
||||
* <li> It is a Unicode space character (categories "Zs" or "Zl" or "Zp"), but is not
|
||||
* also a no-break space (\u00A0 or \u2007 or \u202F).
|
||||
* <li> It is \u0009, HORIZONTAL TABULATION.
|
||||
* <li> It is \u000A, LINE FEED.
|
||||
* <li> It is \u000B, VERTICAL TABULATION.
|
||||
@ -3427,7 +3425,9 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
* </ul>
|
||||
*
|
||||
* This API tries to synch to the semantics of the Java API,
|
||||
* java.lang.Character.isWhitespace().
|
||||
* java.lang.Character.isWhitespace(), but it may not return
|
||||
* exactly the same results because of the Unicode version
|
||||
* difference.
|
||||
* @param ch code point to determine if it is a white space
|
||||
* @return true if the specified code point is a white space character
|
||||
* @stable ICU 2.1
|
||||
@ -3440,8 +3440,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
((1 << UCharacterCategory.SPACE_SEPARATOR)
|
||||
| (1 << UCharacterCategory.LINE_SEPARATOR)
|
||||
| (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
|
||||
&& (ch != NO_BREAK_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
|
||||
&& (ch != ZERO_WIDTH_NO_BREAK_SPACE_)
|
||||
&& (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
|
||||
// TAB VT LF FF CR FS GS RS US are all control characters
|
||||
// that are white spaces.
|
||||
|| (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
|
||||
@ -6241,52 +6240,52 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
* No break space code point
|
||||
*/
|
||||
private static final int NO_BREAK_SPACE_ = 0xA0;
|
||||
|
||||
|
||||
/**
|
||||
* Figure space code point
|
||||
*/
|
||||
private static final int FIGURE_SPACE_ = 0x2007;
|
||||
|
||||
/**
|
||||
* Narrow no break space code point
|
||||
*/
|
||||
private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
|
||||
|
||||
/**
|
||||
* Zero width no break space code point
|
||||
*/
|
||||
private static final int ZERO_WIDTH_NO_BREAK_SPACE_ = 0xFEFF;
|
||||
|
||||
|
||||
/**
|
||||
* Ideographic number zero code point
|
||||
*/
|
||||
private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
|
||||
|
||||
|
||||
/**
|
||||
* CJK Ideograph, First code point
|
||||
*/
|
||||
private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
|
||||
|
||||
|
||||
/**
|
||||
* CJK Ideograph, Second code point
|
||||
*/
|
||||
private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
|
||||
|
||||
|
||||
/**
|
||||
* CJK Ideograph, Third code point
|
||||
*/
|
||||
private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
|
||||
|
||||
|
||||
/**
|
||||
* CJK Ideograph, Fourth code point
|
||||
*/
|
||||
private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8;
|
||||
|
||||
|
||||
/**
|
||||
* CJK Ideograph, Fifth code point
|
||||
*/
|
||||
private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
|
||||
|
||||
|
||||
/**
|
||||
* CJK Ideograph, Sixth code point
|
||||
*/
|
||||
private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
|
||||
|
||||
|
||||
/**
|
||||
* CJK Ideograph, Seventh code point
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user