Adding new APIs for
* Unicode blocks
* Non-binary properties
* Cell widths
* New numeric implementation
* ISO comment

X-SVN-Rev: 9959
This commit is contained in:
Syn Wee Quek 2002-10-03 23:42:02 +00:00
parent f3f1d80b58
commit 409a9247ba
7 changed files with 2892 additions and 404 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java,v $
* $Date: 2002/07/11 21:25:24 $
* $Revision: 1.5 $
* $Date: 2002/10/03 23:42:02 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -473,6 +473,77 @@ public final class UCharacterCaseTest extends TestFmwk
}
}
/**
 * Tests the simple (single code point) case conversions
 * UCharacter.toLowerCase / UCharacter.toUpperCase together with the
 * UCharacter.isLowerCase / UCharacter.isUpperCase predicates.
 * Three checks are made:
 * 1. code points 0x2100 up to (not including) 0x2138, apart from 0x2126,
 *    0x212a and 0x212b, must map to themselves in both directions;
 * 2. a table of code points must lowercase to the listed results;
 * 3. two parallel ASCII strings (one in small letters, one in capitals)
 *    must convert into each other character by character.
 * Every failure is reported through errln().
 */
public void TestUpperLower()
{
    final int source[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9,
                          0x01c8, 0x01c9, 0x000c};
    final int lowered[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9,
                           0x01c9, 0x01c9, 0x000c};
    // text that is expected to be all lowercase, i.e. input for toUpperCase
    final String smallText = "abcdefg123hij.?:klmno";
    // text that is expected to be all uppercase, i.e. input for toLowerCase
    final String capitalText = "ABCDEFG123HIJ.?:KLMNO";

    // Checks LetterLike Symbols which were previously a source of
    // confusion [Bertrand A. D. 02/04/98]
    for (int cp = 0x2100; cp < 0x2138; ++ cp) {
        if (cp == 0x2126 || cp == 0x212a || cp == 0x212b) {
            // these three are excluded from the identity check
            continue;
        }
        if (UCharacter.toLowerCase(cp) != cp) { // itself
            errln("Failed case conversion with itself: \\u"
                  + Utility.hex(cp, 4));
        }
        if (UCharacter.toUpperCase(cp) != cp) {
            errln("Failed case conversion with itself: \\u"
                  + Utility.hex(cp, 4));
        }
    }

    // table driven lowercase check
    for (int row = 0; row < source.length; ++ row) {
        final int got = UCharacter.toLowerCase(source[row]);
        if (got != lowered[row]) {
            errln("FAILED UCharacter.tolower() for \\u"
                  + Utility.hex(source[row], 4)
                  + " Expected \\u" + Utility.hex(lowered[row], 4)
                  + " Got \\u"
                  + Utility.hex(got, 4));
        }
    }

    logln("testing upper lower");
    for (int pos = 0; pos < smallText.length(); ++ pos) {
        logln("testing to upper to lower");
        final char small = smallText.charAt(pos);
        final char capital = capitalText.charAt(pos);

        // only the first failing check for a position is reported
        if (UCharacter.isLetter(small) && !UCharacter.isLowerCase(small)) {
            errln("Failed isLowerCase test at \\u"
                  + Utility.hex(small, 4));
            continue;
        }
        if (UCharacter.isLetter(capital)
            && !UCharacter.isUpperCase(capital)) {
            errln("Failed isUpperCase test at \\u"
                  + Utility.hex(capital, 4));
            continue;
        }
        if (UCharacter.toLowerCase(capital) != small) {
            errln("Failed case conversion from \\u"
                  + Utility.hex(capital, 4) + " To \\u"
                  + Utility.hex(small, 4));
            continue;
        }
        if (UCharacter.toUpperCase(small) != capital) {
            errln("Failed case conversion : \\u"
                  + Utility.hex(small, 4) + " To \\u"
                  + Utility.hex(capital, 4));
            continue;
        }
        if (UCharacter.toLowerCase(small) != small) {
            errln("Failed case conversion with itself: \\u"
                  + Utility.hex(small));
            continue;
        }
        if (UCharacter.toUpperCase(capital) != capital) {
            errln("Failed case conversion with itself: \\u"
                  + Utility.hex(capital));
        }
    }
    logln("done testing upper Lower");
}
// private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $
* $Date: 2002/09/19 21:24:29 $
* $Revision: 1.42 $
* $Date: 2002/10/03 23:42:02 $
* $Revision: 1.43 $
*
*******************************************************************************
*/
@ -19,12 +19,14 @@ import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.lang.UCharacterDirection;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.ValueIterator;
import com.ibm.icu.util.VersionInfo;
import com.ibm.icu.impl.UCharacterName;
import com.ibm.icu.impl.Utility;
import java.io.BufferedReader;
import java.util.Arrays;
@ -59,10 +61,7 @@ public final class UCharacterTest extends TestFmwk
try
{
UCharacterTest test = new UCharacterTest();
long starttime = System.currentTimeMillis();
test.run(arg);
System.out.println(System.currentTimeMillis() - starttime);
// test.TestNames();
test.run(arg);
}
catch (Exception e)
{
@ -243,7 +242,7 @@ public final class UCharacterTest extends TestFmwk
{
for (int i = '0'; i < '9'; i ++) {
int n1 = UCharacter.getNumericValue(i);
int n2 = UCharacter.getUnicodeNumericValue(i);
double n2 = UCharacter.getUnicodeNumericValue(i);
if (n1 != n2 || n1 != (i - '0')) {
errln("Numeric value of " + (char)i + " expected to be " +
(i - '0'));
@ -251,8 +250,8 @@ public final class UCharacterTest extends TestFmwk
}
for (int i = 'A'; i < 'F'; i ++) {
int n1 = UCharacter.getNumericValue(i);
int n2 = UCharacter.getUnicodeNumericValue(i);
if (n2 != -1 || n1 != (i - 'A' + 10)) {
double n2 = UCharacter.getUnicodeNumericValue(i);
if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 'A' + 10)) {
errln("Numeric value of " + (char)i + " expected to be " +
(i - 'A' + 10));
}
@ -260,8 +259,8 @@ public final class UCharacterTest extends TestFmwk
for (int i = 0xFF21; i < 0xFF26; i ++) {
// testing fullwidth Latin characters A-F
int n1 = UCharacter.getNumericValue(i);
int n2 = UCharacter.getUnicodeNumericValue(i);
if (n2 != -1 || n1 != (i - 0xFF21 + 10)) {
double n2 = UCharacter.getUnicodeNumericValue(i);
if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 0xFF21 + 10)) {
errln("Numeric value of " + (char)i + " expected to be " +
(i - 0xFF21 + 10));
}
@ -513,10 +512,32 @@ public final class UCharacterTest extends TestFmwk
index = s.indexOf(';', 5);
String t = s.substring(index + 1, index + 3);
index += 4;
int cc = Integer.parseInt(s.substring(index, s.indexOf(';',
index)));
index = s.indexOf(';', index);
String d = s.substring(index + 1, s.indexOf(';', index + 1));
int oldindex = index;
index = s.indexOf(';', index);
int cc = Integer.parseInt(s.substring(oldindex, index));
oldindex = index + 1;
index = s.indexOf(';', oldindex);
String d = s.substring(oldindex, index);
for (int i = 0; i < 6; i ++) {
index = s.indexOf(';', index + 1);
// skipping to the 11th field
}
// iso comment
oldindex = index + 1;
index = s.indexOf(';', oldindex);
String isocomment = s.substring(oldindex, index);
// uppercase
oldindex = index + 1;
index = s.indexOf(';', oldindex);
String upper = s.substring(oldindex, index);
// lowercase
oldindex = index + 1;
index = s.indexOf(';', oldindex);
String lower = s.substring(oldindex, index);
// titlecase last element
oldindex = index + 1;
String title = s.substring(oldindex);
// testing the category
// we override the general category of some control
@ -552,6 +573,48 @@ public final class UCharacterTest extends TestFmwk
" expected wrong direction " + dir);
break;
}
// testing iso comment
String comment = UCharacter.getISOComment(ch);
if (comment == null) {
comment = "";
}
if (!comment.equals(isocomment)) {
errln("FAIL \\u" + hex(ch) +
" expected iso comment " + isocomment);
break;
}
int tempchar = ch;
if (upper.length() > 0) {
tempchar = Integer.parseInt(upper, 16);
}
if (UCharacter.toUpperCase(ch) != tempchar) {
errln("FAIL \\u" + Utility.hex(ch, 4)
+ " expected uppercase \\u"
+ Utility.hex(tempchar, 4));
break;
}
tempchar = ch;
if (lower.length() > 0) {
tempchar = Integer.parseInt(lower, 16);
}
if (UCharacter.toLowerCase(ch) != tempchar) {
errln("FAIL \\u" + Utility.hex(ch, 4)
+ " expected lowercase \\u"
+ Utility.hex(tempchar, 4));
break;
}
tempchar = ch;
if (title.length() > 0) {
tempchar = Integer.parseInt(title, 16);
}
if (UCharacter.toTitleCase(ch) != tempchar) {
errln("FAIL \\u" + Utility.hex(ch, 4)
+ " expected titlecase \\u"
+ Utility.hex(tempchar, 4));
break;
}
}
input.close();
}
@ -559,6 +622,55 @@ public final class UCharacterTest extends TestFmwk
{
e.printStackTrace();
}
if (UCharacter.UnicodeBlock.of(0x0041)
!= UCharacter.UnicodeBlock.BASIC_LATIN
|| UCharacter.getIntPropertyValue(0x41, UProperty.BLOCK)
!= UCharacter.UnicodeBlock.BASIC_LATIN.getID()) {
errln("UCharacter.UnicodeBlock.of(\\u0041) property failed! "
+ "Expected : "
+ UCharacter.UnicodeBlock.BASIC_LATIN.getID() + " got "
+ UCharacter.UnicodeBlock.of(0x0041));
}
// sanity check on repeated properties
for (ch = 0xfffe; ch <= 0x10ffff;) {
if (UCharacter.getType(ch) != UCharacterCategory.UNASSIGNED) {
errln("error: UCharacter.getType(\\u" + Utility.hex(ch, 4)
+ " != UCharacterCategory.UNASSIGNED (returns "
+ UCharacterCategory.toString(UCharacter.getType(ch))
+ ")");
}
if ((ch & 0xffff) == 0xfffe) {
++ ch;
}
else {
ch += 0xffff;
}
}
// test that PUA is not "unassigned"
for(ch = 0xe000; ch <= 0x10fffd;) {
type = UCharacter.getType(ch);
if (type == UCharacterCategory.UNASSIGNED) {
errln("error: UCharacter.getType(\\u"
+ Utility.hex(ch, 4)
+ ") == UCharacterCategory.UNASSIGNED");
}
else if (type != UCharacterCategory.PRIVATE_USE) {
logln("PUA override: UCharacter.getType(\\u"
+ Utility.hex(ch, 4) + ")=" + type);
}
if (ch == 0xf8ff) {
ch = 0xf0000;
}
else if (ch == 0xffffd) {
ch = 0x100000;
}
else {
++ ch;
}
}
}
@ -1073,18 +1185,18 @@ public final class UCharacterTest extends TestFmwk
//
// PUA characters are listed explicitly with "XX".
// Verify that no assigned character has "XX".
/* synwee this is not ported to java yet
* if (result.value != UCharacterCategory.UNASSIGNED
if (result.value != UCharacterCategory.UNASSIGNED
&& result.value != UCharacterCategory.PRIVATE_USE) {
int c = result.start;
while (c < result.limit) {
if (0 == u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
if (0 == UCharacter.getIntPropertyValue(c,
UProperty.LINE_BREAK)) {
logln("error UProperty.LINE_BREAK(assigned \\u"
+ Utility.hex(c, 4) + ")=XX");
}
++c;
++ c;
}
}
*/
/*
* Verify default Bidi classes.
@ -1098,12 +1210,10 @@ public final class UCharacterTest extends TestFmwk
++ i) {
if (c < defaultBidi[i][0]) {
while (c < result.limit && c < defaultBidi[i][0]) {
if (UCharacter.getDirection(c)
!= defaultBidi[i][1]
/* synwee this is not ported yet ||
u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)
!= defaultBidi[i][1] */
) {
if (UCharacter.getDirection(c) != defaultBidi[i][1]
|| UCharacter.getIntPropertyValue(c,
UProperty.BIDI_CLASS)
!= defaultBidi[i][1]) {
errln("error: getDirection(unassigned/PUA "
+ Integer.toHexString(c)
+ ") should be "
@ -1280,33 +1390,229 @@ public final class UCharacterTest extends TestFmwk
{ 0x0049, UProperty.SOFT_DOTTED },
{ 0xfa11, UProperty.UNIFIED_IDEOGRAPH },
{ 0xfa12, UProperty.UNIFIED_IDEOGRAPH }
{ 0xfa12, UProperty.UNIFIED_IDEOGRAPH },
{ 0x02AF, UProperty.BLOCK},
{ 0x0C4E, UProperty.BLOCK},
{ 0x155A, UProperty.BLOCK},
{ 0x1717, UProperty.BLOCK},
{ 0x1AFF, UProperty.BLOCK},
{ 0x3040, UProperty.BLOCK},
{ 0x1D0FF, UProperty.BLOCK},
{ 0x10D0FF, UProperty.BLOCK},
{ 0xEFFFF, UProperty.BLOCK},
// UProperty.CANONICAL_COMBINING_CLASS tested for assigned
// characters in TestUnicodeData()
{ 0xd7d7, UProperty.CANONICAL_COMBINING_CLASS},
{ 0x00A0, UProperty.DECOMPOSITION_TYPE},
{ 0x00A8, UProperty.DECOMPOSITION_TYPE},
{ 0x00bf, UProperty.DECOMPOSITION_TYPE},
{ 0x00c0, UProperty.DECOMPOSITION_TYPE},
{ 0x1E9B, UProperty.DECOMPOSITION_TYPE},
{ 0xBCDE, UProperty.DECOMPOSITION_TYPE},
{ 0xFB5D, UProperty.DECOMPOSITION_TYPE},
{ 0x1D736, UProperty.DECOMPOSITION_TYPE},
{ 0xe0033, UProperty.DECOMPOSITION_TYPE},
{ 0x0009, UProperty.EAST_ASIAN_WIDTH},
{ 0x0020, UProperty.EAST_ASIAN_WIDTH},
{ 0x00B1, UProperty.EAST_ASIAN_WIDTH},
{ 0x20A9, UProperty.EAST_ASIAN_WIDTH},
{ 0x2FFB, UProperty.EAST_ASIAN_WIDTH},
{ 0x3000, UProperty.EAST_ASIAN_WIDTH},
{ 0x35bb, UProperty.EAST_ASIAN_WIDTH},
{ 0x58bd, UProperty.EAST_ASIAN_WIDTH},
{ 0xD7A3, UProperty.EAST_ASIAN_WIDTH},
{ 0xEEEE, UProperty.EAST_ASIAN_WIDTH},
{ 0x1D198, UProperty.EAST_ASIAN_WIDTH},
{ 0x20000, UProperty.EAST_ASIAN_WIDTH},
{ 0x2F8C7, UProperty.EAST_ASIAN_WIDTH},
{ 0x3a5bd, UProperty.EAST_ASIAN_WIDTH},
{ 0xFEEEE, UProperty.EAST_ASIAN_WIDTH},
{ 0x10EEEE, UProperty.EAST_ASIAN_WIDTH},
// UProperty.GENERAL_CATEGORY tested for assigned characters in
// TestUnicodeData()
{ 0xd7d7, UProperty.GENERAL_CATEGORY},
{ 0x0444, UProperty.JOINING_GROUP},
{ 0x0639, UProperty.JOINING_GROUP},
{ 0x072A, UProperty.JOINING_GROUP},
{ 0x0647, UProperty.JOINING_GROUP},
{ 0x06C1, UProperty.JOINING_GROUP},
{ 0x06C3, UProperty.JOINING_GROUP},
{ 0x200C, UProperty.JOINING_TYPE},
{ 0x200D, UProperty.JOINING_TYPE},
{ 0x0639, UProperty.JOINING_TYPE},
{ 0x0640, UProperty.JOINING_TYPE},
{ 0x06C3, UProperty.JOINING_TYPE},
{ 0x0300, UProperty.JOINING_TYPE},
{ 0x070F, UProperty.JOINING_TYPE},
{ 0xe0033, UProperty.JOINING_TYPE},
// TestUnicodeData() verifies that no assigned character has "XX"
// (unknown)
{ 0xe7e7, UProperty.LINE_BREAK},
{ 0x10fffd, UProperty.LINE_BREAK},
{ 0x0028, UProperty.LINE_BREAK},
{ 0x232A, UProperty.LINE_BREAK},
{ 0x3401, UProperty.LINE_BREAK},
{ 0x4e02, UProperty.LINE_BREAK},
{ 0xac03, UProperty.LINE_BREAK},
{ 0x20004, UProperty.LINE_BREAK},
{ 0xf905, UProperty.LINE_BREAK},
{ 0xdb7e, UProperty.LINE_BREAK},
{ 0xdbfd, UProperty.LINE_BREAK},
{ 0xdffc, UProperty.LINE_BREAK},
{ 0x2762, UProperty.LINE_BREAK},
{ 0x002F, UProperty.LINE_BREAK},
{ 0x1D49C, UProperty.LINE_BREAK},
{ 0x1731, UProperty.LINE_BREAK},
// UCHAR_NUMERIC_TYPE tested in TestNumericProperties()
// UCHAR_SCRIPT tested in TestUScriptCodeAPI()
// undefined UProperty values
{ 0x61, 0x4a7},
{ 0x234bc, 0x15ed}
};
boolean expected[] = { true, true, false, true, false,
true, false, true, false, true,
false, true, false, true, false,
true, true, false, true, true,
false, true, false, true, true,
false, true, true, false, true,
true, false, true, false, true,
true, false, true, true, false,
true, true, false, true, true,
false, true, false, true, true,
false, true, true, false, true,
true, false, true, true, false,
false, true, true, false, true,
false, true, false, false, true,
true, false, true, false, true,
false, true, false, true, false,
true, false, true, false, true,
false};
int expected[] = { 1, 1, 0, 1, 0,
1, 0, 1, 0, 1,
0, 1, 0, 1, 0,
1, 1, 0, 1, 1,
0, 1, 0, 1, 1,
0, 1, 1, 0, 1,
1, 0, 1, 0, 1,
1, 0, 1, 1, 0,
1, 1, 0, 1, 1,
0, 1, 0, 1, 1,
0, 1, 1, 0, 1,
1, 0, 1, 1, 0,
0, 1, 1, 0, 1,
0, 1, 0, 0, 1,
1, 0, 1, 0, 1,
0, 1, 0, 1, 0,
1, 0, 1, 0, 1,
0,
UCharacter.UnicodeBlock.IPA_EXTENSIONS_ID,
UCharacter.UnicodeBlock.TELUGU_ID,
UCharacter.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID,
UCharacter.UnicodeBlock.TAGALOG_ID,
UCharacter.UnicodeBlock.INVALID_CODE_ID,
UCharacter.UnicodeBlock.HIRAGANA_ID,
UCharacter.UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS_ID,
UCharacter.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID,
UCharacter.UnicodeBlock.INVALID_CODE_ID,
// UProperty.CANONICAL_COMBINING_CLASS tested for
// assigned characters in TestUnicodeData()
0,
UCharacter.DecompositionType.NOBREAK,
UCharacter.DecompositionType.COMPAT,
UCharacter.DecompositionType.NONE,
UCharacter.DecompositionType.CANONICAL,
UCharacter.DecompositionType.CANONICAL,
UCharacter.DecompositionType.CANONICAL,
UCharacter.DecompositionType.MEDIAL,
UCharacter.DecompositionType.FONT,
UCharacter.DecompositionType.NONE,
UCharacter.EastAsianWidth.NEUTRAL,
UCharacter.EastAsianWidth.NARROW,
UCharacter.EastAsianWidth.AMBIGUOUS,
UCharacter.EastAsianWidth.HALFWIDTH,
UCharacter.EastAsianWidth.WIDE,
UCharacter.EastAsianWidth.FULLWIDTH,
UCharacter.EastAsianWidth.WIDE,
UCharacter.EastAsianWidth.WIDE,
UCharacter.EastAsianWidth.WIDE,
UCharacter.EastAsianWidth.AMBIGUOUS,
UCharacter.EastAsianWidth.NEUTRAL,
UCharacter.EastAsianWidth.WIDE,
UCharacter.EastAsianWidth.WIDE,
UCharacter.EastAsianWidth.NEUTRAL,
UCharacter.EastAsianWidth.AMBIGUOUS,
UCharacter.EastAsianWidth.AMBIGUOUS,
// UProperty.GENERAL_CATEGORY tested for assigned
// characters in TestUnicodeData()
0,
UCharacter.JoiningGroup.NO_JOINING_GROUP,
UCharacter.JoiningGroup.AIN,
UCharacter.JoiningGroup.DALATH_RISH,
UCharacter.JoiningGroup.HEH,
UCharacter.JoiningGroup.HEH_GOAL,
UCharacter.JoiningGroup.HAMZA_ON_HEH_GOAL,
UCharacter.JoiningType.NON_JOINING,
UCharacter.JoiningType.JOIN_CAUSING,
UCharacter.JoiningType.DUAL_JOINING,
UCharacter.JoiningType.JOIN_CAUSING,
UCharacter.JoiningType.RIGHT_JOINING,
UCharacter.JoiningType.TRANSPARENT,
UCharacter.JoiningType.TRANSPARENT,
UCharacter.JoiningType.TRANSPARENT,
// TestUnicodeData() verifies that no assigned
// character has "XX" (unknown)
UCharacter.LineBreak.UNKNOWN,
UCharacter.LineBreak.UNKNOWN,
UCharacter.LineBreak.OPEN_PUNCTUATION,
UCharacter.LineBreak.CLOSE_PUNCTUATION,
UCharacter.LineBreak.IDEOGRAPHIC,
UCharacter.LineBreak.IDEOGRAPHIC,
UCharacter.LineBreak.IDEOGRAPHIC,
UCharacter.LineBreak.IDEOGRAPHIC,
UCharacter.LineBreak.IDEOGRAPHIC,
UCharacter.LineBreak.SURROGATE,
UCharacter.LineBreak.SURROGATE,
UCharacter.LineBreak.SURROGATE,
UCharacter.LineBreak.EXCLAMATION,
UCharacter.LineBreak.BREAK_SYMBOLS,
UCharacter.LineBreak.ALPHABETIC,
UCharacter.LineBreak.ALPHABETIC,
// UCHAR_NUMERIC_TYPE tested in
// TestNumericProperties()
// UCHAR_SCRIPT tested in TestUScriptCodeAPI()
0,
0
};
VersionInfo version = UCharacter.getUnicodeVersion();
if (UCharacter.getIntPropertyMinValue(UProperty.DASH) != 0
|| UCharacter.getIntPropertyMinValue(UProperty.BIDI_CLASS) != 0
|| UCharacter.getIntPropertyMinValue(UProperty.BLOCK)
!= UCharacter.UnicodeBlock.INVALID_CODE_ID
|| UCharacter.getIntPropertyMinValue(UProperty.SCRIPT)
!= UScript.INVALID_CODE
|| UCharacter.getIntPropertyMinValue(0x2345) != 0) {
errln("error: UCharacter.getIntPropertyMinValue() wrong");
}
if (UCharacter.getIntPropertyMaxValue(UProperty.DASH) != 1
|| UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) != 1
|| UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1) != 1
|| UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)
!= UCharacterDirection.CHAR_DIRECTION_COUNT - 1
|| UCharacter.getIntPropertyMaxValue(UProperty.BLOCK)
!= UCharacter.UnicodeBlock.COUNT - 1
|| UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK)
!= UCharacter.LineBreak.COUNT - 1
|| UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)
!= UScript.CODE_LIMIT - 1
|| UCharacter.getIntPropertyMaxValue(0x2345) != 0) {
errln("error: UCharacter.getIntPropertyMaxValue() wrong");
}
VersionInfo version = UCharacter.getUnicodeVersion();
// test hasBinaryProperty()
for (int i = 0; i < props.length; ++ i) {
if (props[i][0] < 0) {
if (props[i][0] < 0) {
if (version.compareTo(VersionInfo.getInstance(props[i][1] >> 4,
props[i][1] & 0xF,
0, 0)) < 0) {
@ -1314,39 +1620,53 @@ public final class UCharacterTest extends TestFmwk
}
continue;
}
if (UCharacter.hasBinaryProperty(props[i][0], props[i][1])
!= expected[i]) {
errln("error: UCharacter.hasBinaryProperty(\\u" +
Integer.toHexString(props[i][0]) + ", " +
Integer.toHexString(props[i][1]) + ") has an error expected " +
expected[i]);
}
boolean expect = true;
if (expected[i] == 0) {
expect = false;
}
if (props[i][1] < UProperty.INT_START) {
if (UCharacter.hasBinaryProperty(props[i][0], props[i][1])
!= expect) {
errln("error: UCharacter.hasBinaryProperty(\\u" +
Integer.toHexString(props[i][0]) + ", " +
Integer.toHexString(props[i][1])
+ ") has an error expected " + expected[i]);
}
}
if (UCharacter.getIntPropertyValue(props[i][0], props[i][1])
!= expected[i]) {
errln("error: UCharacter.getIntPropertyValue(\\u" +
Utility.hex(props[i][0], 4) +
", " + props[i][1] + " is wrong, should be "
+ expected[i]);
}
// test separate functions, too
switch (props[i][1]) {
case UProperty.ALPHABETIC:
if (UCharacter.isUAlphabetic(props[i][0]) != expected[i]) {
if (UCharacter.isUAlphabetic(props[i][0]) != expect) {
errln("error: UCharacter.isUAlphabetic(\\u" +
Integer.toHexString(props[i][0]) +
") is wrong expected " + expected[i]);
}
break;
case UProperty.LOWERCASE:
if (UCharacter.isULowercase(props[i][0]) != expected[i]) {
if (UCharacter.isULowercase(props[i][0]) != expect) {
errln("error: UCharacter.isULowercase(\\u" +
Integer.toHexString(props[i][0]) +
") is wrong expected " + expected[i]);
}
break;
case UProperty.UPPERCASE:
if (UCharacter.isUUppercase(props[i][0]) != expected[i]) {
if (UCharacter.isUUppercase(props[i][0]) != expect) {
errln("error: UCharacter.isUUppercase(\\u" +
Integer.toHexString(props[i][0]) +
") is wrong expected " + expected[i]);
}
break;
case UProperty.WHITE_SPACE:
if (UCharacter.isUWhiteSpace(props[i][0]) != expected[i]) {
if (UCharacter.isUWhiteSpace(props[i][0]) != expect) {
errln("error: UCharacter.isUWhiteSpace(\\u" +
Integer.toHexString(props[i][0]) +
") is wrong expected " + expected[i]);
@ -1357,5 +1677,95 @@ public final class UCharacterTest extends TestFmwk
}
}
}
/**
 * Tests the UProperty.NUMERIC_TYPE int property and
 * UCharacter.getUnicodeNumericValue() against a fixed table of code
 * points. testvar and expected are parallel arrays: row i of testvar holds
 * the code point and its expected numeric type, expected[i] holds the
 * expected numeric value of that code point.
 * Numeric values are compared with a tolerance of 0.000001 because some of
 * them are fractions.
 * Every mismatch is reported through errln().
 */
public void TestNumericProperties()
{
    // see UnicodeData.txt, DerivedNumericValues.txt
    // each row: { code point, expected UCharacter.NumericType }
    int testvar[][] = {
        { 0x0F33, UCharacter.NumericType.NUMERIC },
        { 0x0C66, UCharacter.NumericType.DECIMAL },
        { 0x2159, UCharacter.NumericType.NUMERIC },
        { 0x00BD, UCharacter.NumericType.NUMERIC },
        { 0x0031, UCharacter.NumericType.DECIMAL },
        { 0x10320, UCharacter.NumericType.NUMERIC },
        { 0x0F2B, UCharacter.NumericType.NUMERIC },
        { 0x00B2, UCharacter.NumericType.DECIMAL },
        { 0x1813, UCharacter.NumericType.DECIMAL },
        { 0x2173, UCharacter.NumericType.NUMERIC },
        { 0x278E, UCharacter.NumericType.DIGIT },
        { 0x1D7F2, UCharacter.NumericType.DECIMAL },
        { 0x247A, UCharacter.NumericType.DIGIT },
        { 0x1372, UCharacter.NumericType.NUMERIC },
        { 0x216B, UCharacter.NumericType.NUMERIC },
        { 0x16EE, UCharacter.NumericType.NUMERIC },
        { 0x249A, UCharacter.NumericType.NUMERIC },
        { 0x303A, UCharacter.NumericType.NUMERIC },
        { 0x32B2, UCharacter.NumericType.NUMERIC },
        { 0x1375, UCharacter.NumericType.NUMERIC },
        { 0x10323, UCharacter.NumericType.NUMERIC },
        { 0x0BF1, UCharacter.NumericType.NUMERIC },
        { 0x217E, UCharacter.NumericType.NUMERIC },
        { 0x2180, UCharacter.NumericType.NUMERIC },
        { 0x2181, UCharacter.NumericType.NUMERIC },
        { 0x137C, UCharacter.NumericType.NUMERIC },
        { 0x61, UCharacter.NumericType.NONE },
        { 0x3000, UCharacter.NumericType.NONE },
        { 0xfffe, UCharacter.NumericType.NONE },
        { 0x10301, UCharacter.NumericType.NONE },
        { 0xe0033, UCharacter.NumericType.NONE },
        { 0x10ffff, UCharacter.NumericType.NONE }
    };
    // expected[i] is the numeric value for the code point in testvar[i]
    double expected[] = {-1/(double)2,
                         0,
                         1/(double)6,
                         1/(double)2,
                         1,
                         1,
                         3/(double)2,
                         2,
                         3,
                         4,
                         5,
                         6,
                         7,
                         10,
                         12,
                         17,
                         19,
                         30,
                         37,
                         40,
                         50,
                         100,
                         500,
                         1000,
                         5000,
                         10000,
                         UCharacter.NO_NUMERIC_VALUE,
                         UCharacter.NO_NUMERIC_VALUE,
                         UCharacter.NO_NUMERIC_VALUE,
                         UCharacter.NO_NUMERIC_VALUE,
                         UCharacter.NO_NUMERIC_VALUE,
                         UCharacter.NO_NUMERIC_VALUE
    };
    for (int i = 0; i < testvar.length; ++ i) {
        int c = testvar[i][0];
        int type = UCharacter.getIntPropertyValue(c,
                                                  UProperty.NUMERIC_TYPE);
        double nv = UCharacter.getUnicodeNumericValue(c);
        if (type != testvar[i][1]) {
            errln("UProperty.NUMERIC_TYPE(\\u" + Utility.hex(c, 4)
                  + ") = " + type + " should be " + testvar[i][1]);
        }
        // compare with a tolerance, the expected values include fractions
        if (0.000001 <= Math.abs(nv - expected[i])) {
            // the message names the API that produced nv
            errln("UCharacter.getUnicodeNumericValue(\\u"
                  + Utility.hex(c, 4)
                  + ") = " + nv + " should be " + expected[i]);
        }
    }
}
}

View File

@ -6,8 +6,8 @@
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterPropertyDB.java $
* $Date: 2002/09/19 21:24:30 $
* $Revision: 1.16 $
* $Date: 2002/10/03 23:42:02 $
* $Revision: 1.17 $
*
*******************************************************************************
*/
@ -98,22 +98,6 @@ public final class UCharacterProperty implements Trie.DataManipulate
* value
*/
public static final int EXC_COMBINING_CLASS_ = 9;
/**
* Non numeric type
*/
public static final int NON_NUMERIC_TYPE_ = 0;
/**
* Numeric type for decimal digits
*/
public static final int DECIMAL_DIGIT_NUMERIC_TYPE_ = 1;
/**
* Numeric type for digits
*/
public static final int DIGIT_NUMERIC_TYPE_ = 2;
/**
* Numeric type for non digits numbers
*/
public static final int NON_DIGIT_NUMERIC_TYPE_ = 3;
/**
* Maximum number of expansion for a case mapping
*/
@ -142,20 +126,20 @@ public final class UCharacterProperty implements Trie.DataManipulate
* Latin lowercase i
*/
public static final char LATIN_SMALL_LETTER_I_ = 0x69;
/**
* Character type mask
*/
public static final int TYPE_MASK = 0x1F;
/**
* Exception test mask
*/
public static final int EXCEPTION_MASK = 0x20;
/**
* Mirror test mask
*/
public static final int MIRROR_MASK = 1 << 11;
// public methods ----------------------------------------------------
/**
 * Returns the type stored in the property word that a trie value points
 * to. For use in enumeration.
 * @param value trie value associated with a codepoint; used here as the
 *        index into the property table
 * @return type value taken from the indexed property word
 */
public int extract(int value)
{
    // the trie value selects the 32-bit property word, the general
    // category is then pulled out of that word
    final int propertyWord = m_property_[value];
    return getPropType(propertyWord);
}
/**
* Called by com.ibm.icu.util.Trie to extract from a lead surrogate's
@ -184,40 +168,6 @@ public final class UCharacterProperty implements Trie.DataManipulate
return m_property_[m_trie_.getCodePointValue(ch)];
}
/**
 * Returns the character category held in a property value.
 * @param prop character property value
 * @return the bits of prop selected by LAST_5_BIT_MASK_, i.e. the category
 */
public static int getPropType(int prop)
{
    // Character information is packed into one word; the category is
    // whatever remains after applying the category mask.
    final int category = prop & LAST_5_BIT_MASK_;
    return category;
}
/**
 * Tells whether a property value signals that the data has to be looked
 * up in the exception block.
 * @param prop property value
 * @return true if any bit of EXCEPTION_MASK_ is set in prop, false
 *         otherwise
 */
public static boolean isExceptionIndicator(int prop)
{
    if ((prop & EXCEPTION_MASK_) == 0) {
        return false;
    }
    return true;
}
/**
 * Gets the numeric type encoded in a property value.
 * @param prop property value
 * @return numeric type in prop: prop shifted right by NUMERIC_TYPE_SHIFT_
 *         and masked with NUMERIC_TYPE_MASK_
 */
public static int getNumericType(int prop)
{
    final int shifted = prop >> NUMERIC_TYPE_SHIFT_;
    return shifted & NUMERIC_TYPE_MASK_;
}
/**
* Getting the signed numeric value of a character embedded in the property
* argument
@ -238,26 +188,6 @@ public final class UCharacterProperty implements Trie.DataManipulate
{
return (prop >> VALUE_SHIFT_) & UNSIGNED_VALUE_MASK_AFTER_SHIFT_;
}
/**
 * Tells whether a property value has the mirror indicator set.
 * @param prop property value
 * @return true if any bit of MIRROR_MASK_ is set in prop, false otherwise
 */
public static boolean isMirrored(int prop)
{
    if ((prop & MIRROR_MASK_) == 0) {
        return false;
    }
    return true;
}
/**
 * Gets the direction data encoded in a property value.
 * @param prop property value
 * @return direction value in prop: prop shifted right by BIDI_SHIFT_ and
 *         masked with BIDI_MASK_AFTER_SHIFT_
 */
public static int getDirection(int prop)
{
    final int shifted = prop >> BIDI_SHIFT_;
    return shifted & BIDI_MASK_AFTER_SHIFT_;
}
/**
* Getting the unsigned numeric value of a character embedded in the property
@ -296,14 +226,13 @@ public final class UCharacterProperty implements Trie.DataManipulate
// contained in exception data
int evalue = m_exception_[index];
switch (etype)
{
case EXC_COMBINING_CLASS_ :
return evalue;
default :
index ++;
// contained in the exception digit address
index = addExceptionOffset(evalue, etype, index);
switch (etype) {
case EXC_COMBINING_CLASS_ :
return evalue;
default :
index ++;
// contained in the exception digit address
index = addExceptionOffset(evalue, etype, index);
}
return m_exception_[index];
}
@ -481,7 +410,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
switch(property) {
case UProperty.ALPHABETIC: {
// Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
boolean generalmatch =
generaltype == UCharacterCategory.UPPERCASE_LETTER
|| generaltype == UCharacterCategory.LOWERCASE_LETTER
@ -502,7 +431,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
BIDI_CONTROL_PROPERTY_);
}
case UProperty.BIDI_MIRRORED: {
return isMirrored(getProperty(codepoint));
return (getProperty(codepoint) & MIRROR_MASK) != 0;
}
case UProperty.DASH: {
return compareAdditionalType(getAdditional(codepoint, 1),
@ -524,7 +453,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
}
if (!compareAdditionalType(additionalproperty,
WHITE_SPACE_PROPERTY_)) {
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
if (generaltype == UCharacterCategory.FORMAT
|| generaltype == UCharacterCategory.CONTROL
|| generaltype == UCharacterCategory.SURROGATE) {
@ -553,7 +482,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
// [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-(Me+Mn+Mc+Other_Grapheme_Extend)-CGJ ==
// [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Me-Mn-Mc-Grapheme_Link-Other_Grapheme_Extend-CGJ
if (codepoint != 0x34f) { // CGJ
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
if (generaltype != UCharacterCategory.CONTROL
&& generaltype != UCharacterCategory.FORMAT
&& generaltype != UCharacterCategory.SURROGATE
@ -587,7 +516,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
OTHER_GRAPHEME_EXTEND_PROPERTY_)) {
return true;
}
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
if (generaltype == UCharacterCategory.ENCLOSING_MARK ||
generaltype == UCharacterCategory.NON_SPACING_MARK ||
generaltype
@ -612,7 +541,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
}
case UProperty.ID_CONTINUE: {
// ID_Start+Mn+Mc+Nd+Pc == Lu+Ll+Lt+Lm+Lo+Nl+Mn+Mc+Nd+Pc
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
return generaltype == UCharacterCategory.UPPERCASE_LETTER ||
generaltype == UCharacterCategory.LOWERCASE_LETTER ||
generaltype == UCharacterCategory.TITLECASE_LETTER ||
@ -628,7 +557,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
}
case UProperty.ID_START: {
// Lu+Ll+Lt+Lm+Lo+Nl
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
return generaltype == UCharacterCategory.UPPERCASE_LETTER ||
generaltype == UCharacterCategory.LOWERCASE_LETTER ||
generaltype == UCharacterCategory.TITLECASE_LETTER ||
@ -658,7 +587,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
}
case UProperty.LOWERCASE: {
// Ll+Other_Lowercase
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
if (generaltype == UCharacterCategory.LOWERCASE_LETTER) {
return true;
}
@ -667,7 +596,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
}
case UProperty.MATH: {
// Sm+Other_Math
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
if (generaltype == UCharacterCategory.MATH_SYMBOL) {
return true;
}
@ -700,7 +629,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
}
case UProperty.UPPERCASE: {
// Lu+Other_Uppercase
int generaltype = getPropType(getProperty(codepoint));
int generaltype = getProperty(codepoint) & TYPE_MASK;
if (generaltype == UCharacterCategory.UPPERCASE_LETTER) {
return true;
}
@ -942,8 +871,8 @@ public final class UCharacterProperty implements Trie.DataManipulate
StringBuffer buffer)
{
int props = getProperty(ch);
if (!UCharacterProperty.isExceptionIndicator(props)) {
int type = UCharacterProperty.getPropType(props);
if ((props & EXCEPTION_MASK) == 0) {
int type = props & TYPE_MASK;
if (type == UCharacterCategory.UPPERCASE_LETTER ||
type == UCharacterCategory.TITLECASE_LETTER) {
ch += UCharacterProperty.getSignedValue(props);
@ -976,8 +905,8 @@ public final class UCharacterProperty implements Trie.DataManipulate
UnicodeCharacterIterator uchariter, char buffer[])
{
int props = getProperty(ch);
if (!UCharacterProperty.isExceptionIndicator(props)) {
int type = UCharacterProperty.getPropType(props);
if ((props & EXCEPTION_MASK) == 0) {
int type = props & TYPE_MASK;
if (type == UCharacterCategory.UPPERCASE_LETTER ||
type == UCharacterCategory.TITLECASE_LETTER) {
ch += UCharacterProperty.getSignedValue(props);
@ -1120,8 +1049,8 @@ public final class UCharacterProperty implements Trie.DataManipulate
boolean upperflag, StringBuffer buffer)
{
int props = getProperty(ch);
if (!UCharacterProperty.isExceptionIndicator(props)) {
int type = UCharacterProperty.getPropType(props);
if ((props & EXCEPTION_MASK) == 0) {
int type = props & TYPE_MASK;
if (type == UCharacterCategory.LOWERCASE_LETTER) {
ch -= UCharacterProperty.getSignedValue(props);
}
@ -1162,8 +1091,8 @@ public final class UCharacterProperty implements Trie.DataManipulate
boolean upperflag, char buffer[])
{
int props = getProperty(ch);
if (!UCharacterProperty.isExceptionIndicator(props)) {
int type = UCharacterProperty.getPropType(props);
if ((props & EXCEPTION_MASK) == 0) {
int type = props & TYPE_MASK;
if (type == UCharacterCategory.LOWERCASE_LETTER) {
ch -= UCharacterProperty.getSignedValue(props);
}
@ -1352,34 +1281,49 @@ public final class UCharacterProperty implements Trie.DataManipulate
{
// "white space" in the sense of ICU rule parsers: Cf+White_Space
UCharacterProperty property = UCharacterProperty.getInstance();
return property.getType(c) == UCharacterCategory.FORMAT ||
property.hasBinaryProperty(c, UProperty.WHITE_SPACE);
return (property.getProperty(c) & TYPE_MASK)
== UCharacterCategory.FORMAT
|| property.hasBinaryProperty(c, UProperty.WHITE_SPACE);
}
/**
* Gets the maximum values for some enum/int properties.
* <p>This is a plain accessor for m_maxBlockScriptValue_, which is documented
* as holding the maximum values for block and script codes, with bits used
* as in vector word 0.</p>
* @return maximum values for the integer properties, as a single int.
*/
public int getMaxBlockScriptValues()
{
return m_maxBlockScriptValue_;
}
// protected variables -----------------------------------------------
/**
* Case table
*/
protected char m_case_[];
char m_case_[];
/**
* Exception property table
*/
protected int m_exception_[];
int m_exception_[];
/**
* Extra property trie
*/
protected CharTrie m_additionalTrie_;
CharTrie m_additionalTrie_;
/**
* Extra property vectors, 1st column for age and second for binary
* properties.
*/
protected int m_additionalVectors_[];
int m_additionalVectors_[];
/**
* Number of additional columns
*/
protected int m_additionalColumnsCount_;
int m_additionalColumnsCount_;
/**
* Maximum values for block and script codes, bits used as in vector word
* 0
*/
int m_maxBlockScriptValue_;
// private variables -------------------------------------------------
@ -1445,42 +1389,12 @@ public final class UCharacterProperty implements Trie.DataManipulate
*/
private static final int VALUE_SHIFT_ = 20;
/**
* Exception test mask
*/
private static final int EXCEPTION_MASK_ = 0x20;
/**
* Mask to be applied after shifting to obtain an unsigned numeric value
*/
private static final int UNSIGNED_VALUE_MASK_AFTER_SHIFT_ = 0x7FF;
/**
* Shift to get bidi bits
*/
private static final int BIDI_SHIFT_ = 6;
/**
* Mask to be applied after shifting to get bidi bits
*/
private static final int BIDI_MASK_AFTER_SHIFT_ = 0x1F;
/**
* Mirror test mask
*/
private static final int MIRROR_MASK_ = 1 << 11;
/**
* Shift to get numeric type
*/
private static final int NUMERIC_TYPE_SHIFT_ = 12;
/**
* Mask to get numeric type
*/
private static final int NUMERIC_TYPE_MASK_ = 0x7;
/**
* Shift to get reserved value
*/
private static final int RESERVED_SHIFT_ = 15;
@ -1509,7 +1423,6 @@ public final class UCharacterProperty implements Trie.DataManipulate
*/
private static int MAX_EXCEPTIONS_COUNT_ = 1 << VALUE_BITS_;
/**
* To get the last 5 bits out from a data type
*/
@ -1704,17 +1617,6 @@ public final class UCharacterProperty implements Trie.DataManipulate
}
// private methods -------------------------------------------------------
/**
* <p>Returns a value indicating a code point's Unicode category.</p>
* <p>This method does not check for the codepoint validity</p>
* <p>The property word is fetched with getProperty(ch) and the category is
* extracted from it by getPropType.</p>
* @param ch code point whose type is to be determined
* @return category which is a value of UCharacterCategory
*/
private int getType(int ch)
{
return getPropType(getProperty(ch));
}
/**
* Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
@ -1818,7 +1720,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
int ch = uchariter.nextCodePoint(); // start checking
while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
int cat = getType(ch);
int cat = getProperty(ch) & TYPE_MASK;
if (isCased(ch, cat)) {
return false; // followed by cased letter
}
@ -1846,7 +1748,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
int ch = uchariter.previousCodePoint();
while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
int cat = getType(ch);
int cat = getProperty(ch) & TYPE_MASK;
if (isCased(ch, cat)) {
return true; // preceded by cased letter
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/UCharacterPropertyReader.java,v $
* $Date: 2002/08/01 19:50:26 $
* $Revision: 1.6 $
* $Date: 2002/10/03 23:42:02 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -79,6 +79,10 @@ final class UCharacterPropertyReader
count --;
m_reservedOffset_ = m_dataInputStream_.readInt();
count --;
m_dataInputStream_.skipBytes(3 << 2);
count -= 3;
ucharppty.m_maxBlockScriptValue_ = m_dataInputStream_.readInt();
count --; // 10
m_dataInputStream_.skipBytes(count << 2);
// read the trie index block

File diff suppressed because it is too large Load Diff

View File

@ -5,8 +5,8 @@
******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterTypeIterator.java,v $
* $Date: 2002/03/15 22:48:07 $
* $Revision: 1.5 $
* $Date: 2002/10/03 23:42:02 $
* $Revision: 1.6 $
*
******************************************************************************
*/
@ -68,7 +68,7 @@ class UCharacterTypeIterator extends TrieIterator
if (m_property_ == null) {
m_property_ = UCharacterProperty.getInstance().m_property_;
}
return UCharacterProperty.getPropType(m_property_[value]);
return m_property_[value] & UCharacterProperty.TYPE_MASK;
}
// private data members ---------------------------------------------

View File

@ -6,8 +6,8 @@
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
* $Date: 2002/07/08 23:52:13 $
* $Revision: 1.3 $
* $Date: 2002/10/03 23:42:02 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -32,7 +32,6 @@ package com.ibm.icu.lang;
* @author Syn Wee Quek
* @since March 8 2002
* @see com.ibm.icu.lang.UCharacter
* @draft ICU 2.1
*/
public interface UProperty
{
@ -43,24 +42,20 @@ public interface UProperty
* <p>Property for UCharacter.isUAlphabetic(), different from the property
* in UCharacter.isalpha().</p>
* <p>Lu + Ll + Lt + Lm + Lo + Nl + Other_Alphabetic.</p>
* @draft ICU 2.1
*/
public static final int ALPHABETIC = 0;
/**
* First constant for binary Unicode properties.
* @draft ICU 2.1
*/
public static final int BINARY_START = ALPHABETIC;
/**
* Binary property ASCII_Hex_Digit (0-9 A-F a-f).
* @draft ICU 2.1
*/
public static final int ASCII_HEX_DIGIT = 1;
/**
* <p>Binary property Bidi_Control.</p>
* <p>Format controls which have specific functions in the Bidi Algorithm.
* </p>
* @draft ICU 2.1
*/
public static final int BIDI_CONTROL = 2;
/**
@ -68,13 +63,11 @@ public interface UProperty
* <p>Characters that may change display in RTL text.</p>
* <p>Property for UCharacter.isMirrored().</p>
* <p>See Bidi Algorithm; UTR 9.</p>
* @draft ICU 2.1
*/
public static final int BIDI_MIRRORED = 3;
/**
* <p>Binary property Dash.</p>
* <p>Variations of dashes.</p>
* @draft ICU 2.1
*/
public static final int DASH = 4;
/**
@ -84,114 +77,97 @@ public interface UProperty
* </p>
* <p>Codepoints (2060..206F, FFF0..FFFB, E0000..E0FFF) +
* Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)</p>
* @draft ICU 2.1
*/
public static final int DEFAULT_IGNORABLE_CODE_POINT = 5;
/**
* <p>Binary property Deprecated (new).</p>
* <p>The usage of deprecated characters is strongly discouraged.</p>
* @draft ICU 2.1
*/
public static final int DEPRECATED = 6;
/**
* <p>Binary property Diacritic.</p>
* <p>Characters that linguistically modify the meaning of another
* character to which they apply.</p>
* @draft ICU 2.1
*/
public static final int DIACRITIC = 7;
/**
* <p>Binary property Extender.</p>
* <p>Extend the value or shape of a preceding alphabetic character, e.g.
* length and iteration marks.</p>
* @draft ICU 2.1
*/
public static final int EXTENDER = 8;
/**
* <p>Binary property Full_Composition_Exclusion.</p>
* <p>CompositionExclusions.txt + Singleton Decompositions +
* Non-Starter Decompositions.</p>
* @draft ICU 2.1
*/
public static final int FULL_COMPOSITION_EXCLUSION = 9;
/**
* <p>Binary property Grapheme_Base (new).</p>
* <p>For programmatic determination of grapheme cluster boundaries.
* [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ</p>
* @draft ICU 2.1
*/
public static final int GRAPHEME_BASE = 10;
/**
* <p>Binary property Grapheme_Extend (new).</p>
* <p>For programmatic determination of grapheme cluster boundaries.</p>
* <p>Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ</p>
* @draft ICU 2.1
*/
public static final int GRAPHEME_EXTEND = 11;
/**
* <p>Binary property Grapheme_Link (new).</p>
* <p>For programmatic determination of grapheme cluster boundaries.</p>
* @draft ICU 2.1
*/
public static final int GRAPHEME_LINK = 12;
/**
* <p>Binary property Hex_Digit.</p>
* <p>Characters commonly used for hexadecimal numbers.</p>
* @draft ICU 2.1
*/
public static final int HEX_DIGIT = 13;
/**
* <p>Binary property Hyphen.</p>
* <p>Dashes used to mark connections between pieces of words, plus the
* Katakana middle dot.</p>
* @draft ICU 2.1
*/
public static final int HYPHEN = 14;
/**
* <p>Binary property ID_Continue.</p>
* <p>Characters that can continue an identifier.</p>
* <p>ID_Start+Mn+Mc+Nd+Pc</p>
* @draft ICU 2.1
*/
public static final int ID_CONTINUE = 15;
/**
* <p>Binary property ID_Start.</p>
* <p>Characters that can start an identifier.</p>
* <p>Lu+Ll+Lt+Lm+Lo+Nl</p>
* @draft ICU 2.1
*/
public static final int ID_START = 16;
/**
* <p>Binary property Ideographic.</p>
* <p>CJKV ideographs.</p>
* @draft ICU 2.1
*/
public static final int IDEOGRAPHIC = 17;
/**
* <p>Binary property IDS_Binary_Operator (new).</p>
* <p>For programmatic determination of Ideographic Description Sequences.
* </p>
* @draft ICU 2.1
*/
public static final int IDS_BINARY_OPERATOR = 18;
/**
* <p>Binary property IDS_Trinary_Operator (new).</p>
* <p>For programmatic determination of Ideographic Description
* Sequences.</p>
* @draft ICU 2.1
*/
public static final int IDS_TRINARY_OPERATOR = 19;
/**
* <p>Binary property Join_Control.</p>
* <p>Format controls for cursive joining and ligation.</p>
* @draft ICU 2.1
*/
public static final int JOIN_CONTROL = 20;
/**
* <p>Binary property Logical_Order_Exception (new).</p>
* <p>Characters that do not use logical order and require special
* handling in most processing.</p>
* @draft ICU 2.1
*/
public static final int LOGICAL_ORDER_EXCEPTION = 21;
/**
@ -199,52 +175,44 @@ public interface UProperty
* <p>Same as UCharacter.isULowercase(), different from
* UCharacter.islower().</p>
* <p>Ll+Other_Lowercase</p>
* @draft ICU 2.1
*/
public static final int LOWERCASE = 22;
/** <p>Binary property Math.</p>
* <p>Sm+Other_Math</p>
* @draft ICU 2.1
*/
public static final int MATH = 23;
/**
* <p>Binary property Noncharacter_Code_Point.</p>
* <p>Code points that are explicitly defined as illegal for the encoding
* of characters.</p>
* @draft ICU 2.1
*/
public static final int NONCHARACTER_CODE_POINT = 24;
/**
* <p>Binary property Quotation_Mark.</p>
* @draft ICU 2.1
*/
public static final int QUOTATION_MARK = 25;
/**
* <p>Binary property Radical (new).</p>
* <p>For programmatic determination of Ideographic Description
* Sequences.</p>
* @draft ICU 2.1
*/
public static final int RADICAL = 26;
/**
* <p>Binary property Soft_Dotted (new).</p>
* <p>Characters with a "soft dot", like i or j.</p>
* <p>An accent placed on these characters causes the dot to disappear.</p>
* @draft ICU 2.1
*/
public static final int SOFT_DOTTED = 27;
/**
* <p>Binary property Terminal_Punctuation.</p>
* <p>Punctuation characters that generally mark the end of textual
* units.</p>
* @draft ICU 2.1
*/
public static final int TERMINAL_PUNCTUATION = 28;
/**
* <p>Binary property Unified_Ideograph (new).</p>
* <p>For programmatic determination of Ideographic Description
* Sequences.</p>
* @draft ICU 2.1
*/
public static final int UNIFIED_IDEOGRAPH = 29;
/**
@ -252,7 +220,6 @@ public interface UProperty
* <p>Same as UCharacter.isUUppercase(), different from
* UCharacter.isUpperCase().</p>
* <p>Lu+Other_Uppercase</p>
* @draft ICU 2.1
*/
public static final int UPPERCASE = 30;
/**
@ -260,104 +227,101 @@ public interface UProperty
* <p>Same as UCharacter.isUWhiteSpace(), different from
* UCharacter.isSpace() and UCharacter.isWhitespace().</p>
* <p>Space characters+TAB+CR+LF-ZWSP-ZWNBSP</p>
* @draft ICU 2.1
*/
public static final int WHITE_SPACE = 31;
/**
* <p>Binary property XID_Continue.</p>
* <p>ID_Continue modified to allow closure under normalization forms
* NFKC and NFKD.</p>
* @draft ICU 2.1
*/
public static final int XID_CONTINUE = 32;
/**
* <p>Binary property XID_Start.</p>
* <p>ID_Start modified to allow closure under normalization forms NFKC
* and NFKD.</p>
* @draft ICU 2.1
*/
public static final int XID_START = 33;
/**
* <p>One more than the last constant for binary Unicode properties.</p>
* @draft ICU 2.1
*/
public static final int BINARY_LIMIT = 34;
/**
* Enumerated property Bidi_Class.
* Same as u_charDirection, returns UCharDirection values.
* @draft ICU 2.2
* Same as UCharacter.getDirection(int), returns UCharacterDirection values.
* @draft ICU 2.4
*/
// public static final int BIDI_CLASS = 0x1000;
public static final int BIDI_CLASS = 0x1000;
/**
* First constant for enumerated/integer Unicode properties.
* @draft ICU 2.2
* @draft ICU 2.4
*/
// public static final int INT_START = BIDI_CLASS;
public static final int INT_START = BIDI_CLASS;
/**
* Enumerated property Block.
* Same as ublock_getCode, returns UBlockCode values.
* @draft ICU 2.2
* Same as UCharacter.UnicodeBlock.of(int), returns UCharacter.UnicodeBlock
* values.
* @draft ICU 2.4
*/
// public static final int BLOCK = 0x1001;
public static final int BLOCK = 0x1001;
/**
* Enumerated property Canonical_Combining_Class.
* Same as getCombiningClass, returns 8-bit numeric values.
* @draft ICU 2.2
* Same as UCharacter.getCombiningClass(int), returns 8-bit numeric values.
* @draft ICU 2.4
*/
// public static final int CANONICAL_COMBINING_CLASS = 0x1002;
public static final int CANONICAL_COMBINING_CLASS = 0x1002;
/**
* Enumerated property Decomposition_Type.
* Returns UDecompositionType values.
* @draft ICU 2.2
* Returns UCharacter.DecompositionType values.
* @draft ICU 2.4
*/
// public static final int DECOMPOSITION_TYPE = 0x1003;
public static final int DECOMPOSITION_TYPE = 0x1003;
/**
* Enumerated property East_Asian_Width.
* See http://www.unicode.org/reports/tr11/
* Returns UEastAsianWidth values.
* @draft ICU 2.2
* Returns UCharacter.EastAsianWidth values.
* @draft ICU 2.4
*/
// public static final int EAST_ASIAN_WIDTH = 0x1004;
public static final int EAST_ASIAN_WIDTH = 0x1004;
/**
* Enumerated property General_Category.
* Same as charType, returns UCharacterCategory values.
* @draft ICU 2.2
* Same as UCharacter.getType(int), returns UCharacterCategory values.
* @draft ICU 2.4
*/
// public static final int GENERAL_CATEGORY = 0x1005;
public static final int GENERAL_CATEGORY = 0x1005;
/**
* Enumerated property Joining_Group.
* Returns UJoiningGroup values.
* @draft ICU 2.2
* Returns UCharacter.JoiningGroup values.
* @draft ICU 2.4
*/
// public static final int JOINING_GROUP = 0x1006;
public static final int JOINING_GROUP = 0x1006;
/**
* Enumerated property Joining_Type.
* Returns UJoiningType values.
* @draft ICU 2.2
* Returns UCharacter.JoiningType values.
* @draft ICU 2.4
*/
// public static final int JOINING_TYPE = 0x1007;
public static final int JOINING_TYPE = 0x1007;
/**
* Enumerated property Line_Break.
* Returns ULineBreak values.
* @draft ICU 2.2
* Returns UCharacter.LineBreak values.
* @draft ICU 2.4
*/
// public static final int LINE_BREAK = 0x1008;
public static final int LINE_BREAK = 0x1008;
/**
* Enumerated property Numeric_Type.
* Returns UNumericType values.
* @draft ICU 2.2
* Returns UCharacter.NumericType values.
* @draft ICU 2.4
*/
// public static final int NUMERIC_TYPE = 0x1009;
public static final int NUMERIC_TYPE = 0x1009;
/**
* Enumerated property Script.
* Same as uscript_getScript, returns UScriptCode values.
* @draft ICU 2.2
* Same as UScript.getScript(int), returns UScript values.
* @draft ICU 2.4
*/
// public static final int SCRIPT = 0x100A;
public static final int SCRIPT = 0x100A;
/**
* One more than the last constant for enumerated/integer Unicode
* properties.
* @draft ICU 2.2
* @draft ICU 2.4
*/
// public static final int INT_LIMIT = 0x100B;
public static final int INT_LIMIT = 0x100B;
}