diff --git a/icu4j/src/com/ibm/icu/lang/UCharacter.java b/icu4j/src/com/ibm/icu/lang/UCharacter.java index 1e2a6d507f..ccea0c8934 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacter.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacter.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $ -* $Date: 2002/11/22 22:53:13 $ -* $Revision: 1.53 $ +* $Date: 2002/12/03 00:47:50 $ +* $Revision: 1.54 $ * ******************************************************************************* */ @@ -27,64 +27,64 @@ import com.ibm.icu.impl.UCharacterNameChoice; import com.ibm.icu.impl.UPropertyAliases; /** -*
-* The UCharacter class provides extensions to the -* -* java.lang.Character class. These extensions provide support for -* Unicode 3.1 properties and together with the UTF16 -* class, provide support for supplementary characters (those with code -* points above U+FFFF). -*
-*-* Code points are represented in these API using ints. While it would be -* more convenient in Java to have a separate primitive datatype for them, -* ints suffice in the meantime. -*
-*
-* To use this class please add the jar file name icu4j.jar to the
-* class path, since it contains data files which supply the information used
-* by this file.
-* E.g. In Windows
-* set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar
.
-* Otherwise, another method would be to copy the files uprops.dat and
-* unames.icu from the icu4j source subdirectory
-* $ICU4J_SRC/src/com.ibm.icu.impl.data to your class directory
-* $ICU4J_CLASS/com.ibm.icu.impl.data.
-*
-* Aside from the additions for UTF-16 support, and the updated Unicode 3.1 -* properties, the main differences between UCharacter and Character are: -*
-* Further detail differences can be determined from the program -* -* com.ibm.icu.dev.test.lang.UCharacterCompare -*
-*-* This class is not subclassable -*
-* @author Syn Wee Quek -* @since oct 06 2000 -* @see com.ibm.icu.lang.UCharacterCategory -* @see com.ibm.icu.lang.UCharacterDirection -*/ + *+ * The UCharacter class provides extensions to the + * + * java.lang.Character class. These extensions provide support for + * Unicode 3.1 properties and together with the UTF16 + * class, provide support for supplementary characters (those with code + * points above U+FFFF). + *
+ *+ * Code points are represented in these API using ints. While it would be + * more convenient in Java to have a separate primitive datatype for them, + * ints suffice in the meantime. + *
+ *
+ * To use this class please add the jar file name icu4j.jar to the
+ * class path, since it contains data files which supply the information used
+ * by this file.
+ * E.g. In Windows
+ * set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar
.
+ * Otherwise, another method would be to copy the files uprops.dat and
+ * unames.icu from the icu4j source subdirectory
+ * $ICU4J_SRC/src/com.ibm.icu.impl.data to your class directory
+ * $ICU4J_CLASS/com.ibm.icu.impl.data.
+ *
+ * Aside from the additions for UTF-16 support, and the updated Unicode 3.1 + * properties, the main differences between UCharacter and Character are: + *
+ * Further detail differences can be determined from the program + * + * com.ibm.icu.dev.test.lang.UCharacterCompare + *
+ *+ * This class is not subclassable + *
+ * @author Syn Wee Quek + * @stable ICU 2.1 + * @see com.ibm.icu.lang.UCharacterCategory + * @see com.ibm.icu.lang.UCharacterDirection + */ /* * notes: @@ -1816,34 +1816,38 @@ public final class UCharacter // public data members ----------------------------------------------- /** - * The lowest Unicode code point value. - */ + * The lowest Unicode code point value. + * @stable ICU 2.1 + */ public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE; /** - * The highest Unicode code point value (scalar value) according to the - * Unicode Standard. - * This is a 21-bit value (21 bits, rounded up).java.lang.Character.digit()
. Note that this
- * will return positive values for code points for which isDigit
- * returns false, just like java.lang.Character.
- * java.lang.Character.digit()
. Note that this
+ * will return positive values for code points for which isDigit
+ * returns false, just like java.lang.Character.
+ * digit(int, int)
- * that provides a decimal radix.
- * digit(int, int)
+ * that provides a decimal radix.
+ * Get the numeric value for a Unicode code point as defined in the @@ -2003,7 +1991,7 @@ public final class UCharacter *
* @param ch Code point to get the numeric value for. * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. - * @draft 2.4 + * @draft ICU 2.4 */ public static double getUnicodeNumericValue(int ch) { @@ -2064,14 +2052,16 @@ public final class UCharacter } /** - * Returns a value indicating a code point's Unicode category. - * Up-to-date Unicode implementation of java.lang.Character.getType() except - * for the above mentioned code points that had their category changed.java.lang.Character.isDigit()
. It returns true for
- * decimal digits only.
- * java.lang.Character.isDigit()
. It returns true for decimal
+ * digits only.
+ * Retrieves a name for a valid codepoint. Unlike, getName(int) and - * getName1_0(int), this method will return a name even for codepoints that - * are not assigned a name in UnicodeData.txt. - *
- * The names are returned in the following order. - *Retrieves a name for a valid codepoint. Unlike, getName(int) and + * getName1_0(int), this method will return a name even for codepoints that + * are not assigned a name in UnicodeData.txt. + *
+ * The names are returned in the following order. + *Find a Unicode code point by its most current Unicode name and - * return its code point value. All Unicode names are in uppercase.
- * Note calling any methods related to code point names, e.g. get*Name*() - * incurs a one-time initialisation cost to construct the name tables. - * @param name most current Unicode character name whose code point is to be - * returned - * @return code point or -1 if name is not found - */ + *Find a Unicode code point by its most current Unicode name and + * return its code point value. All Unicode names are in uppercase.
+ * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name most current Unicode character name whose code point is to + * be returned + * @return code point or -1 if name is not found + * @stable ICU 2.1 + */ public static int getCharFromName(String name) { return NAME_.getCharFromName( @@ -2827,14 +2852,15 @@ public final class UCharacter } /** - *Find a Unicode character by its version 1.0 Unicode name and return - * its code point value. All Unicode names are in uppercase.
- * Note calling any methods related to code point names, e.g. get*Name*() - * incurs a one-time initialisation cost to construct the name tables. - * @param name Unicode 1.0 code point name whose code point is to - * returned - * @return code point or -1 if name is not found - */ + *Find a Unicode character by its version 1.0 Unicode name and return + * its code point value. All Unicode names are in uppercase.
+ * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name Unicode 1.0 code point name whose code point is to + * be returned + * @return code point or -1 if name is not found + * @stable ICU 2.1 + */ public static int getCharFromName1_0(String name) { return NAME_.getCharFromName( @@ -2842,24 +2868,24 @@ public final class UCharacter } /** - *Find a Unicode character by either its name and return its code - * point value. All Unicode names are in uppercase. - * Extended names are all lowercase except for numbers and are contained - * within angle brackets.
- * The names are searched in the following order - *Find a Unicode character by either its name and return its code + * point value. All Unicode names are in uppercase. + * Extended names are all lowercase except for numbers and are contained + * within angle brackets.
+ * The names are searched in the following order + *Gets the titlecase version of the argument string.
- *Position for titlecasing is determined by the argument break - * iterator, hence the user can customized his break iterator for - * a specialized titlecasing. In this case only the forward iteration - * needs to be implemented. - * If the break iterator passed in is null, the default Unicode algorithm - * will be used to determine the titlecase positions. - *
- *Only positions returned by the break iterator will be title cased, - * character in between the positions will all be in lower case.
- *Casing is dependent on the default locale and context-sensitive
- * @param str source string to be performed on - * @param breakiter break iterator to determine the positions in which - * the character should be title cased. - * @return lowercase version of the argument string - * @draft 2.1 - */ + *Gets the titlecase version of the argument string.
+ *Position for titlecasing is determined by the argument break + * iterator, hence the user can customized his break iterator for + * a specialized titlecasing. In this case only the forward iteration + * needs to be implemented. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. + *
+ *Only positions returned by the break iterator will be title cased, + * character in between the positions will all be in lower case.
+ *Casing is dependent on the default locale and context-sensitive
+ * @param str source string to be performed on + * @param breakiter break iterator to determine the positions in which + * the character should be title cased. + * @return titlecase version of the argument string + * @draft ICU 2.1 + */ public static String toTitleCase(String str, BreakIterator breakiter) { return toTitleCase(Locale.getDefault(), str, breakiter); } /** - * Gets uppercase version of the argument string. - * Casing is dependent on the argument locale and context-sensitive. - * @param locale which string is to be converted in - * @param str source string to be performed on - * @return uppercase version of the argument string - */ + * Gets uppercase version of the argument string. + * Casing is dependent on the argument locale and context-sensitive. + * @param locale which string is to be converted in + * @param str source string to be performed on + * @return uppercase version of the argument string + * @stable ICU 2.1 + */ public static String toUpperCase(Locale locale, String str) { if (locale == null) { @@ -3093,12 +3120,13 @@ public final class UCharacter } /** - * Gets lowercase version of the argument string. - * Casing is dependent on the argument locale and context-sensitive - * @param locale which string is to be converted in - * @param str source string to be performed on - * @return lowercase version of the argument string - */ + * Gets lowercase version of the argument string. + * Casing is dependent on the argument locale and context-sensitive + * @param locale which string is to be converted in + * @param str source string to be performed on + * @return lowercase version of the argument string + * @stable ICU 2.1 + */ public static String toLowerCase(Locale locale, String str) { int length = str.length(); @@ -3111,24 +3139,24 @@ public final class UCharacter } /** - *Gets the titlecase version of the argument string.
- *Position for titlecasing is determined by the argument break - * iterator, hence the user can customized his break iterator for - * a specialized titlecasing. In this case only the forward iteration - * needs to be implemented. - * If the break iterator passed in is null, the default Unicode algorithm - * will be used to determine the titlecase positions. - *
- *Only positions returned by the break iterator will be title cased, - * character in between the positions will all be in lower case.
- *Casing is dependent on the argument locale and context-sensitive
- * @param locale which string is to be converted in - * @param str source string to be performed on - * @param breakiter break iterator to determine the positions in which - * the character should be title cased. - * @return lowercase version of the argument string - * @draft 2.1 - */ + *Gets the titlecase version of the argument string.
+ *Position for titlecasing is determined by the argument break + * iterator, hence the user can customized his break iterator for + * a specialized titlecasing. In this case only the forward iteration + * needs to be implemented. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. + *
+ *Only positions returned by the break iterator will be title cased, + * character in between the positions will all be in lower case.
+ *Casing is dependent on the argument locale and context-sensitive
+ * @param locale which string is to be converted in + * @param str source string to be performed on + * @param breakiter break iterator to determine the positions in which + * the character should be title cased. + * @return titlecase version of the argument string + * @draft ICU 2.1 + */ public static String toTitleCase(Locale locale, String str, BreakIterator breakiter) { @@ -3142,21 +3170,22 @@ public final class UCharacter } /** - * The given character is mapped to its case folding equivalent according to - * UnicodeData.txt and CaseFolding.txt; if the character has no case folding - * equivalent, the character itself is returned. - * Only "simple", single-code point case folding mappings are used. - * For "full", multiple-code point mappings use the API - * foldCase(String str, boolean defaultmapping). - * @param ch the character to be converted - * @param defaultmapping Indicates if all mappings defined in CaseFolding.txt - * is to be used, otherwise the mappings for dotted I - * and dotless i marked with 'I' in CaseFolding.txt will - * be skipped. - * @return the case folding equivalent of the character, if any; - * otherwise the character itself. - * @see #foldCase(String, boolean) - */ + * The given character is mapped to its case folding equivalent according + * to UnicodeData.txt and CaseFolding.txt; if the character has no case + * folding equivalent, the character itself is returned. + * Only "simple", single-code point case folding mappings are used. + * For "full", multiple-code point mappings use the API + * foldCase(String str, boolean defaultmapping). + * @param ch the character to be converted + * @param defaultmapping Indicates if all mappings defined in + * CaseFolding.txt is to be used, otherwise the + * mappings for dotted I and dotless i marked with + * 'I' in CaseFolding.txt will be skipped. + * @return the case folding equivalent of the character, if + * any; otherwise the character itself. 
+ * @see #foldCase(String, boolean) + * @stable ICU 2.1 + */ public static int foldCase(int ch, boolean defaultmapping) { // Some special cases are hardcoded because their conditions cannot be @@ -3255,21 +3284,22 @@ public final class UCharacter } /** - * The given string is mapped to its case folding equivalent according to - * UnicodeData.txt and CaseFolding.txt; if any character has no case folding - * equivalent, the character itself is returned. - * "Full", multiple-code point case folding mappings are returned here. - * For "simple" single-code point mappings use the API - * foldCase(int ch, boolean defaultmapping). - * @param str the String to be converted - * @param defaultmapping Indicates if all mappings defined in CaseFolding.txt - * is to be used, otherwise the mappings for dotted I - * and dotless i marked with 'I' in CaseFolding.txt will - * be skipped. - * @return the case folding equivalent of the character, if any; - * otherwise the character itself. - * @see #foldCase(int, boolean) - */ + * The given string is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; if any character has no case + * folding equivalent, the character itself is returned. + * "Full", multiple-code point case folding mappings are returned here. + * For "simple" single-code point mappings use the API + * foldCase(int ch, boolean defaultmapping). + * @param str the String to be converted + * @param defaultmapping Indicates if all mappings defined in + * CaseFolding.txt is to be used, otherwise the + * mappings for dotted I and dotless i marked with + * 'I' in CaseFolding.txt will be skipped. + * @return the case folding equivalent of the character, if + * any; otherwise the character itself. 
+ * @see #foldCase(int, boolean) + * @stable ICU 2.1 + */ public static String foldCase(String str, boolean defaultmapping) { int size = str.length(); @@ -3357,17 +3387,17 @@ public final class UCharacter } /** - * Return numeric value of Han code points. - *Gets an iterator for character types, iterating over codepoints.
- * Example of use:- * RangeValueIterator iterator = UCharacter.getTypeIterator(); - * RangeValueIterator.Element element = new RangeValueIterator.Element(); - * while (iterator.next(element)) { - * System.out.println("Codepoint \\u" + - * Integer.toHexString(element.start) + - * " to codepoint \\u" + - * Integer.toHexString(element.limit - 1) + - * " has the character type " + - * element.value); - * } - *- * @return an iterator - * @draft 2.1 - */ + *
Gets an iterator for character types, iterating over codepoints.
+ * Example of use:+ * RangeValueIterator iterator = UCharacter.getTypeIterator(); + * RangeValueIterator.Element element = new RangeValueIterator.Element(); + * while (iterator.next(element)) { + * System.out.println("Codepoint \\u" + + * Integer.toHexString(element.start) + + * " to codepoint \\u" + + * Integer.toHexString(element.limit - 1) + + * " has the character type " + + * element.value); + * } + *+ * @return an iterator + * @draft ICU 2.1 + */ public static RangeValueIterator getTypeIterator() { return new UCharacterTypeIterator(PROPERTY_); } /** - *
Gets an iterator for character names, iterating over codepoints.
- *This API only gets the iterator for the modern, most up-to-date - * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or - * for extended names use getExtendedNameIterator().
- * Example of use:- * ValueIterator iterator = UCharacter.getNameIterator(); - * ValueIterator.Element element = new ValueIterator.Element(); - * while (iterator.next(element)) { - * System.out.println("Codepoint \\u" + - * Integer.toHexString(element.codepoint) + - * " has the name " + (String)element.value); - * } - *- *
The maximal range which the name iterator iterates is from - * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
- * @return an iterator - * @draft 2.1 - */ + *Gets an iterator for character names, iterating over codepoints.
+ *This API only gets the iterator for the modern, most up-to-date + * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or + * for extended names use getExtendedNameIterator().
+ * Example of use:+ * ValueIterator iterator = UCharacter.getNameIterator(); + * ValueIterator.Element element = new ValueIterator.Element(); + * while (iterator.next(element)) { + * System.out.println("Codepoint \\u" + + * Integer.toHexString(element.codepoint) + + * " has the name " + (String)element.value); + * } + *+ *
The maximal range which the name iterator iterates is from + * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
+ * @return an iterator + * @draft ICU 2.1 + */ public static ValueIterator getNameIterator() { return new UCharacterNameIterator(NAME_, @@ -3469,24 +3499,24 @@ public final class UCharacter } /** - *Gets an iterator for character names, iterating over codepoints.
- *This API only gets the iterator for the older 1.0 Unicode names. - * For modern, most up-to-date Unicode names use getNameIterator() or - * for extended names use getExtendedNameIterator().
- * Example of use:- * ValueIterator iterator = UCharacter.get1_0NameIterator(); - * ValueIterator.Element element = new ValueIterator.Element(); - * while (iterator.next(element)) { - * System.out.println("Codepoint \\u" + - * Integer.toHexString(element.codepoint) + - * " has the name " + (String)element.value); - * } - *- *
The maximal range which the name iterator iterates is from - * @return an iterator - * @draft 2.1 - */ + *
Gets an iterator for character names, iterating over codepoints.
+ *This API only gets the iterator for the older 1.0 Unicode names. + * For modern, most up-to-date Unicode names use getNameIterator() or + * for extended names use getExtendedNameIterator().
+ * Example of use:+ * ValueIterator iterator = UCharacter.getName1_0Iterator(); + * ValueIterator.Element element = new ValueIterator.Element(); + * while (iterator.next(element)) { + * System.out.println("Codepoint \\u" + + * Integer.toHexString(element.codepoint) + + * " has the name " + (String)element.value); + * } + *
The maximal range which the name iterator iterates is from + * @return an iterator + * @draft ICU 2.1 + */ public static ValueIterator getName1_0Iterator() { return new UCharacterNameIterator(NAME_, @@ -3494,24 +3524,24 @@ public final class UCharacter } /** - *
Gets an iterator for character names, iterating over codepoints.
- *This API only gets the iterator for the extended names. - * For modern, most up-to-date Unicode names use getNameIterator() or - * for older 1.0 Unicode names use get1_0NameIterator().
- * Example of use:- * ValueIterator iterator = UCharacter.getExtendedNameIterator(); - * ValueIterator.Element element = new ValueIterator.Element(); - * while (iterator.next(element)) { - * System.out.println("Codepoint \\u" + - * Integer.toHexString(element.codepoint) + - * " has the name " + (String)element.value); - * } - *- *
The maximal range which the name iterator iterates is from - * @return an iterator - * @draft 2.1 - */ + *
Gets an iterator for character names, iterating over codepoints.
+ *This API only gets the iterator for the extended names. + * For modern, most up-to-date Unicode names use getNameIterator() or + * for older 1.0 Unicode names use getName1_0Iterator().
+ * Example of use:+ * ValueIterator iterator = UCharacter.getExtendedNameIterator(); + * ValueIterator.Element element = new ValueIterator.Element(); + * while (iterator.next(element)) { + * System.out.println("Codepoint \\u" + + * Integer.toHexString(element.codepoint) + + * " has the name " + (String)element.value); + * } + *+ *
The maximal range which the name iterator iterates is from + * @return an iterator + * @draft ICU 2.1 + */ public static ValueIterator getExtendedNameIterator() { return new UCharacterNameIterator(NAME_, @@ -3844,12 +3874,12 @@ public final class UCharacter /** * Database storing the sets of character name */ - protected static final UCharacterName NAME_; + static final UCharacterName NAME_; /** * Singleton object encapsulating the imported pnames.icu property aliases */ - protected static final UPropertyAliases PNAMES_; + static final UPropertyAliases PNAMES_; // block to initialise name database and unicode 1.0 data static @@ -3868,8 +3898,8 @@ public final class UCharacter // private variables ------------------------------------------------- /** - * Database storing the sets of character property - */ + * Database storing the sets of character property + */ private static final UCharacterProperty PROPERTY_; // block to initialise character property database @@ -3886,112 +3916,112 @@ public final class UCharacter } /** - * To get the last character out from a data type - */ + * To get the last character out from a data type + */ private static final int LAST_CHAR_MASK_ = 0xFFFF; /** - * To get the last byte out from a data type - */ + * To get the last byte out from a data type + */ private static final int LAST_BYTE_MASK_ = 0xFF; /** - * Shift 16 bits - */ + * Shift 16 bits + */ private static final int SHIFT_16_ = 16; /** - * Shift 24 bits - */ + * Shift 24 bits + */ private static final int SHIFT_24_ = 24; /** - * Decimal radix - */ + * Decimal radix + */ private static final int DECIMAL_RADIX_ = 10; /** - * No break space code point - */ + * No break space code point + */ private static final int NO_BREAK_SPACE_ = 0xA0; /** - * Narrow no break space code point - */ + * Narrow no break space code point + */ private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; /** - * Zero width no break space code point - */ + * Zero width no break space code point + */ 
private static final int ZERO_WIDTH_NO_BREAK_SPACE_ = 0xFEFF; /** - * Ideographic number zero code point - */ + * Ideographic number zero code point + */ private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; /** - * CJK Ideograph, First code point - */ + * CJK Ideograph, First code point + */ private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; /** - * CJK Ideograph, Second code point - */ + * CJK Ideograph, Second code point + */ private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; /** - * CJK Ideograph, Third code point - */ + * CJK Ideograph, Third code point + */ private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; /** - * CJK Ideograph, Fourth code point - */ + * CJK Ideograph, Fourth code point + */ private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8; /** - * CJK Ideograph, FIFTH code point - */ + * CJK Ideograph, FIFTH code point + */ private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; /** - * CJK Ideograph, Sixth code point - */ + * CJK Ideograph, Sixth code point + */ private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; /** - * CJK Ideograph, Seventh code point - */ + * CJK Ideograph, Seventh code point + */ private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; /** - * CJK Ideograph, Eighth code point - */ + * CJK Ideograph, Eighth code point + */ private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; /** - * CJK Ideograph, Nineth code point - */ + * CJK Ideograph, Nineth code point + */ private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; /** - * Application Program command code point - */ + * Application Program command code point + */ private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F; /** - * Unit separator code point - */ + * Unit separator code point + */ private static final int UNIT_SEPARATOR_ = 0x001F; /** - * Delete code point - */ + * Delete code point + */ private static final int DELETE_ = 0x007F; /** - * ISO control character first range upper limit 0x0 - 0x1F - */ + * ISO control character 
first range upper limit 0x0 - 0x1F + */ private static final int ISO_CONTROL_FIRST_RANGE_MAX_ = 0x1F; /** * Shift to get numeric type @@ -4002,17 +4032,17 @@ public final class UCharacter */ private static final int NUMERIC_TYPE_MASK_ = 0x7; /** - * Shift to get bidi bits - */ + * Shift to get bidi bits + */ private static final int BIDI_SHIFT_ = 6; /** - * Mask to be applied after shifting to get bidi bits - */ + * Mask to be applied after shifting to get bidi bits + */ private static final int BIDI_MASK_AFTER_SHIFT_ = 0x1F; /** - * Han digit characters - */ + * Han digit characters + */ private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6; private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9; private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3; @@ -4112,8 +4142,8 @@ public final class UCharacter // private constructor ----------------------------------------------- /** - * Private constructor to prevent instantiation - */ + * Private constructor to prevent instantiation + */ private UCharacter() { } diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java index 4d86d4e232..79516acbc6 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java @@ -6,8 +6,8 @@ * * $Source: * /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $ -* $Date: 2002/09/19 21:18:14 $ -* $Revision: 1.9 $ +* $Date: 2002/12/03 00:47:53 $ +* $Revision: 1.10 $ * ******************************************************************************* */ @@ -15,238 +15,269 @@ package com.ibm.icu.lang; /** -* Enumerated Unicode category types from the UnicodeData.txt file. -* Used as return results from UCharacter -* Equivalent to icu's UCharCategory. -* Refer to -* Unicode Consortium for more information about UnicodeData.txt. -*
-* This class is not subclassable -*
-* @author Syn Wee Quek -* @since oct0300 -*/ + * Enumerated Unicode category types from the UnicodeData.txt file. + * Used as return results from UCharacter + * Equivalent to icu's UCharCategory. + * Refer to + * Unicode Consortium for more information about UnicodeData.txt. + *+ * This class is not subclassable + *
+ * @author Syn Wee Quek + * @stable ICU 2.1 + */ public final class UCharacterCategory { - // public variable ----------------------------------------------------- - - /** - * Unassigned character type - */ - public static final int UNASSIGNED = 0; - /** - * Character type Cn - * Not Assigned (no characters in [UnicodeData.txt] have this property) - * @draft 2.1 - */ - public static final int GENERAL_OTHER_TYPES = 0; - /** - * Character type Lu - */ - public static final int UPPERCASE_LETTER = 1; - /** - * Character type Ll - */ - public static final int LOWERCASE_LETTER = 2; - /** - * Character type Lt - */ - public static final int TITLECASE_LETTER = 3; - /** - * Character type Lm - */ - public static final int MODIFIER_LETTER = 4; - /** - * Character type Lo - */ - public static final int OTHER_LETTER = 5; - /** - * Character type Mn - */ - public static final int NON_SPACING_MARK = 6; - /** - * Character type Me - */ - public static final int ENCLOSING_MARK = 7; - /** - * Character type Mc - */ - public static final int COMBINING_SPACING_MARK = 8; - /** - * Character type Nd - */ - public static final int DECIMAL_DIGIT_NUMBER = 9; - /** - * Character type Nl - */ - public static final int LETTER_NUMBER = 10; - - // start of 11------------ - - /** - * Character type No - */ - public static final int OTHER_NUMBER = 11; - /** - * Character type Zs - */ - public static final int SPACE_SEPARATOR = 12; - /** - * Character type Zl - */ - public static final int LINE_SEPARATOR = 13; - /** - * Character type Zp - */ - public static final int PARAGRAPH_SEPARATOR = 14; - /** - * Character type Cc - */ - public static final int CONTROL = 15; - /** - * Character type Cf - */ - public static final int FORMAT = 16; - /** - * Character type Co - */ - public static final int PRIVATE_USE = 17; - /** - * Character type Cs - */ - public static final int SURROGATE = 18; - /** - * Character type Pd - */ - public static final int DASH_PUNCTUATION = 19; - /** - * Character type Ps - 
*/ - public static final int START_PUNCTUATION = 20; - - // start of 21 ------------ - - /** - * Character type Pe - */ - public static final int END_PUNCTUATION = 21; - /** - * Character type Pc - */ - public static final int CONNECTOR_PUNCTUATION = 22; - /** - * Character type Po - */ - public static final int OTHER_PUNCTUATION = 23; - /** - * Character type Sm - */ - public static final int MATH_SYMBOL = 24; - /** - * Character type Sc - */ - public static final int CURRENCY_SYMBOL = 25; - /** - * Character type Sk - */ - public static final int MODIFIER_SYMBOL = 26; - /** - * Character type So - */ - public static final int OTHER_SYMBOL = 27; - /** - * Character type Pi - */ - public static final int INITIAL_PUNCTUATION = 28; - /** - * Character type Pf - */ - public static final int FINAL_PUNCTUATION = 29; - - // start of 31 ------------ - - /** - * Character type count - */ - public static final int CHAR_CATEGORY_COUNT = 30; - - /** - * Gets the name of the argument category - * @param category to retrieve name - * @return category name - */ - public static String toString(int category) - { - switch (category) - { - case UPPERCASE_LETTER : - return "Letter, Uppercase"; - case LOWERCASE_LETTER : - return "Letter, Lowercase"; - case TITLECASE_LETTER : - return "Letter, Titlecase"; - case MODIFIER_LETTER : - return "Letter, Modifier"; - case OTHER_LETTER : - return "Letter, Other"; - case NON_SPACING_MARK : - return "Mark, Non-Spacing"; - case ENCLOSING_MARK : - return "Mark, Enclosing"; - case COMBINING_SPACING_MARK : - return "Mark, Spacing Combining"; - case DECIMAL_DIGIT_NUMBER : - return "Number, Decimal Digit"; - case LETTER_NUMBER : - return "Number, Letter"; - case OTHER_NUMBER : - return "Number, Other"; - case SPACE_SEPARATOR : - return "Separator, Space"; - case LINE_SEPARATOR : - return "Separator, Line"; - case PARAGRAPH_SEPARATOR : - return "Separator, Paragraph"; - case CONTROL : - return "Other, Control"; - case FORMAT : - return "Other, Format"; 
- case PRIVATE_USE : - return "Other, Private Use"; - case SURROGATE : - return "Other, Surrogate"; - case DASH_PUNCTUATION : - return "Punctuation, Dash"; - case START_PUNCTUATION : - return "Punctuation, Open"; - case END_PUNCTUATION : - return "Punctuation, Close"; - case CONNECTOR_PUNCTUATION : - return "Punctuation, Connector"; - case OTHER_PUNCTUATION : - return "Punctuation, Other"; - case MATH_SYMBOL : - return "Symbol, Math"; - case CURRENCY_SYMBOL : - return "Symbol, Currency"; - case MODIFIER_SYMBOL : - return "Symbol, Modifier"; - case OTHER_SYMBOL : - return "Symbol, Other"; - case INITIAL_PUNCTUATION : - return "Punctuation, Initial quote"; - case FINAL_PUNCTUATION : - return "Punctuation, Final quote"; - } - return "Unassigned"; - } - - // private constructor ----------------------------------------------- + // public variable ----------------------------------------------------- /** - * Private constructor to prevent initialisation - */ + * Unassigned character type + * @stable ICU 2.1 + */ + public static final int UNASSIGNED = 0; + /** + * Character type Cn + * Not Assigned (no characters in [UnicodeData.txt] have this property) + * @draft ICU 2.1 + */ + public static final int GENERAL_OTHER_TYPES = 0; + /** + * Character type Lu + * @stable ICU 2.1 + */ + public static final int UPPERCASE_LETTER = 1; + /** + * Character type Ll + * @stable ICU 2.1 + */ + public static final int LOWERCASE_LETTER = 2; + /** + * Character type Lt + * @stable ICU 2.1 + */ + public static final int TITLECASE_LETTER = 3; + /** + * Character type Lm + * @stable ICU 2.1 + */ + public static final int MODIFIER_LETTER = 4; + /** + * Character type Lo + * @stable ICU 2.1 + */ + public static final int OTHER_LETTER = 5; + /** + * Character type Mn + * @stable ICU 2.1 + */ + public static final int NON_SPACING_MARK = 6; + /** + * Character type Me + * @stable ICU 2.1 + */ + public static final int ENCLOSING_MARK = 7; + /** + * Character type Mc + * @stable ICU 2.1 + */ + 
public static final int COMBINING_SPACING_MARK = 8; + /** + * Character type Nd + * @stable ICU 2.1 + */ + public static final int DECIMAL_DIGIT_NUMBER = 9; + /** + * Character type Nl + * @stable ICU 2.1 + */ + public static final int LETTER_NUMBER = 10; + + // start of 11------------ + + /** + * Character type No + * @stable ICU 2.1 + */ + public static final int OTHER_NUMBER = 11; + /** + * Character type Zs + * @stable ICU 2.1 + */ + public static final int SPACE_SEPARATOR = 12; + /** + * Character type Zl + * @stable ICU 2.1 + */ + public static final int LINE_SEPARATOR = 13; + /** + * Character type Zp + * @stable ICU 2.1 + */ + public static final int PARAGRAPH_SEPARATOR = 14; + /** + * Character type Cc + * @stable ICU 2.1 + */ + public static final int CONTROL = 15; + /** + * Character type Cf + * @stable ICU 2.1 + */ + public static final int FORMAT = 16; + /** + * Character type Co + * @stable ICU 2.1 + */ + public static final int PRIVATE_USE = 17; + /** + * Character type Cs + * @stable ICU 2.1 + */ + public static final int SURROGATE = 18; + /** + * Character type Pd + * @stable ICU 2.1 + */ + public static final int DASH_PUNCTUATION = 19; + /** + * Character type Ps + * @stable ICU 2.1 + */ + public static final int START_PUNCTUATION = 20; + + // start of 21 ------------ + + /** + * Character type Pe + * @stable ICU 2.1 + */ + public static final int END_PUNCTUATION = 21; + /** + * Character type Pc + * @stable ICU 2.1 + */ + public static final int CONNECTOR_PUNCTUATION = 22; + /** + * Character type Po + * @stable ICU 2.1 + */ + public static final int OTHER_PUNCTUATION = 23; + /** + * Character type Sm + * @stable ICU 2.1 + */ + public static final int MATH_SYMBOL = 24; + /** + * Character type Sc + * @stable ICU 2.1 + */ + public static final int CURRENCY_SYMBOL = 25; + /** + * Character type Sk + * @stable ICU 2.1 + */ + public static final int MODIFIER_SYMBOL = 26; + /** + * Character type So + * @stable ICU 2.1 + */ + public static final int 
OTHER_SYMBOL = 27; + /** + * Character type Pi + * @stable ICU 2.1 + */ + public static final int INITIAL_PUNCTUATION = 28; + /** + * Character type Pf + * @stable ICU 2.1 + */ + public static final int FINAL_PUNCTUATION = 29; + + // start of 31 ------------ + + /** + * Character type count + * @stable ICU 2.1 + */ + public static final int CHAR_CATEGORY_COUNT = 30; + + /** + * Gets the name of the argument category + * @param category to retrieve name + * @return category name + * @stable ICU 2.1 + */ + public static String toString(int category) + { + switch (category) { + case UPPERCASE_LETTER : + return "Letter, Uppercase"; + case LOWERCASE_LETTER : + return "Letter, Lowercase"; + case TITLECASE_LETTER : + return "Letter, Titlecase"; + case MODIFIER_LETTER : + return "Letter, Modifier"; + case OTHER_LETTER : + return "Letter, Other"; + case NON_SPACING_MARK : + return "Mark, Non-Spacing"; + case ENCLOSING_MARK : + return "Mark, Enclosing"; + case COMBINING_SPACING_MARK : + return "Mark, Spacing Combining"; + case DECIMAL_DIGIT_NUMBER : + return "Number, Decimal Digit"; + case LETTER_NUMBER : + return "Number, Letter"; + case OTHER_NUMBER : + return "Number, Other"; + case SPACE_SEPARATOR : + return "Separator, Space"; + case LINE_SEPARATOR : + return "Separator, Line"; + case PARAGRAPH_SEPARATOR : + return "Separator, Paragraph"; + case CONTROL : + return "Other, Control"; + case FORMAT : + return "Other, Format"; + case PRIVATE_USE : + return "Other, Private Use"; + case SURROGATE : + return "Other, Surrogate"; + case DASH_PUNCTUATION : + return "Punctuation, Dash"; + case START_PUNCTUATION : + return "Punctuation, Open"; + case END_PUNCTUATION : + return "Punctuation, Close"; + case CONNECTOR_PUNCTUATION : + return "Punctuation, Connector"; + case OTHER_PUNCTUATION : + return "Punctuation, Other"; + case MATH_SYMBOL : + return "Symbol, Math"; + case CURRENCY_SYMBOL : + return "Symbol, Currency"; + case MODIFIER_SYMBOL : + return "Symbol, Modifier"; + case 
OTHER_SYMBOL : + return "Symbol, Other"; + case INITIAL_PUNCTUATION : + return "Punctuation, Initial quote"; + case FINAL_PUNCTUATION : + return "Punctuation, Final quote"; + } + return "Unassigned"; + } + + // private constructor ----------------------------------------------- + + /** + * Private constructor to prevent initialisation + */ private UCharacterCategory() { } diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java b/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java index fe7d93e91c..d24c31d8da 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java @@ -6,8 +6,8 @@ * * $Source: * /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterDirection.java $ -* $Date: 2002/09/11 00:12:39 $ -* $Revision: 1.5 $ +* $Date: 2002/12/03 00:47:50 $ +* $Revision: 1.6 $ * ******************************************************************************* */ @@ -15,160 +15,181 @@ package com.ibm.icu.lang; /** -* Enumerated Unicode character linguistic direction constants. -* Used as return results from UCharacter -*-* This class is not subclassable -*
-* @author Syn Wee Quek -* @since oct0300 -*/ + * Enumerated Unicode character linguistic direction constants. + * Used as return results from UCharacter + *+ * This class is not subclassable + *
+ * @author Syn Wee Quek + * @stable ICU 2.1 + */ public final class UCharacterDirection { - // private constructor ========================================= - - /** - * Private constructor to prevent initialisation - */ - private UCharacterDirection() - { - } - - // public variable ============================================= - - /** - * Directional type L - */ - public static final int LEFT_TO_RIGHT = 0; - /** - * Directional type R - */ - public static final int RIGHT_TO_LEFT = 1; - /** - * Directional type EN - */ - public static final int EUROPEAN_NUMBER = 2; - /** - * Directional type ES - */ - public static final int EUROPEAN_NUMBER_SEPARATOR = 3; - /** - * Directional type ET - */ - public static final int EUROPEAN_NUMBER_TERMINATOR = 4; - /** - * Directional type AN - */ - public static final int ARABIC_NUMBER = 5; - /** - * Directional type CS - */ - public static final int COMMON_NUMBER_SEPARATOR = 6; - /** - * Directional type B - */ - public static final int BLOCK_SEPARATOR = 7; - /** - * Directional type S - */ - public static final int SEGMENT_SEPARATOR = 8; - /** - * Directional type WS - */ - public static final int WHITE_SPACE_NEUTRAL = 9; - - // start of 11 --------------- - - /** - * Directional type ON - */ - public static final int OTHER_NEUTRAL = 10; - /** - * Directional type LRE - */ - public static final int LEFT_TO_RIGHT_EMBEDDING = 11; - /** - * Directional type LRO - */ - public static final int LEFT_TO_RIGHT_OVERRIDE = 12; - /** - * Directional type AL - */ - public static final int RIGHT_TO_LEFT_ARABIC = 13; - /** - * Directional type RLE - */ - public static final int RIGHT_TO_LEFT_EMBEDDING = 14; - /** - * Directional type RLO - */ - public static final int RIGHT_TO_LEFT_OVERRIDE = 15; - /** - * Directional type PDF - */ - public static final int POP_DIRECTIONAL_FORMAT = 16; - /** - * Directional type NSM - */ - public static final int DIR_NON_SPACING_MARK = 17; - /** - * Directional type BN - */ - public static final int 
BOUNDARY_NEUTRAL = 18; - /** - * Number of directional type - */ + // private constructor ========================================= + + /** + * Private constructor to prevent initialisation + */ + private UCharacterDirection() + { + } + + // public variable ============================================= + + /** + * Directional type L + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT = 0; + /** + * Directional type R + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT = 1; + /** + * Directional type EN + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER = 2; + /** + * Directional type ES + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER_SEPARATOR = 3; + /** + * Directional type ET + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER_TERMINATOR = 4; + /** + * Directional type AN + * @stable ICU 2.1 + */ + public static final int ARABIC_NUMBER = 5; + /** + * Directional type CS + * @stable ICU 2.1 + */ + public static final int COMMON_NUMBER_SEPARATOR = 6; + /** + * Directional type B + * @stable ICU 2.1 + */ + public static final int BLOCK_SEPARATOR = 7; + /** + * Directional type S + * @stable ICU 2.1 + */ + public static final int SEGMENT_SEPARATOR = 8; + /** + * Directional type WS + * @stable ICU 2.1 + */ + public static final int WHITE_SPACE_NEUTRAL = 9; + + // start of 11 --------------- + + /** + * Directional type ON + * @stable ICU 2.1 + */ + public static final int OTHER_NEUTRAL = 10; + /** + * Directional type LRE + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT_EMBEDDING = 11; + /** + * Directional type LRO + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT_OVERRIDE = 12; + /** + * Directional type AL + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_ARABIC = 13; + /** + * Directional type RLE + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_EMBEDDING = 14; + /** + * Directional type RLO + * @stable ICU 2.1 + */ + public 
static final int RIGHT_TO_LEFT_OVERRIDE = 15; + /** + * Directional type PDF + * @stable ICU 2.1 + */ + public static final int POP_DIRECTIONAL_FORMAT = 16; + /** + * Directional type NSM + * @stable ICU 2.1 + */ + public static final int DIR_NON_SPACING_MARK = 17; + /** + * Directional type BN + * @stable ICU 2.1 + */ + public static final int BOUNDARY_NEUTRAL = 18; + /** + * Number of directional type + * @stable ICU 2.1 + */ public static final int CHAR_DIRECTION_COUNT = 19; /** - * Gets the name of the argument direction - * @param dir direction type to retrieve name - * @return directional name - */ + * Gets the name of the argument direction + * @param dir direction type to retrieve name + * @return directional name + * @stable ICU 2.1 + */ public static String toString(int dir) { - switch(dir) - { - case LEFT_TO_RIGHT : - return "Left-to-Right"; - case RIGHT_TO_LEFT : - return "Right-to-Left"; - case EUROPEAN_NUMBER : - return "European Number"; - case EUROPEAN_NUMBER_SEPARATOR : - return "European Number Separator"; - case EUROPEAN_NUMBER_TERMINATOR : - return "European Number Terminator"; - case ARABIC_NUMBER : - return "Arabic Number"; - case COMMON_NUMBER_SEPARATOR : - return "Common Number Separator"; - case BLOCK_SEPARATOR : - return "Paragraph Separator"; - case SEGMENT_SEPARATOR : - return "Segment Separator"; - case WHITE_SPACE_NEUTRAL : - return "Whitespace"; - case OTHER_NEUTRAL : - return "Other Neutrals"; - case LEFT_TO_RIGHT_EMBEDDING : - return "Left-to-Right Embedding"; - case LEFT_TO_RIGHT_OVERRIDE : - return "Left-to-Right Override"; - case RIGHT_TO_LEFT_ARABIC : - return "Right-to-Left Arabic"; - case RIGHT_TO_LEFT_EMBEDDING : - return "Right-to-Left Embedding"; - case RIGHT_TO_LEFT_OVERRIDE : - return "Right-to-Left Override"; - case POP_DIRECTIONAL_FORMAT : - return "Pop Directional Format"; - case DIR_NON_SPACING_MARK : - return "Non-Spacing Mark"; - case BOUNDARY_NEUTRAL : - return "Boundary Neutral"; - } - return "Unassigned"; + 
switch(dir) + { + case LEFT_TO_RIGHT : + return "Left-to-Right"; + case RIGHT_TO_LEFT : + return "Right-to-Left"; + case EUROPEAN_NUMBER : + return "European Number"; + case EUROPEAN_NUMBER_SEPARATOR : + return "European Number Separator"; + case EUROPEAN_NUMBER_TERMINATOR : + return "European Number Terminator"; + case ARABIC_NUMBER : + return "Arabic Number"; + case COMMON_NUMBER_SEPARATOR : + return "Common Number Separator"; + case BLOCK_SEPARATOR : + return "Paragraph Separator"; + case SEGMENT_SEPARATOR : + return "Segment Separator"; + case WHITE_SPACE_NEUTRAL : + return "Whitespace"; + case OTHER_NEUTRAL : + return "Other Neutrals"; + case LEFT_TO_RIGHT_EMBEDDING : + return "Left-to-Right Embedding"; + case LEFT_TO_RIGHT_OVERRIDE : + return "Left-to-Right Override"; + case RIGHT_TO_LEFT_ARABIC : + return "Right-to-Left Arabic"; + case RIGHT_TO_LEFT_EMBEDDING : + return "Right-to-Left Embedding"; + case RIGHT_TO_LEFT_OVERRIDE : + return "Right-to-Left Override"; + case POP_DIRECTIONAL_FORMAT : + return "Pop Directional Format"; + case DIR_NON_SPACING_MARK : + return "Non-Spacing Mark"; + case BOUNDARY_NEUTRAL : + return "Boundary Neutral"; + } + return "Unassigned"; } } diff --git a/icu4j/src/com/ibm/icu/lang/UProperty.java b/icu4j/src/com/ibm/icu/lang/UProperty.java index 57f14b8f27..3f1557e969 100644 --- a/icu4j/src/com/ibm/icu/lang/UProperty.java +++ b/icu4j/src/com/ibm/icu/lang/UProperty.java @@ -6,8 +6,8 @@ * * $Source: * /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $ -* $Date: 2002/11/06 19:48:58 $ -* $Revision: 1.5 $ +* $Date: 2002/12/03 00:47:53 $ +* $Revision: 1.6 $ * ******************************************************************************* */ @@ -30,7 +30,7 @@ package com.ibm.icu.lang; * 3.2, then properties marked with "new" are not or not fully * available. Check UCharacter.getUnicodeVersion() to be sure. 
* @author Syn Wee Quek - * @since March 8 2002 + * @draft ICU 2.1 * @see com.ibm.icu.lang.UCharacter */ public interface UProperty @@ -42,20 +42,24 @@ public interface UProperty *Property for UCharacter.isUAlphabetic(), different from the property * in UCharacter.isalpha().
*Lu + Ll + Lt + Lm + Lo + Nl + Other_Alphabetic.
+ * @draft ICU 2.1 */ public static final int ALPHABETIC = 0; /** * First constant for binary Unicode properties. + * @draft ICU 2.1 */ public static final int BINARY_START = ALPHABETIC; /** * Binary property ASCII_Hex_Digit (0-9 A-F a-f). + * @draft ICU 2.1 */ public static final int ASCII_HEX_DIGIT = 1; /** *Binary property Bidi_Control.
*Format controls which have specific functions in the Bidi Algorithm. *
+ * @draft ICU 2.1 */ public static final int BIDI_CONTROL = 2; /** @@ -63,11 +67,13 @@ public interface UProperty *Characters that may change display in RTL text.
*Property for UCharacter.isMirrored().
*See Bidi Algorithm; UTR 9.
+ * @draft ICU 2.1 */ public static final int BIDI_MIRRORED = 3; /** *Binary property Dash.
*Variations of dashes.
+ * @draft ICU 2.1 */ public static final int DASH = 4; /** @@ -77,97 +83,114 @@ public interface UProperty * *Codepoints (2060..206F, FFF0..FFFB, E0000..E0FFF) + * Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)
+ * @draft ICU 2.1 */ public static final int DEFAULT_IGNORABLE_CODE_POINT = 5; /** *Binary property Deprecated (new).
*The usage of deprecated characters is strongly discouraged.
+ * @draft ICU 2.1 */ public static final int DEPRECATED = 6; /** *Binary property Diacritic.
*Characters that linguistically modify the meaning of another * character to which they apply.
+ * @draft ICU 2.1 */ public static final int DIACRITIC = 7; /** *Binary property Extender.
*Extend the value or shape of a preceding alphabetic character, e.g. * length and iteration marks.
+ * @draft ICU 2.1 */ public static final int EXTENDER = 8; /** *Binary property Full_Composition_Exclusion.
*CompositionExclusions.txt + Singleton Decompositions + * Non-Starter Decompositions.
+ * @draft ICU 2.1 */ public static final int FULL_COMPOSITION_EXCLUSION = 9; /** *Binary property Grapheme_Base (new).
*For programmatic determination of grapheme cluster boundaries. * [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ
+ * @draft ICU 2.1 */ public static final int GRAPHEME_BASE = 10; /** *Binary property Grapheme_Extend (new).
*For programmatic determination of grapheme cluster boundaries.
*Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ
+ * @draft ICU 2.1 */ public static final int GRAPHEME_EXTEND = 11; /** *Binary property Grapheme_Link (new).
*For programmatic determination of grapheme cluster boundaries.
+ * @draft ICU 2.1 */ public static final int GRAPHEME_LINK = 12; /** *Binary property Hex_Digit.
*Characters commonly used for hexadecimal numbers.
+ * @draft ICU 2.1 */ public static final int HEX_DIGIT = 13; /** *Binary property Hyphen.
*Dashes used to mark connections between pieces of words, plus the * Katakana middle dot.
+ * @draft ICU 2.1 */ public static final int HYPHEN = 14; /** *Binary property ID_Continue.
*Characters that can continue an identifier.
*ID_Start+Mn+Mc+Nd+Pc
+ * @draft ICU 2.1 */ public static final int ID_CONTINUE = 15; /** *Binary property ID_Start.
*Characters that can start an identifier.
*Lu+Ll+Lt+Lm+Lo+Nl
+ * @draft ICU 2.1 */ public static final int ID_START = 16; /** *Binary property Ideographic.
*CJKV ideographs.
+ * @draft ICU 2.1 */ public static final int IDEOGRAPHIC = 17; /** *Binary property IDS_Binary_Operator (new).
*For programmatic determination of Ideographic Description Sequences. *
+ * @draft ICU 2.1 */ public static final int IDS_BINARY_OPERATOR = 18; /** *Binary property IDS_Trinary_Operator (new).
*+ * @draft ICU 2.1 */ public static final int IDS_TRINARY_OPERATOR = 19; /** *
Binary property Join_Control.
*Format controls for cursive joining and ligation.
+ * @draft ICU 2.1 */ public static final int JOIN_CONTROL = 20; /** *Binary property Logical_Order_Exception (new).
*Characters that do not use logical order and require special * handling in most processing.
+ * @draft ICU 2.1 */ public static final int LOGICAL_ORDER_EXCEPTION = 21; /** @@ -175,44 +198,52 @@ public interface UProperty *Same as UCharacter.isULowercase(), different from * UCharacter.islower().
*Ll+Other_Lowercase
+ * @draft ICU 2.1 */ public static final int LOWERCASE = 22; /**Binary property Math.
*Sm+Other_Math
+ * @draft ICU 2.1 */ public static final int MATH = 23; /** *Binary property Noncharacter_Code_Point.
*Code points that are explicitly defined as illegal for the encoding * of characters.
+ * @draft ICU 2.1 */ public static final int NONCHARACTER_CODE_POINT = 24; /** *Binary property Quotation_Mark.
+ * @draft ICU 2.1 */ public static final int QUOTATION_MARK = 25; /** *Binary property Radical (new).
*For programmatic determination of Ideographic Description * Sequences.
+ * @draft ICU 2.1 */ public static final int RADICAL = 26; /** *Binary property Soft_Dotted (new).
*Characters with a "soft dot", like i or j.
*An accent placed on these characters causes the dot to disappear.
+ * @draft ICU 2.1 */ public static final int SOFT_DOTTED = 27; /** *Binary property Terminal_Punctuation.
*Punctuation characters that generally mark the end of textual * units.
+ * @draft ICU 2.1 */ public static final int TERMINAL_PUNCTUATION = 28; /** *Binary property Unified_Ideograph (new).
*For programmatic determination of Ideographic Description * Sequences.
+ * @draft ICU 2.1 */ public static final int UNIFIED_IDEOGRAPH = 29; /** @@ -220,6 +251,7 @@ public interface UProperty *Same as UCharacter.isUUppercase(), different from * UCharacter.isUpperCase().
*Lu+Other_Uppercase
+ * @draft ICU 2.1 */ public static final int UPPERCASE = 30; /** @@ -227,22 +259,26 @@ public interface UProperty *Same as UCharacter.isUWhiteSpace(), different from * UCharacter.isSpace() and UCharacter.isWhitespace().
* Space characters+TAB+CR+LF-ZWSP-ZWNBSP + * @draft ICU 2.1 */ public static final int WHITE_SPACE = 31; /** *Binary property XID_Continue.
*ID_Continue modified to allow closure under normalization forms * NFKC and NFKD.
+ * @draft ICU 2.1 */ public static final int XID_CONTINUE = 32; /** *Binary property XID_Start.
*ID_Start modified to allow closure under normalization forms NFKC * and NFKD.
+ * @draft ICU 2.1 */ public static final int XID_START = 33; /** *One more than the last constant for binary Unicode properties.
+ * @draft ICU 2.1 */ public static final int BINARY_LIMIT = 34; /** @@ -442,7 +478,7 @@ public interface UProperty * * @see UCharacter#getPropertyName * @see UCharacter#getPropertyValueName - * @since ICU 2.4 + * @draft ICU 2.4 */ public interface NameChoice { /**