diff --git a/icu4j/src/com/ibm/icu/lang/UCharacter.java b/icu4j/src/com/ibm/icu/lang/UCharacter.java index 1e2a6d507f..ccea0c8934 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacter.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacter.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $ -* $Date: 2002/11/22 22:53:13 $ -* $Revision: 1.53 $ +* $Date: 2002/12/03 00:47:50 $ +* $Revision: 1.54 $ * ******************************************************************************* */ @@ -27,64 +27,64 @@ import com.ibm.icu.impl.UCharacterNameChoice; import com.ibm.icu.impl.UPropertyAliases; /** -*

-* The UCharacter class provides extensions to the -* -* java.lang.Character class. These extensions provide support for -* Unicode 3.1 properties and together with the UTF16 -* class, provide support for supplementary characters (those with code -* points above U+FFFF). -*

-*

-* Code points are represented in these API using ints. While it would be -* more convenient in Java to have a separate primitive datatype for them, -* ints suffice in the meantime. -*

-*

-* To use this class please add the jar file name icu4j.jar to the -* class path, since it contains data files which supply the information used -* by this file.
-* E.g. In Windows
-* set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar.
-* Otherwise, another method would be to copy the files uprops.dat and -* unames.icu from the icu4j source subdirectory -* $ICU4J_SRC/src/com.ibm.icu.impl.data to your class directory -* $ICU4J_CLASS/com.ibm.icu.impl.data. -*

-*

-* Aside from the additions for UTF-16 support, and the updated Unicode 3.1 -* properties, the main differences between UCharacter and Character are: -*

-*

-* Further detail differences can be determined from the program -* -* com.ibm.icu.dev.test.lang.UCharacterCompare -*

-*

-* This class is not subclassable -*

-* @author Syn Wee Quek -* @since oct 06 2000 -* @see com.ibm.icu.lang.UCharacterCategory -* @see com.ibm.icu.lang.UCharacterDirection -*/ + *

+ * The UCharacter class provides extensions to the + * + * java.lang.Character class. These extensions provide support for + * Unicode 3.1 properties and together with the UTF16 + * class, provide support for supplementary characters (those with code + * points above U+FFFF). + *

+ *

+ * Code points are represented in these API using ints. While it would be + * more convenient in Java to have a separate primitive datatype for them, + * ints suffice in the meantime. + *

+ *

+ * To use this class please add the jar file name icu4j.jar to the + * class path, since it contains data files which supply the information used + * by this file.
+ * E.g. In Windows
+ * set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar.
+ * Otherwise, another method would be to copy the files uprops.dat and + * unames.icu from the icu4j source subdirectory + * $ICU4J_SRC/src/com.ibm.icu.impl.data to your class directory + * $ICU4J_CLASS/com.ibm.icu.impl.data. + *

+ *

+ * Aside from the additions for UTF-16 support, and the updated Unicode 3.1 + * properties, the main differences between UCharacter and Character are: + *

+ *

+ * Further detail differences can be determined from the program + * + * com.ibm.icu.dev.test.lang.UCharacterCompare + *

+ *

+ * This class is not subclassable + *

+ * @author Syn Wee Quek + * @stable ICU 2.1 + * @see com.ibm.icu.lang.UCharacterCategory + * @see com.ibm.icu.lang.UCharacterDirection + */ /* * notes: @@ -1816,34 +1816,38 @@ public final class UCharacter // public data members ----------------------------------------------- /** - * The lowest Unicode code point value. - */ + * The lowest Unicode code point value. + * @stable ICU 2.1 + */ public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE; /** - * The highest Unicode code point value (scalar value) according to the - * Unicode Standard. - * This is a 21-bit value (21 bits, rounded up).
- * Up-to-date Unicode implementation of java.lang.Character.MIN_VALUE - */ + * The highest Unicode code point value (scalar value) according to the + * Unicode Standard. + * This is a 21-bit value (21 bits, rounded up).
+ * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE + * @stable ICU 2.1 + */ public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE; /** - * The minimum value for Supplementary code points - */ + * The minimum value for Supplementary code points + * @stable ICU 2.1 + */ public static final int SUPPLEMENTARY_MIN_VALUE = UTF16.SUPPLEMENTARY_MIN_VALUE; /** - * Unicode value used when translating into Unicode encoding form and there - * is no existing character. - */ + * Unicode value used when translating into Unicode encoding form and there + * is no existing character. + * @stable ICU 2.1 + */ public static final int REPLACEMENT_CHAR = '\uFFFD'; /** * Special value that is returned by getUnicodeNumericValue(int) when no * numeric value is defined for a code point. - * @draft 2.4 + * @draft ICU 2.4 * @see #getUnicodeNumericValue */ public static final double NO_NUMERIC_VALUE = -123456789; @@ -1851,27 +1855,28 @@ public final class UCharacter // public methods ---------------------------------------------------- /** - * Retrieves the numeric value of a decimal digit code point. - *
This method observes the semantics of - * java.lang.Character.digit(). Note that this - * will return positive values for code points for which isDigit - * returns false, just like java.lang.Character. - *
Semantic Change: In release 1.3.1 and - * prior, this did not treat the European letters as having a - * digit value, and also treated numeric letters and other numbers as - * digits. - * This has been changed to conform to the java semantics. - *
A code point is a valid digit if and only if: - * - * @param ch the code point to query - * @param radix the radix - * @return the numeric value represented by the code point in the - * specified radix, or -1 if the code point is not a decimal digit - * or if its value is too large for the radix - */ + * Retrieves the numeric value of a decimal digit code point. + *
This method observes the semantics of + * java.lang.Character.digit(). Note that this + * will return positive values for code points for which isDigit + * returns false, just like java.lang.Character. + *
Semantic Change: In release 1.3.1 and + * prior, this did not treat the European letters as having a + * digit value, and also treated numeric letters and other numbers as + * digits. + * This has been changed to conform to the java semantics. + *
A code point is a valid digit if and only if: + * + * @param ch the code point to query + * @param radix the radix + * @return the numeric value represented by the code point in the + * specified radix, or -1 if the code point is not a decimal digit + * or if its value is too large for the radix + * @stable ICU 2.1 + */ public static int digit(int ch, int radix) { // when ch is out of bounds getProperty == 0 @@ -1906,17 +1911,18 @@ public final class UCharacter } /** - * Retrieves the numeric value of a decimal digit code point. - *
This is a convenience overload of digit(int, int) - * that provides a decimal radix. - *
Semantic Change: In release 1.3.1 and prior, this - * treated numeric letters and other numbers as digits. This has - * been changed to conform to the java semantics. - * @param ch the code point to query - * @return the numeric value represented by the code point, - * or -1 if the code point is not a decimal digit or if its - * value is too large for a decimal radix - */ + * Retrieves the numeric value of a decimal digit code point. + *
This is a convenience overload of digit(int, int) + * that provides a decimal radix. + *
Semantic Change: In release 1.3.1 and prior, this + * treated numeric letters and other numbers as digits. This has + * been changed to conform to the java semantics. + * @param ch the code point to query + * @return the numeric value represented by the code point, + * or -1 if the code point is not a decimal digit or if its + * value is too large for a decimal radix + * @stable ICU 2.1 + */ public static int digit(int ch) { return digit(ch, DECIMAL_RADIX_); @@ -1934,6 +1940,7 @@ public final class UCharacter * @return the numeric value of the code point, or -1 if it has no numeric * value, or -2 if it has a numeric value that cannot be represented as a * nonnegative integer + * @stable ICU 2.1 */ public static int getNumericValue(int ch) { @@ -1970,25 +1977,6 @@ public final class UCharacter return result; } - - /* - * Returns the Unicode numeric value of the code point as a nonnegative - * integer. - *
If the code point does not have a numeric value, then -1 is returned.
- * If the code point has a numeric value that cannot be represented as a - * nonnegative integer (for example, a fractional value), then -2 is - * returned. - * This returns values other than -1 for all and only those code points - * whose type is a numeric type. - * @param ch the code point to query - * @return the numeric value of the code point, or -1 if it has no numeric - * value, or -2 if it has a numeric value that cannot be represented as a - * nonnegative integer - public static int getUnicodeNumericValue(int ch) - { - return getNumericValueInternal(ch, false); - } - */ /** *

Get the numeric value for a Unicode code point as defined in the @@ -2003,7 +1991,7 @@ public final class UCharacter *

* @param ch Code point to get the numeric value for. * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. - * @draft 2.4 + * @draft ICU 2.4 */ public static double getUnicodeNumericValue(int ch) { @@ -2064,14 +2052,16 @@ public final class UCharacter } /** - * Returns a value indicating a code point's Unicode category. - * Up-to-date Unicode implementation of java.lang.Character.getType() except - * for the above mentioned code points that had their category changed.
- * Return results are constants from the interface - * UCharacterCategory - * @param ch code point whose type is to be determined - * @return category which is a value of UCharacterCategory - */ + * Returns a value indicating a code point's Unicode category. + * Up-to-date Unicode implementation of java.lang.Character.getType() + * except for the above mentioned code points that had their category + * changed.
+ * Return results are constants from the interface + * UCharacterCategory + * @param ch code point whose type is to be determined + * @return category which is a value of UCharacterCategory + * @stable ICU 2.1 + */ public static int getType(int ch) { // when ch is out of bounds getProperty == 0 @@ -2080,44 +2070,48 @@ public final class UCharacter } /** - * Determines if a code point has a defined meaning in the up-to-date Unicode - * standard. - * E.g. supplementary code points though allocated space are not defined in - * Unicode yet.
- * Up-to-date Unicode implementation of java.lang.Character.isDefined() - * @param ch code point to be determined if it is defined in the most current - * version of Unicode - * @return true if this code point is defined in unicode - */ + * Determines if a code point has a defined meaning in the up-to-date + * Unicode standard. + * E.g. supplementary code points though allocated space are not defined in + * Unicode yet.
+ * Up-to-date Unicode implementation of java.lang.Character.isDefined() + * @param ch code point to be determined if it is defined in the most + * current version of Unicode + * @return true if this code point is defined in unicode + * @stable ICU 2.1 + */ public static boolean isDefined(int ch) { return getType(ch) != 0; } - /** - * Determines if a code point is a Java digit. - *
This method observes the semantics of - * java.lang.Character.isDigit(). It returns true for - * decimal digits only. - *
Semantic Change: In release 1.3.1 and prior, this - * treated numeric letters and other numbers as digits. This has - * been changed to conform to the java semantics. - * @param ch code point to query - * @return true if this code point is a digit */ + /** + * Determines if a code point is a Java digit. + *
This method observes the semantics of + * java.lang.Character.isDigit(). It returns true for decimal + * digits only. + *
Semantic Change: In release 1.3.1 and prior, this treated + * numeric letters and other numbers as digits. + * This has been changed to conform to the java semantics. + * @param ch code point to query + * @return true if this code point is a digit + * @stable ICU 2.1 + */ public static boolean isDigit(int ch) { return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; } /** - * Determines if the specified code point is an ISO control character. - * A code point is considered to be an ISO control character if it is in the - * range \u0000 through \u001F or in the range \u007F through - * \u009F.
- * Up-to-date Unicode implementation of java.lang.Character.isISOControl() - * @param ch code point to determine if it is an ISO control character - * @return true if code point is a ISO control character - */ + * Determines if the specified code point is an ISO control character. + * A code point is considered to be an ISO control character if it is in + * the range \u0000 through \u001F or in the range \u007F through + * \u009F.
+ * Up-to-date Unicode implementation of java.lang.Character.isISOControl() + * @param ch code point to determine if it is an ISO control character + * @return true if code point is a ISO control character + * @stable ICU 2.1 + */ public static boolean isISOControl(int ch) { return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && @@ -2125,11 +2119,12 @@ public final class UCharacter } /** - * Determines if the specified code point is a letter. - * Up-to-date Unicode implementation of java.lang.Character.isLetter() - * @param ch code point to determine if it is a letter - * @return true if code point is a letter - */ + * Determines if the specified code point is a letter. + * Up-to-date Unicode implementation of java.lang.Character.isLetter() + * @param ch code point to determine if it is a letter + * @return true if code point is a letter + * @stable ICU 2.1 + */ public static boolean isLetter(int ch) { int cat = getType(ch); @@ -2142,12 +2137,13 @@ public final class UCharacter } /** - * Determines if the specified code point is a letter or digit. - * Note this method, unlike java.lang.Character does not regard the ascii - * characters 'A' - 'Z' and 'a' - 'z' as digits. - * @param ch code point to determine if it is a letter or a digit - * @return true if code point is a letter or a digit - */ + * Determines if the specified code point is a letter or digit. + * Note this method, unlike java.lang.Character does not regard the ascii + * characters 'A' - 'Z' and 'a' - 'z' as digits. + * @param ch code point to determine if it is a letter or a digit + * @return true if code point is a letter or a digit + * @stable ICU 2.1 + */ public static boolean isLetterOrDigit(int ch) { int cat = getType(ch); @@ -2160,16 +2156,18 @@ public final class UCharacter } /** - * Determines if the specified code point is a lowercase character. 
- * UnicodeData only contains case mappings for code points where they are - * one-to-one mappings; it also omits information about context-sensitive - * case mappings.
For more information about Unicode case mapping please - * refer to the - * Technical report #21.
- * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() - * @param ch code point to determine if it is in lowercase - * @return true if code point is a lowercase character - */ + * Determines if the specified code point is a lowercase character. + * UnicodeData only contains case mappings for code points where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
For more information about Unicode case mapping + * please refer to the + * Technical report + * #21.
+ * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() + * @param ch code point to determine if it is in lowercase + * @return true if code point is a lowercase character + * @stable ICU 2.1 + */ public static boolean isLowerCase(int ch) { // if props == 0, it will just fall through and return false @@ -2177,29 +2175,30 @@ public final class UCharacter } /** - * Determines if the specified code point is a white space character. - * A code point is considered to be an whitespace character if and only - * if it satisfies one of the following criteria: - * - * - * Up-to-date Unicode implementation of java.lang.Character.isWhitespace(). - * @param ch code point to determine if it is a white space - * @return true if the specified code point is a white space character - */ + * Determines if the specified code point is a white space character. + * A code point is considered to be an whitespace character if and only + * if it satisfies one of the following criteria: + * + * + * Up-to-date Unicode implementation of java.lang.Character.isWhitespace(). + * @param ch code point to determine if it is a white space + * @return true if the specified code point is a white space character + * @stable ICU 2.1 + */ public static boolean isWhitespace(int ch) { int cat = getType(ch); @@ -2217,12 +2216,13 @@ public final class UCharacter } /** - * Determines if the specified code point is a Unicode specified space - * character, i.e. if code point is in the category Zs, Zl and Zp. - * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). - * @param ch code point to determine if it is a space - * @return true if the specified code point is a space character - */ + * Determines if the specified code point is a Unicode specified space + * character, i.e. if code point is in the category Zs, Zl and Zp. + * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
+ * @param ch code point to determine if it is a space + * @return true if the specified code point is a space character + * @stable ICU 2.1 + */ public static boolean isSpaceChar(int ch) { int cat = getType(ch); @@ -2233,17 +2233,18 @@ public final class UCharacter } /** - * Determines if the specified code point is a titlecase character. - * UnicodeData only contains case mappings for code points where they are - * one-to-one mappings; it also omits information about context-sensitive - * case mappings.
- * For more information about Unicode case mapping please refer to the - * - * Technical report #21.
- * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). - * @param ch code point to determine if it is in title case - * @return true if the specified code point is a titlecase character - */ + * Determines if the specified code point is a titlecase character. + * UnicodeData only contains case mappings for code points where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
+ * For more information about Unicode case mapping please refer to the + * + * Technical report #21.
+ * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). + * @param ch code point to determine if it is in title case + * @return true if the specified code point is a titlecase character + * @stable ICU 2.1 + */ public static boolean isTitleCase(int ch) { int cat = getType(ch); @@ -2252,30 +2253,32 @@ public final class UCharacter } /** - * Determines if the specified code point may be any part of a Unicode - * identifier other than the starting character. - * A code point may be part of a Unicode identifier if and only if it is one - * of the following: - * - * Up-to-date Unicode implementation of - * java.lang.Character.isUnicodeIdentifierPart().
- * See UTR #8. - * @param ch code point to determine if is can be part of a Unicode identifier - * @return true if code point is any character belonging a unicode identifier - * suffix after the first character - */ + * Determines if the specified code point may be any part of a Unicode + * identifier other than the starting character. + * A code point may be part of a Unicode identifier if and only if it is + * one of the following: + * + * Up-to-date Unicode implementation of + * java.lang.Character.isUnicodeIdentifierPart().
+ * See UTR #8. + * @param ch code point to determine if is can be part of a Unicode + * identifier + * @return true if code point is any character belonging a unicode + * identifier suffix after the first character + * @stable ICU 2.1 + */ public static boolean isUnicodeIdentifierPart(int ch) { int cat = getType(ch); @@ -2295,24 +2298,25 @@ public final class UCharacter } /** - * Determines if the specified code point is permissible as the first - * character in a Unicode identifier. - * A code point may start a Unicode identifier if it is of type either - * - * Up-to-date Unicode implementation of - * java.lang.Character.isUnicodeIdentifierStart().
- * See UTR #8. - * @param ch code point to determine if it can start a Unicode identifier - * @return true if code point is the first character belonging a unicode - * identifier - */ + * Determines if the specified code point is permissible as the first + * character in a Unicode identifier. + * A code point may start a Unicode identifier if it is of type either + * + * Up-to-date Unicode implementation of + * java.lang.Character.isUnicodeIdentifierStart().
+ * See UTR #8. + * @param ch code point to determine if it can start a Unicode identifier + * @return true if code point is the first character belonging a unicode + * identifier + * @stable ICU 2.1 + */ public static boolean isUnicodeIdentifierStart(int ch) { int cat = getType(ch); @@ -2326,17 +2330,18 @@ public final class UCharacter } /** - * Determines if the specified code point should be regarded as an ignorable - * character in a Unicode identifier. - * A character is ignorable in the Unicode standard if it is of the type Cf, - * Formatting code.
- * Up-to-date Unicode implementation of - * java.lang.Character.isIdentifierIgnorable().
- * See UTR #8. - * @param ch code point to be determined if it can be ignored in a Unicode - * identifier. - * @return true if the code point is ignorable - */ + * Determines if the specified code point should be regarded as an + * ignorable character in a Unicode identifier. + * A character is ignorable in the Unicode standard if it is of the type + * Cf, Formatting code.
+ * Up-to-date Unicode implementation of + * java.lang.Character.isIdentifierIgnorable().
+ * See UTR #8. + * @param ch code point to be determined if it can be ignored in a Unicode + * identifier. + * @return true if the code point is ignorable + * @stable ICU 2.1 + */ public static boolean isIdentifierIgnorable(int ch) { // see java.lang.Character.isIdentifierIgnorable() on range of @@ -2348,21 +2353,22 @@ public final class UCharacter } /** - * Determines if the specified code point is an uppercase character. - * UnicodeData only contains case mappings for code point where they are - * one-to-one mappings; it also omits information about context-sensitive - * case mappings.
- * For language specific case conversion behavior, use - * toUpperCase(locale, str).
- * For example, the case conversion for dot-less i and dotted I in Turkish, - * or for final sigma in Greek. - * For more information about Unicode case mapping please refer to the - * - * Technical report #21.
- * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). - * @param ch code point to determine if it is in uppercase - * @return true if the code point is an uppercase character - */ + * Determines if the specified code point is an uppercase character. + * UnicodeData only contains case mappings for code point where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
+ * For language specific case conversion behavior, use + * toUpperCase(locale, str).
+ * For example, the case conversion for dot-less i and dotted I in Turkish, + * or for final sigma in Greek. + * For more information about Unicode case mapping please refer to the + * + * Technical report #21.
+ * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). + * @param ch code point to determine if it is in uppercase + * @return true if the code point is an uppercase character + * @stable ICU 2.1 + */ public static boolean isUpperCase(int ch) { int cat = getType(ch); @@ -2371,22 +2377,23 @@ public final class UCharacter } /** - * The given code point is mapped to its lowercase equivalent; if the code - * point has no lowercase equivalent, the code point itself is returned. - * UnicodeData only contains case mappings for code point where they are - * one-to-one mappings; it also omits information about context-sensitive - * case mappings.
- * For language specific case conversion behavior, use - * toLowerCase(locale, str).
- * For example, the case conversion for dot-less i and dotted I in Turkish, - * or for final sigma in Greek. - * For more information about Unicode case mapping please refer to the - * - * Technical report #21.
- * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() - * @param ch code point whose lowercase equivalent is to be retrieved - * @return the lowercase equivalent code point - */ + * The given code point is mapped to its lowercase equivalent; if the code + * point has no lowercase equivalent, the code point itself is returned. + * UnicodeData only contains case mappings for code point where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
+ * For language specific case conversion behavior, use + * toLowerCase(locale, str).
+ * For example, the case conversion for dot-less i and dotted I in Turkish, + * or for final sigma in Greek. + * For more information about Unicode case mapping please refer to the + * + * Technical report #21.
+ * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() + * @param ch code point whose lowercase equivalent is to be retrieved + * @return the lowercase equivalent code point + * @stable ICU 2.1 + */ public static int toLowerCase(int ch) { // when ch is out of bounds getProperty == 0 @@ -2412,17 +2419,18 @@ public final class UCharacter } /** - * Converts argument code point and returns a String object representing the - * code point's value in UTF16 format. - * The result is a string whose length is 1 for non-supplementary code points, - * 2 otherwise.
- * com.ibm.ibm.icu.UTF16 can be used to parse Strings generated by this - * function.
- * Up-to-date Unicode implementation of java.lang.Character.toString() - * @param ch code point - * @return string representation of the code point, null if code point is not - * defined in unicode - */ + * Converts argument code point and returns a String object representing + * the code point's value in UTF16 format. + * The result is a string whose length is 1 for non-supplementary code + * points, 2 otherwise.
+ * com.ibm.icu.text.UTF16 can be used to parse Strings generated by this + * function.
+ * Up-to-date Unicode implementation of java.lang.Character.toString() + * @param ch code point + * @return string representation of the code point, null if code point is not + * defined in unicode + * @stable ICU 2.1 + */ public static String toString(int ch) { if (ch < MIN_VALUE || ch > MAX_VALUE) { @@ -2440,21 +2448,22 @@ public final class UCharacter } /** - * Converts the code point argument to titlecase. - * UnicodeData only contains case mappings for code points where they are - * one-to-one mappings; it also omits information about context-sensitive - * case mappings.
- * There are only four Unicode characters that are truly titlecase forms - * that are distinct from uppercase forms. - * For more information about Unicode case mapping please refer - * to the - * Technical report #21.
- * If no titlecase is available, the uppercase is returned. If no uppercase - * is available, the code point itself is returned.
- * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() - * @param ch code point whose title case is to be retrieved - * @return titlecase code point - */ + * Converts the code point argument to titlecase. + * UnicodeData only contains case mappings for code points where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
+ * There are only four Unicode characters that are truly titlecase forms + * that are distinct from uppercase forms. + * For more information about Unicode case mapping please refer + * to the + * Technical report #21.
+ * If no titlecase is available, the uppercase is returned. If no uppercase + * is available, the code point itself is returned.
+ * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() + * @param ch code point whose title case is to be retrieved + * @return titlecase code point + * @stable ICU 2.1 + */ public static int toTitleCase(int ch) { // when ch is out of bounds getProperty == 0 @@ -2487,18 +2496,19 @@ public final class UCharacter } /** - * Converts the character argument to uppercase. - * UnicodeData only contains case mappings for characters where they are - * one-to-one mappings; it also omits information about context-sensitive - * case mappings.
- * For more information about Unicode case mapping please refer - * to the - * Technical report #21.
- * If no uppercase is available, the character itself is returned.
- * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() - * @param ch code point whose uppercase is to be retrieved - * @return uppercase code point - */ + * Converts the character argument to uppercase. + * UnicodeData only contains case mappings for characters where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
+ * For more information about Unicode case mapping please refer + * to the + * Technical report #21.
+ * If no uppercase is available, the character itself is returned.
+ * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() + * @param ch code point whose uppercase is to be retrieved + * @return uppercase code point + * @stable ICU 2.1 + */ public static int toUpperCase(int ch) { // when ch is out of bounds getProperty == 0 @@ -2526,12 +2536,14 @@ public final class UCharacter // extra methods not in java.lang.Character -------------------------- /** - * Determines if the code point is a supplementary character. - * A code point is a supplementary character if and only if it is greater than - * SUPPLEMENTARY_MIN_VALUE - * @param ch code point to be determined if it is in the supplementary plane - * @return true if code point is a supplementary character - */ + * Determines if the code point is a supplementary character. + * A code point is a supplementary character if and only if it is greater + * than or equal to SUPPLEMENTARY_MIN_VALUE + * @param ch code point to be determined if it is in the supplementary + * plane + * @return true if code point is a supplementary character + * @stable ICU 2.1 + */ public static boolean isSupplementary(int ch) { return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && @@ -2539,22 +2551,24 @@ public final class UCharacter } /** - * Determines if the code point is in the BMP plane. - * @param ch code point to be determined if it is not a supplementary - * character - * @return true if code point is not a supplementary character - */ + * Determines if the code point is in the BMP plane. + * @param ch code point to be determined if it is not a supplementary + * character + * @return true if code point is not a supplementary character + * @stable ICU 2.1 + */ public static boolean isBMP(int ch) { return (ch >= 0 && ch <= LAST_CHAR_MASK_); } /** - * Determines whether the specified code point is a printable character - * according to the Unicode standard. 
- * @param ch code point to be determined if it is printable - * @return true if the code point is a printable character - */ + * Determines whether the specified code point is a printable character + * according to the Unicode standard. + * @param ch code point to be determined if it is printable + * @return true if the code point is a printable character + * @stable ICU 2.1 + */ public static boolean isPrintable(int ch) { int cat = getType(ch); @@ -2568,12 +2582,13 @@ public final class UCharacter } /** - * Determines whether the specified code point is of base form. - * A code point of base form does not graphically combine with preceding - * characters, and is neither a control nor a format character. - * @param ch code point to be determined if it is of base form - * @return true if the code point is of base form - */ + * Determines whether the specified code point is of base form. + * A code point of base form does not graphically combine with preceding + * characters, and is neither a control nor a format character. + * @param ch code point to be determined if it is of base form + * @return true if the code point is of base form + * @stable ICU 2.1 + */ public static boolean isBaseForm(int ch) { int cat = getType(ch); @@ -2592,14 +2607,15 @@ public final class UCharacter } /** - * Returns the Bidirection property of a code point. - * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional - * property.
- * Result returned belongs to the interface - * UCharacterDirection - * @param ch the code point to be determined its direction - * @return direction constant from UCharacterDirection. - */ + * Returns the Bidirection property of a code point. + * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional + * property.
+ * Result returned belongs to the interface + * UCharacterDirection + * @param ch the code point to be determined its direction + * @return direction constant from UCharacterDirection. + * @stable ICU 2.1 + */ public static int getDirection(int ch) { // when ch is out of bounds getProperty == 0 @@ -2608,13 +2624,14 @@ public final class UCharacter } /** - * Determines whether the code point has the "mirrored" property. - * This property is set for characters that are commonly used in - * Right-To-Left contexts and need to be displayed with a "mirrored" - * glyph. - * @param ch code point whose mirror is to be determined - * @return true if the code point has the "mirrored" property - */ + * Determines whether the code point has the "mirrored" property. + * This property is set for characters that are commonly used in + * Right-To-Left contexts and need to be displayed with a "mirrored" + * glyph. + * @param ch code point whose mirror is to be determined + * @return true if the code point has the "mirrored" property + * @stable ICU 2.1 + */ public static boolean isMirrored(int ch) { // when ch is out of bounds getProperty == 0 @@ -2623,18 +2640,19 @@ public final class UCharacter } /** - * Maps the specified code point to a "mirror-image" code point. - * For code points with the "mirrored" property, implementations sometimes - * need a "poor man's" mapping to another code point such that the default - * glyph may serve as the mirror-image of the default glyph of the specified - * code point.
- * This is useful for text conversion to and from codepages with visual - * order, and for displays without glyph selection capabilities. - * @param ch code point whose mirror is to be retrieved - * @return another code point that may serve as a mirror-image substitute, or - * ch itself if there is no such mapping or ch does not have the - * "mirrored" property - */ + * Maps the specified code point to a "mirror-image" code point. + * For code points with the "mirrored" property, implementations sometimes + * need a "poor man's" mapping to another code point such that the default + * glyph may serve as the mirror-image of the default glyph of the + * specified code point.
+ * This is useful for text conversion to and from codepages with visual + * order, and for displays without glyph selection capabilities. + * @param ch code point whose mirror is to be retrieved + * @return another code point that may serve as a mirror-image substitute, + * or ch itself if there is no such mapping or ch does not have the + * "mirrored" property + * @stable ICU 2.1 + */ public static int getMirror(int ch) { // when ch is out of bounds getProperty == 0 @@ -2658,10 +2676,11 @@ public final class UCharacter } /** - * Gets the combining class of the argument codepoint - * @param ch code point whose combining is to be retrieved - * @return the combining class of the codepoint - */ + * Gets the combining class of the argument codepoint + * @param ch code point whose combining is to be retrieved + * @return the combining class of the codepoint + * @stable ICU 2.1 + */ public static int getCombiningClass(int ch) { if (ch < MIN_VALUE || ch > MAX_VALUE) { @@ -2671,16 +2690,17 @@ public final class UCharacter } /** - * A code point is illegal if and only if - * - * Note: legal does not mean that it is assigned in this version of Unicode. - * @param ch code point to determine if it is a legal code point by itself - * @return true if and only if legal. - */ + * A code point is illegal if and only if + * + * Note: legal does not mean that it is assigned in this version of Unicode. + * @param ch code point to determine if it is a legal code point by itself + * @return true if and only if legal. + * @stable ICU 2.1 + */ public static boolean isLegal(int ch) { if (ch < MIN_VALUE) { @@ -2699,17 +2719,18 @@ public final class UCharacter } /** - * A string is legal iff all its code points are legal. - * A code point is illegal if and only if - * - * Note: legal does not mean that it is assigned in this version of Unicode. - * @param ch code point to determine if it is a legal code point by itself - * @return true if and only if legal. 
- */ + * A string is legal iff all its code points are legal. + * A code point is illegal if and only if + * + * Note: legal does not mean that it is assigned in this version of Unicode. + * @param ch code point to determine if it is a legal code point by itself + * @return true if and only if legal. + * @stable ICU 2.1 + */ public static boolean isLegal(String str) { int size = str.length(); @@ -2728,39 +2749,42 @@ public final class UCharacter } /** - * Gets the version of Unicode data used. - * @return the unicode version number used - */ + * Gets the version of Unicode data used. + * @return the unicode version number used + * @stable ICU 2.1 + */ public static VersionInfo getUnicodeVersion() { return PROPERTY_.m_unicodeVersion_; } /** - * Retrieve the most current Unicode name of the argument code point, or - * null if the character is unassigned or outside the range - * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. - *
- * Note calling any methods related to code point names, e.g. get*Name*() - * incurs a one-time initialisation cost to construct the name tables. - * @param ch the code point for which to get the name - * @return most current Unicode name - */ + * Retrieve the most current Unicode name of the argument code point, or + * null if the character is unassigned or outside the range + * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. + *
+ * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param ch the code point for which to get the name + * @return most current Unicode name + * @stable ICU 2.1 + */ public static String getName(int ch) { return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); } /** - * Retrieve the earlier version 1.0 Unicode name of the argument code point, - * or null if the character is unassigned or outside the range - * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. - *
- * Note calling any methods related to code point names, e.g. get*Name*() - * incurs a one-time initialisation cost to construct the name tables. - * @param ch the code point for which to get the name - * @return version 1.0 Unicode name - */ + * Retrieve the earlier version 1.0 Unicode name of the argument code + * point, or null if the character is unassigned or outside the range + * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. + *
+ * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param ch the code point for which to get the name + * @return version 1.0 Unicode name + * @stable ICU 2.1 + */ public static String getName1_0(int ch) { return NAME_.getName(ch, @@ -2768,23 +2792,23 @@ public final class UCharacter } /** - *

Retrieves a name for a valid codepoint. Unlike, getName(int) and - * getName1_0(int), this method will return a name even for codepoints that - * are not assigned a name in UnicodeData.txt. - *

- * The names are returned in the following order. - * - * Note calling any methods related to code point names, e.g. get*Name*() - * incurs a one-time initialisation cost to construct the name tables. - * @param ch the code point for which to get the name - * @return a name for the argument codepoint - * @draft 2.1 - */ + *

Retrieves a name for a valid codepoint. Unlike getName(int) and + * getName1_0(int), this method will return a name even for codepoints that + * are not assigned a name in UnicodeData.txt. + *

+ * The names are returned in the following order. + * + * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param ch the code point for which to get the name + * @return a name for the argument codepoint + * @draft ICU 2.1 + */ public static String getExtendedName(int ch) { return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); @@ -2812,14 +2836,15 @@ public final class UCharacter } /** - *

Find a Unicode code point by its most current Unicode name and - * return its code point value. All Unicode names are in uppercase.

- * Note calling any methods related to code point names, e.g. get*Name*() - * incurs a one-time initialisation cost to construct the name tables. - * @param name most current Unicode character name whose code point is to be - * returned - * @return code point or -1 if name is not found - */ + *

Find a Unicode code point by its most current Unicode name and + * return its code point value. All Unicode names are in uppercase.

+ * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name most current Unicode character name whose code point is to + * be returned + * @return code point or -1 if name is not found + * @stable ICU 2.1 + */ public static int getCharFromName(String name) { return NAME_.getCharFromName( @@ -2827,14 +2852,15 @@ public final class UCharacter } /** - *

Find a Unicode character by its version 1.0 Unicode name and return - * its code point value. All Unicode names are in uppercase.

- * Note calling any methods related to code point names, e.g. get*Name*() - * incurs a one-time initialisation cost to construct the name tables. - * @param name Unicode 1.0 code point name whose code point is to - * returned - * @return code point or -1 if name is not found - */ + *

Find a Unicode character by its version 1.0 Unicode name and return + * its code point value. All Unicode names are in uppercase.

+ * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name Unicode 1.0 code point name whose code point is to + * returned + * @return code point or -1 if name is not found + * @stable ICU 2.1 + */ public static int getCharFromName1_0(String name) { return NAME_.getCharFromName( @@ -2842,24 +2868,24 @@ public final class UCharacter } /** - *

Find a Unicode character by either its name and return its code - * point value. All Unicode names are in uppercase. - * Extended names are all lowercase except for numbers and are contained - * within angle brackets.

- * The names are searched in the following order - * - * Note calling any methods related to code point names, e.g. get*Name*() - * incurs a one-time initialisation cost to construct the name tables. - * @param name codepoint name - * @return code point associated with the name or -1 if the name is not - * found. - * @draft 2.1 - */ + *

Find a Unicode character by any of its names and return its code + * point value. All Unicode names are in uppercase. + * Extended names are all lowercase except for numbers and are contained + * within angle brackets.

+ * The names are searched in the following order + * + * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name codepoint name + * @return code point associated with the name or -1 if the name is not + * found. + * @draft ICU 2.1 + */ public static int getCharFromExtendedName(String name) { return NAME_.getCharFromName( @@ -2892,8 +2918,7 @@ public final class UCharacter * * @see UProperty * @see UProperty.NameChoice - * @since ICU 2.4 - * @draft 2.4 + * @draft ICU 2.4 */ public static String getPropertyName(int property, int nameChoice) { @@ -2915,8 +2940,7 @@ public final class UCharacter * is not recognized. * * @see UProperty - * @since ICU 2.4 - * @draft 2.4 + * @draft ICU 2.4 */ public static int getPropertyEnum(String propertyAlias) { return PNAMES_.getPropertyEnum(propertyAlias); @@ -2960,8 +2984,7 @@ public final class UCharacter * * @see UProperty * @see UProperty.NameChoice - * @since ICU 2.4 - * @draft 2.4 + * @draft ICU 2.4 */ public static String getPropertyValueName(int property, int value, @@ -2990,8 +3013,7 @@ public final class UCharacter * [:L:] to be represented. * * @see UProperty - * @since ICU 2.4 - * @draft 2.4 + * @draft ICU 2.4 */ public static int getPropertyValueEnum(int property, String valueAlias) { @@ -2999,13 +3021,14 @@ public final class UCharacter } /** - * Returns a code point corresponding to the two UTF16 characters. - * @param lead the lead char - * @param trail the trail char - * @return code point if surrogate characters are valid. - * @exception IllegalArgumentException thrown when argument characters do - * not form a valid codepoint - */ + * Returns a code point corresponding to the two UTF16 characters. + * @param lead the lead char + * @param trail the trail char + * @return code point if surrogate characters are valid. 
+ * @exception IllegalArgumentException thrown when argument characters do + * not form a valid codepoint + * @stable ICU 2.1 + */ public static int getCodePoint(char lead, char trail) { if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE && @@ -3018,12 +3041,13 @@ public final class UCharacter } /** - * Returns the code point corresponding to the UTF16 character. - * @param char16 the UTF16 character - * @return code point if argument is a valid character. - * @exception IllegalArgumentException thrown when char16 is not a valid - * codepoint - */ + * Returns the code point corresponding to the UTF16 character. + * @param char16 the UTF16 character + * @return code point if argument is a valid character. + * @exception IllegalArgumentException thrown when char16 is not a valid + * codepoint + * @stable ICU 2.1 + */ public static int getCodePoint(char char16) { if (UCharacter.isLegal(char16)) { @@ -3033,57 +3057,60 @@ public final class UCharacter } /** - * Gets uppercase version of the argument string. - * Casing is dependent on the default locale and context-sensitive. - * @param str source string to be performed on - * @return uppercase version of the argument string - */ + * Gets uppercase version of the argument string. + * Casing is dependent on the default locale and context-sensitive. + * @param str source string to be performed on + * @return uppercase version of the argument string + * @stable ICU 2.1 + */ public static String toUpperCase(String str) { return toUpperCase(Locale.getDefault(), str); } /** - * Gets lowercase version of the argument string. - * Casing is dependent on the default locale and context-sensitive - * @param str source string to be performed on - * @return lowercase version of the argument string - */ + * Gets lowercase version of the argument string. 
+ * Casing is dependent on the default locale and context-sensitive + * @param str source string to be performed on + * @return lowercase version of the argument string + * @stable ICU 2.1 + */ public static String toLowerCase(String str) { return toLowerCase(Locale.getDefault(), str); } /** - *

Gets the titlecase version of the argument string.

- *

Position for titlecasing is determined by the argument break - * iterator, hence the user can customized his break iterator for - * a specialized titlecasing. In this case only the forward iteration - * needs to be implemented. - * If the break iterator passed in is null, the default Unicode algorithm - * will be used to determine the titlecase positions. - *

- *

Only positions returned by the break iterator will be title cased, - * character in between the positions will all be in lower case.

- *

Casing is dependent on the default locale and context-sensitive

- * @param str source string to be performed on - * @param breakiter break iterator to determine the positions in which - * the character should be title cased. - * @return lowercase version of the argument string - * @draft 2.1 - */ + *

Gets the titlecase version of the argument string.

+ *

Position for titlecasing is determined by the argument break + * iterator, hence the user can customize the break iterator for + * a specialized titlecasing. In this case only the forward iteration + * needs to be implemented. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. + *

+ *

Only positions returned by the break iterator will be title cased, + * character in between the positions will all be in lower case.

+ *

Casing is dependent on the default locale and context-sensitive

+ * @param str source string to be performed on + * @param breakiter break iterator to determine the positions in which + * the character should be title cased. + * @return lowercase version of the argument string + * @draft ICU 2.1 + */ public static String toTitleCase(String str, BreakIterator breakiter) { return toTitleCase(Locale.getDefault(), str, breakiter); } /** - * Gets uppercase version of the argument string. - * Casing is dependent on the argument locale and context-sensitive. - * @param locale which string is to be converted in - * @param str source string to be performed on - * @return uppercase version of the argument string - */ + * Gets uppercase version of the argument string. + * Casing is dependent on the argument locale and context-sensitive. + * @param locale which string is to be converted in + * @param str source string to be performed on + * @return uppercase version of the argument string + * @stable ICU 2.1 + */ public static String toUpperCase(Locale locale, String str) { if (locale == null) { @@ -3093,12 +3120,13 @@ public final class UCharacter } /** - * Gets lowercase version of the argument string. - * Casing is dependent on the argument locale and context-sensitive - * @param locale which string is to be converted in - * @param str source string to be performed on - * @return lowercase version of the argument string - */ + * Gets lowercase version of the argument string. + * Casing is dependent on the argument locale and context-sensitive + * @param locale which string is to be converted in + * @param str source string to be performed on + * @return lowercase version of the argument string + * @stable ICU 2.1 + */ public static String toLowerCase(Locale locale, String str) { int length = str.length(); @@ -3111,24 +3139,24 @@ public final class UCharacter } /** - *

Gets the titlecase version of the argument string.

- *

Position for titlecasing is determined by the argument break - * iterator, hence the user can customized his break iterator for - * a specialized titlecasing. In this case only the forward iteration - * needs to be implemented. - * If the break iterator passed in is null, the default Unicode algorithm - * will be used to determine the titlecase positions. - *

- *

Only positions returned by the break iterator will be title cased, - * character in between the positions will all be in lower case.

- *

Casing is dependent on the argument locale and context-sensitive

- * @param locale which string is to be converted in - * @param str source string to be performed on - * @param breakiter break iterator to determine the positions in which - * the character should be title cased. - * @return lowercase version of the argument string - * @draft 2.1 - */ + *

Gets the titlecase version of the argument string.

+ *

Position for titlecasing is determined by the argument break + * iterator, hence the user can customize the break iterator for + * a specialized titlecasing. In this case only the forward iteration + * needs to be implemented. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. + *

+ *

Only positions returned by the break iterator will be title cased, + * character in between the positions will all be in lower case.

+ *

Casing is dependent on the argument locale and context-sensitive

+ * @param locale which string is to be converted in + * @param str source string to be performed on + * @param breakiter break iterator to determine the positions in which + * the character should be title cased. + * @return lowercase version of the argument string + * @draft ICU 2.1 + */ public static String toTitleCase(Locale locale, String str, BreakIterator breakiter) { @@ -3142,21 +3170,22 @@ public final class UCharacter } /** - * The given character is mapped to its case folding equivalent according to - * UnicodeData.txt and CaseFolding.txt; if the character has no case folding - * equivalent, the character itself is returned. - * Only "simple", single-code point case folding mappings are used. - * For "full", multiple-code point mappings use the API - * foldCase(String str, boolean defaultmapping). - * @param ch the character to be converted - * @param defaultmapping Indicates if all mappings defined in CaseFolding.txt - * is to be used, otherwise the mappings for dotted I - * and dotless i marked with 'I' in CaseFolding.txt will - * be skipped. - * @return the case folding equivalent of the character, if any; - * otherwise the character itself. - * @see #foldCase(String, boolean) - */ + * The given character is mapped to its case folding equivalent according + * to UnicodeData.txt and CaseFolding.txt; if the character has no case + * folding equivalent, the character itself is returned. + * Only "simple", single-code point case folding mappings are used. + * For "full", multiple-code point mappings use the API + * foldCase(String str, boolean defaultmapping). + * @param ch the character to be converted + * @param defaultmapping Indicates if all mappings defined in + * CaseFolding.txt is to be used, otherwise the + * mappings for dotted I and dotless i marked with + * 'I' in CaseFolding.txt will be skipped. + * @return the case folding equivalent of the character, if + * any; otherwise the character itself. 
+ * @see #foldCase(String, boolean) + * @stable ICU 2.1 + */ public static int foldCase(int ch, boolean defaultmapping) { // Some special cases are hardcoded because their conditions cannot be @@ -3255,21 +3284,22 @@ public final class UCharacter } /** - * The given string is mapped to its case folding equivalent according to - * UnicodeData.txt and CaseFolding.txt; if any character has no case folding - * equivalent, the character itself is returned. - * "Full", multiple-code point case folding mappings are returned here. - * For "simple" single-code point mappings use the API - * foldCase(int ch, boolean defaultmapping). - * @param str the String to be converted - * @param defaultmapping Indicates if all mappings defined in CaseFolding.txt - * is to be used, otherwise the mappings for dotted I - * and dotless i marked with 'I' in CaseFolding.txt will - * be skipped. - * @return the case folding equivalent of the character, if any; - * otherwise the character itself. - * @see #foldCase(int, boolean) - */ + * The given string is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; if any character has no case + * folding equivalent, the character itself is returned. + * "Full", multiple-code point case folding mappings are returned here. + * For "simple" single-code point mappings use the API + * foldCase(int ch, boolean defaultmapping). + * @param str the String to be converted + * @param defaultmapping Indicates if all mappings defined in + * CaseFolding.txt is to be used, otherwise the + * mappings for dotted I and dotless i marked with + * 'I' in CaseFolding.txt will be skipped. + * @return the case folding equivalent of the character, if + * any; otherwise the character itself. 
+ * @see #foldCase(int, boolean) + * @stable ICU 2.1 + */ public static String foldCase(String str, boolean defaultmapping) { int size = str.length(); @@ -3357,17 +3387,17 @@ public final class UCharacter } /** - * Return numeric value of Han code points. - *
This returns the value of Han 'numeric' code points, - * including those for zero, ten, hundred, thousand, ten thousand, - * and hundred million. Unicode does not consider these to be - * numeric. This includes both the standard and 'checkwriting' - * characters, the 'big circle' zero character, and the standard - * zero character. - * @draft - * @param ch code point to query - * @return value if it is a Han 'numeric character,' otherwise return -1. - */ + * Return numeric value of Han code points. + *
This returns the value of Han 'numeric' code points, + * including those for zero, ten, hundred, thousand, ten thousand, + * and hundred million. Unicode does not consider these to be + * numeric. This includes both the standard and 'checkwriting' + * characters, the 'big circle' zero character, and the standard + * zero character. + * @param ch code point to query + * @return value if it is a Han 'numeric character,' otherwise return -1. + * @stable ICU 2.4 + */ public static int getHanNumericValue(int ch) { switch(ch) @@ -3420,48 +3450,48 @@ public final class UCharacter } /** - *

Gets an iterator for character types, iterating over codepoints.

- * Example of use:
- *
-    * RangeValueIterator iterator = UCharacter.getTypeIterator();
-    * RangeValueIterator.Element element = new RangeValueIterator.Element();
-    * while (iterator.next(element)) {
-    *     System.out.println("Codepoint \\u" + 
-    *                        Integer.toHexString(element.start) + 
-    *                        " to codepoint \\u" +
-    *                        Integer.toHexString(element.limit - 1) + 
-    *                        " has the character type " + 
-    *                        element.value);
-    * }
-    * 
- * @return an iterator - * @draft 2.1 - */ + *

Gets an iterator for character types, iterating over codepoints.

+ * Example of use:
+ *
+     * RangeValueIterator iterator = UCharacter.getTypeIterator();
+     * RangeValueIterator.Element element = new RangeValueIterator.Element();
+     * while (iterator.next(element)) {
+     *     System.out.println("Codepoint \\u" + 
+     *                        Integer.toHexString(element.start) + 
+     *                        " to codepoint \\u" +
+     *                        Integer.toHexString(element.limit - 1) + 
+     *                        " has the character type " + 
+     *                        element.value);
+     * }
+     * 
+ * @return an iterator + * @draft ICU 2.1 + */ public static RangeValueIterator getTypeIterator() { return new UCharacterTypeIterator(PROPERTY_); } /** - *

Gets an iterator for character names, iterating over codepoints.

- *

This API only gets the iterator for the modern, most up-to-date - * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or - * for extended names use getExtendedNameIterator().

- * Example of use:
- *
-    * ValueIterator iterator = UCharacter.getNameIterator();
-    * ValueIterator.Element element = new ValueIterator.Element();
-    * while (iterator.next(element)) {
-    *     System.out.println("Codepoint \\u" + 
-    *                        Integer.toHexString(element.codepoint) +
-    *                        " has the name " + (String)element.value);
-    * }
-    * 
- *

The maximal range which the name iterator iterates is from - * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.

- * @return an iterator - * @draft 2.1 - */ + *

Gets an iterator for character names, iterating over codepoints.

+ *

This API only gets the iterator for the modern, most up-to-date + * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or + * for extended names use getExtendedNameIterator().

+ * Example of use:
+ *
+     * ValueIterator iterator = UCharacter.getNameIterator();
+     * ValueIterator.Element element = new ValueIterator.Element();
+     * while (iterator.next(element)) {
+     *     System.out.println("Codepoint \\u" + 
+     *                        Integer.toHexString(element.codepoint) +
+     *                        " has the name " + (String)element.value);
+     * }
+     * 
+ *

The maximal range which the name iterator iterates is from + * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.

+ * @return an iterator + * @draft ICU 2.1 + */ public static ValueIterator getNameIterator() { return new UCharacterNameIterator(NAME_, @@ -3469,24 +3499,24 @@ public final class UCharacter } /** - *

Gets an iterator for character names, iterating over codepoints.

- *

This API only gets the iterator for the older 1.0 Unicode names. - * For modern, most up-to-date Unicode names use getNameIterator() or - * for extended names use getExtendedNameIterator().

- * Example of use:
- *
-    * ValueIterator iterator = UCharacter.get1_0NameIterator();
-    * ValueIterator.Element element = new ValueIterator.Element();
-    * while (iterator.next(element)) {
-    *     System.out.println("Codepoint \\u" + 
-    *                        Integer.toHexString(element.codepoint) +
-    *                        " has the name " + (String)element.value);
-    * }
-    * 
- *

The maximal range which the name iterator iterates is from - * @return an iterator - * @draft 2.1 - */ + *

Gets an iterator for character names, iterating over codepoints.

+ *

This API only gets the iterator for the older 1.0 Unicode names. + * For modern, most up-to-date Unicode names use getNameIterator() or + * for extended names use getExtendedNameIterator().

+ * Example of use:
+ *
+     * ValueIterator iterator = UCharacter.getName1_0Iterator();
+     * ValueIterator.Element element = new ValueIterator.Element();
+     * while (iterator.next(element)) {
+     *     System.out.println("Codepoint \\u" + 
+     *                        Integer.toHexString(element.codepoint) +
+     *                        " has the name " + (String)element.value);
+     * }
+     * 
+ *

The maximal range which the name iterator iterates is from + * @return an iterator + * @draft ICU 2.1 + */ public static ValueIterator getName1_0Iterator() { return new UCharacterNameIterator(NAME_, @@ -3494,24 +3524,24 @@ public final class UCharacter } /** - *

Gets an iterator for character names, iterating over codepoints.

- *

This API only gets the iterator for the extended names. - * For modern, most up-to-date Unicode names use getNameIterator() or - * for older 1.0 Unicode names use get1_0NameIterator().

- * Example of use:
- *
-    * ValueIterator iterator = UCharacter.getExtendedNameIterator();
-    * ValueIterator.Element element = new ValueIterator.Element();
-    * while (iterator.next(element)) {
-    *     System.out.println("Codepoint \\u" + 
-    *                        Integer.toHexString(element.codepoint) +
-    *                        " has the name " + (String)element.value);
-    * }
-    * 
- *

The maximal range which the name iterator iterates is from - * @return an iterator - * @draft 2.1 - */ + *

Gets an iterator for character names, iterating over codepoints.

+ *

This API only gets the iterator for the extended names. + * For modern, most up-to-date Unicode names use getNameIterator() or + * for older 1.0 Unicode names use getName1_0Iterator().

+ * Example of use:
+ *
+     * ValueIterator iterator = UCharacter.getExtendedNameIterator();
+     * ValueIterator.Element element = new ValueIterator.Element();
+     * while (iterator.next(element)) {
+     *     System.out.println("Codepoint \\u" + 
+     *                        Integer.toHexString(element.codepoint) +
+     *                        " has the name " + (String)element.value);
+     * }
+     * 
+ *

The maximal range which the name iterator iterates is from + * @return an iterator + * @draft ICU 2.1 + */ public static ValueIterator getExtendedNameIterator() { return new UCharacterNameIterator(NAME_, @@ -3844,12 +3874,12 @@ public final class UCharacter /** * Database storing the sets of character name */ - protected static final UCharacterName NAME_; + static final UCharacterName NAME_; /** * Singleton object encapsulating the imported pnames.icu property aliases */ - protected static final UPropertyAliases PNAMES_; + static final UPropertyAliases PNAMES_; // block to initialise name database and unicode 1.0 data static @@ -3868,8 +3898,8 @@ public final class UCharacter // private variables ------------------------------------------------- /** - * Database storing the sets of character property - */ + * Database storing the sets of character property + */ private static final UCharacterProperty PROPERTY_; // block to initialise character property database @@ -3886,112 +3916,112 @@ public final class UCharacter } /** - * To get the last character out from a data type - */ + * To get the last character out from a data type + */ private static final int LAST_CHAR_MASK_ = 0xFFFF; /** - * To get the last byte out from a data type - */ + * To get the last byte out from a data type + */ private static final int LAST_BYTE_MASK_ = 0xFF; /** - * Shift 16 bits - */ + * Shift 16 bits + */ private static final int SHIFT_16_ = 16; /** - * Shift 24 bits - */ + * Shift 24 bits + */ private static final int SHIFT_24_ = 24; /** - * Decimal radix - */ + * Decimal radix + */ private static final int DECIMAL_RADIX_ = 10; /** - * No break space code point - */ + * No break space code point + */ private static final int NO_BREAK_SPACE_ = 0xA0; /** - * Narrow no break space code point - */ + * Narrow no break space code point + */ private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; /** - * Zero width no break space code point - */ + * Zero width no break space code point + */ 
private static final int ZERO_WIDTH_NO_BREAK_SPACE_ = 0xFEFF; /** - * Ideographic number zero code point - */ + * Ideographic number zero code point + */ private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; /** - * CJK Ideograph, First code point - */ + * CJK Ideograph, First code point + */ private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; /** - * CJK Ideograph, Second code point - */ + * CJK Ideograph, Second code point + */ private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; /** - * CJK Ideograph, Third code point - */ + * CJK Ideograph, Third code point + */ private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; /** - * CJK Ideograph, Fourth code point - */ + * CJK Ideograph, Fourth code point + */ private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8; /** - * CJK Ideograph, FIFTH code point - */ + * CJK Ideograph, FIFTH code point + */ private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; /** - * CJK Ideograph, Sixth code point - */ + * CJK Ideograph, Sixth code point + */ private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; /** - * CJK Ideograph, Seventh code point - */ + * CJK Ideograph, Seventh code point + */ private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; /** - * CJK Ideograph, Eighth code point - */ + * CJK Ideograph, Eighth code point + */ private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; /** - * CJK Ideograph, Nineth code point - */ + * CJK Ideograph, Nineth code point + */ private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; /** - * Application Program command code point - */ + * Application Program command code point + */ private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F; /** - * Unit separator code point - */ + * Unit separator code point + */ private static final int UNIT_SEPARATOR_ = 0x001F; /** - * Delete code point - */ + * Delete code point + */ private static final int DELETE_ = 0x007F; /** - * ISO control character first range upper limit 0x0 - 0x1F - */ + * ISO control character 
first range upper limit 0x0 - 0x1F + */ private static final int ISO_CONTROL_FIRST_RANGE_MAX_ = 0x1F; /** * Shift to get numeric type @@ -4002,17 +4032,17 @@ public final class UCharacter */ private static final int NUMERIC_TYPE_MASK_ = 0x7; /** - * Shift to get bidi bits - */ + * Shift to get bidi bits + */ private static final int BIDI_SHIFT_ = 6; /** - * Mask to be applied after shifting to get bidi bits - */ + * Mask to be applied after shifting to get bidi bits + */ private static final int BIDI_MASK_AFTER_SHIFT_ = 0x1F; /** - * Han digit characters - */ + * Han digit characters + */ private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6; private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9; private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3; @@ -4112,8 +4142,8 @@ public final class UCharacter // private constructor ----------------------------------------------- /** - * Private constructor to prevent instantiation - */ + * Private constructor to prevent instantiation + */ private UCharacter() { } diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java index 4d86d4e232..79516acbc6 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java @@ -6,8 +6,8 @@ * * $Source: * /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $ -* $Date: 2002/09/19 21:18:14 $ -* $Revision: 1.9 $ +* $Date: 2002/12/03 00:47:53 $ +* $Revision: 1.10 $ * ******************************************************************************* */ @@ -15,238 +15,269 @@ package com.ibm.icu.lang; /** -* Enumerated Unicode category types from the UnicodeData.txt file. -* Used as return results from UCharacter -* Equivalent to icu's UCharCategory. -* Refer to -* Unicode Consortium for more information about UnicodeData.txt. -*

-* This class is not subclassable -*

-* @author Syn Wee Quek -* @since oct0300 -*/ + * Enumerated Unicode category types from the UnicodeData.txt file. + * Used as return results from UCharacter + * Equivalent to icu's UCharCategory. + * Refer to + * Unicode Consortium for more information about UnicodeData.txt. + *

+ * This class is not subclassable + *

+ * @author Syn Wee Quek + * @stable ICU 2.1 + */ public final class UCharacterCategory { - // public variable ----------------------------------------------------- - - /** - * Unassigned character type - */ - public static final int UNASSIGNED = 0; - /** - * Character type Cn - * Not Assigned (no characters in [UnicodeData.txt] have this property) - * @draft 2.1 - */ - public static final int GENERAL_OTHER_TYPES = 0; - /** - * Character type Lu - */ - public static final int UPPERCASE_LETTER = 1; - /** - * Character type Ll - */ - public static final int LOWERCASE_LETTER = 2; - /** - * Character type Lt - */ - public static final int TITLECASE_LETTER = 3; - /** - * Character type Lm - */ - public static final int MODIFIER_LETTER = 4; - /** - * Character type Lo - */ - public static final int OTHER_LETTER = 5; - /** - * Character type Mn - */ - public static final int NON_SPACING_MARK = 6; - /** - * Character type Me - */ - public static final int ENCLOSING_MARK = 7; - /** - * Character type Mc - */ - public static final int COMBINING_SPACING_MARK = 8; - /** - * Character type Nd - */ - public static final int DECIMAL_DIGIT_NUMBER = 9; - /** - * Character type Nl - */ - public static final int LETTER_NUMBER = 10; - - // start of 11------------ - - /** - * Character type No - */ - public static final int OTHER_NUMBER = 11; - /** - * Character type Zs - */ - public static final int SPACE_SEPARATOR = 12; - /** - * Character type Zl - */ - public static final int LINE_SEPARATOR = 13; - /** - * Character type Zp - */ - public static final int PARAGRAPH_SEPARATOR = 14; - /** - * Character type Cc - */ - public static final int CONTROL = 15; - /** - * Character type Cf - */ - public static final int FORMAT = 16; - /** - * Character type Co - */ - public static final int PRIVATE_USE = 17; - /** - * Character type Cs - */ - public static final int SURROGATE = 18; - /** - * Character type Pd - */ - public static final int DASH_PUNCTUATION = 19; - /** - * Character type Ps - 
*/ - public static final int START_PUNCTUATION = 20; - - // start of 21 ------------ - - /** - * Character type Pe - */ - public static final int END_PUNCTUATION = 21; - /** - * Character type Pc - */ - public static final int CONNECTOR_PUNCTUATION = 22; - /** - * Character type Po - */ - public static final int OTHER_PUNCTUATION = 23; - /** - * Character type Sm - */ - public static final int MATH_SYMBOL = 24; - /** - * Character type Sc - */ - public static final int CURRENCY_SYMBOL = 25; - /** - * Character type Sk - */ - public static final int MODIFIER_SYMBOL = 26; - /** - * Character type So - */ - public static final int OTHER_SYMBOL = 27; - /** - * Character type Pi - */ - public static final int INITIAL_PUNCTUATION = 28; - /** - * Character type Pf - */ - public static final int FINAL_PUNCTUATION = 29; - - // start of 31 ------------ - - /** - * Character type count - */ - public static final int CHAR_CATEGORY_COUNT = 30; - - /** - * Gets the name of the argument category - * @param category to retrieve name - * @return category name - */ - public static String toString(int category) - { - switch (category) - { - case UPPERCASE_LETTER : - return "Letter, Uppercase"; - case LOWERCASE_LETTER : - return "Letter, Lowercase"; - case TITLECASE_LETTER : - return "Letter, Titlecase"; - case MODIFIER_LETTER : - return "Letter, Modifier"; - case OTHER_LETTER : - return "Letter, Other"; - case NON_SPACING_MARK : - return "Mark, Non-Spacing"; - case ENCLOSING_MARK : - return "Mark, Enclosing"; - case COMBINING_SPACING_MARK : - return "Mark, Spacing Combining"; - case DECIMAL_DIGIT_NUMBER : - return "Number, Decimal Digit"; - case LETTER_NUMBER : - return "Number, Letter"; - case OTHER_NUMBER : - return "Number, Other"; - case SPACE_SEPARATOR : - return "Separator, Space"; - case LINE_SEPARATOR : - return "Separator, Line"; - case PARAGRAPH_SEPARATOR : - return "Separator, Paragraph"; - case CONTROL : - return "Other, Control"; - case FORMAT : - return "Other, Format"; 
- case PRIVATE_USE : - return "Other, Private Use"; - case SURROGATE : - return "Other, Surrogate"; - case DASH_PUNCTUATION : - return "Punctuation, Dash"; - case START_PUNCTUATION : - return "Punctuation, Open"; - case END_PUNCTUATION : - return "Punctuation, Close"; - case CONNECTOR_PUNCTUATION : - return "Punctuation, Connector"; - case OTHER_PUNCTUATION : - return "Punctuation, Other"; - case MATH_SYMBOL : - return "Symbol, Math"; - case CURRENCY_SYMBOL : - return "Symbol, Currency"; - case MODIFIER_SYMBOL : - return "Symbol, Modifier"; - case OTHER_SYMBOL : - return "Symbol, Other"; - case INITIAL_PUNCTUATION : - return "Punctuation, Initial quote"; - case FINAL_PUNCTUATION : - return "Punctuation, Final quote"; - } - return "Unassigned"; - } - - // private constructor ----------------------------------------------- + // public variable ----------------------------------------------------- /** - * Private constructor to prevent initialisation - */ + * Unassigned character type + * @stable ICU 2.1 + */ + public static final int UNASSIGNED = 0; + /** + * Character type Cn + * Not Assigned (no characters in [UnicodeData.txt] have this property) + * @draft ICU 2.1 + */ + public static final int GENERAL_OTHER_TYPES = 0; + /** + * Character type Lu + * @stable ICU 2.1 + */ + public static final int UPPERCASE_LETTER = 1; + /** + * Character type Ll + * @stable ICU 2.1 + */ + public static final int LOWERCASE_LETTER = 2; + /** + * Character type Lt + * @stable ICU 2.1 + */ + public static final int TITLECASE_LETTER = 3; + /** + * Character type Lm + * @stable ICU 2.1 + */ + public static final int MODIFIER_LETTER = 4; + /** + * Character type Lo + * @stable ICU 2.1 + */ + public static final int OTHER_LETTER = 5; + /** + * Character type Mn + * @stable ICU 2.1 + */ + public static final int NON_SPACING_MARK = 6; + /** + * Character type Me + * @stable ICU 2.1 + */ + public static final int ENCLOSING_MARK = 7; + /** + * Character type Mc + * @stable ICU 2.1 + */ + 
public static final int COMBINING_SPACING_MARK = 8; + /** + * Character type Nd + * @stable ICU 2.1 + */ + public static final int DECIMAL_DIGIT_NUMBER = 9; + /** + * Character type Nl + * @stable ICU 2.1 + */ + public static final int LETTER_NUMBER = 10; + + // start of 11------------ + + /** + * Character type No + * @stable ICU 2.1 + */ + public static final int OTHER_NUMBER = 11; + /** + * Character type Zs + * @stable ICU 2.1 + */ + public static final int SPACE_SEPARATOR = 12; + /** + * Character type Zl + * @stable ICU 2.1 + */ + public static final int LINE_SEPARATOR = 13; + /** + * Character type Zp + * @stable ICU 2.1 + */ + public static final int PARAGRAPH_SEPARATOR = 14; + /** + * Character type Cc + * @stable ICU 2.1 + */ + public static final int CONTROL = 15; + /** + * Character type Cf + * @stable ICU 2.1 + */ + public static final int FORMAT = 16; + /** + * Character type Co + * @stable ICU 2.1 + */ + public static final int PRIVATE_USE = 17; + /** + * Character type Cs + * @stable ICU 2.1 + */ + public static final int SURROGATE = 18; + /** + * Character type Pd + * @stable ICU 2.1 + */ + public static final int DASH_PUNCTUATION = 19; + /** + * Character type Ps + * @stable ICU 2.1 + */ + public static final int START_PUNCTUATION = 20; + + // start of 21 ------------ + + /** + * Character type Pe + * @stable ICU 2.1 + */ + public static final int END_PUNCTUATION = 21; + /** + * Character type Pc + * @stable ICU 2.1 + */ + public static final int CONNECTOR_PUNCTUATION = 22; + /** + * Character type Po + * @stable ICU 2.1 + */ + public static final int OTHER_PUNCTUATION = 23; + /** + * Character type Sm + * @stable ICU 2.1 + */ + public static final int MATH_SYMBOL = 24; + /** + * Character type Sc + * @stable ICU 2.1 + */ + public static final int CURRENCY_SYMBOL = 25; + /** + * Character type Sk + * @stable ICU 2.1 + */ + public static final int MODIFIER_SYMBOL = 26; + /** + * Character type So + * @stable ICU 2.1 + */ + public static final int 
OTHER_SYMBOL = 27; + /** + * Character type Pi + * @stable ICU 2.1 + */ + public static final int INITIAL_PUNCTUATION = 28; + /** + * Character type Pf + * @stable ICU 2.1 + */ + public static final int FINAL_PUNCTUATION = 29; + + // start of 31 ------------ + + /** + * Character type count + * @stable ICU 2.1 + */ + public static final int CHAR_CATEGORY_COUNT = 30; + + /** + * Gets the name of the argument category + * @param category to retrieve name + * @return category name + * @stable ICU 2.1 + */ + public static String toString(int category) + { + switch (category) { + case UPPERCASE_LETTER : + return "Letter, Uppercase"; + case LOWERCASE_LETTER : + return "Letter, Lowercase"; + case TITLECASE_LETTER : + return "Letter, Titlecase"; + case MODIFIER_LETTER : + return "Letter, Modifier"; + case OTHER_LETTER : + return "Letter, Other"; + case NON_SPACING_MARK : + return "Mark, Non-Spacing"; + case ENCLOSING_MARK : + return "Mark, Enclosing"; + case COMBINING_SPACING_MARK : + return "Mark, Spacing Combining"; + case DECIMAL_DIGIT_NUMBER : + return "Number, Decimal Digit"; + case LETTER_NUMBER : + return "Number, Letter"; + case OTHER_NUMBER : + return "Number, Other"; + case SPACE_SEPARATOR : + return "Separator, Space"; + case LINE_SEPARATOR : + return "Separator, Line"; + case PARAGRAPH_SEPARATOR : + return "Separator, Paragraph"; + case CONTROL : + return "Other, Control"; + case FORMAT : + return "Other, Format"; + case PRIVATE_USE : + return "Other, Private Use"; + case SURROGATE : + return "Other, Surrogate"; + case DASH_PUNCTUATION : + return "Punctuation, Dash"; + case START_PUNCTUATION : + return "Punctuation, Open"; + case END_PUNCTUATION : + return "Punctuation, Close"; + case CONNECTOR_PUNCTUATION : + return "Punctuation, Connector"; + case OTHER_PUNCTUATION : + return "Punctuation, Other"; + case MATH_SYMBOL : + return "Symbol, Math"; + case CURRENCY_SYMBOL : + return "Symbol, Currency"; + case MODIFIER_SYMBOL : + return "Symbol, Modifier"; + case 
OTHER_SYMBOL : + return "Symbol, Other"; + case INITIAL_PUNCTUATION : + return "Punctuation, Initial quote"; + case FINAL_PUNCTUATION : + return "Punctuation, Final quote"; + } + return "Unassigned"; + } + + // private constructor ----------------------------------------------- + + /** + * Private constructor to prevent initialisation + */ private UCharacterCategory() { } diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java b/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java index fe7d93e91c..d24c31d8da 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacterDirection.java @@ -6,8 +6,8 @@ * * $Source: * /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterDirection.java $ -* $Date: 2002/09/11 00:12:39 $ -* $Revision: 1.5 $ +* $Date: 2002/12/03 00:47:50 $ +* $Revision: 1.6 $ * ******************************************************************************* */ @@ -15,160 +15,181 @@ package com.ibm.icu.lang; /** -* Enumerated Unicode character linguistic direction constants. -* Used as return results from UCharacter -*

-* This class is not subclassable -*

-* @author Syn Wee Quek -* @since oct0300 -*/ + * Enumerated Unicode character linguistic direction constants. + * Used as return results from UCharacter + *

+ * This class is not subclassable + *

+ * @author Syn Wee Quek + * @stable ICU 2.1 + */ public final class UCharacterDirection { - // private constructor ========================================= - - /** - * Private constructor to prevent initialisation - */ - private UCharacterDirection() - { - } - - // public variable ============================================= - - /** - * Directional type L - */ - public static final int LEFT_TO_RIGHT = 0; - /** - * Directional type R - */ - public static final int RIGHT_TO_LEFT = 1; - /** - * Directional type EN - */ - public static final int EUROPEAN_NUMBER = 2; - /** - * Directional type ES - */ - public static final int EUROPEAN_NUMBER_SEPARATOR = 3; - /** - * Directional type ET - */ - public static final int EUROPEAN_NUMBER_TERMINATOR = 4; - /** - * Directional type AN - */ - public static final int ARABIC_NUMBER = 5; - /** - * Directional type CS - */ - public static final int COMMON_NUMBER_SEPARATOR = 6; - /** - * Directional type B - */ - public static final int BLOCK_SEPARATOR = 7; - /** - * Directional type S - */ - public static final int SEGMENT_SEPARATOR = 8; - /** - * Directional type WS - */ - public static final int WHITE_SPACE_NEUTRAL = 9; - - // start of 11 --------------- - - /** - * Directional type ON - */ - public static final int OTHER_NEUTRAL = 10; - /** - * Directional type LRE - */ - public static final int LEFT_TO_RIGHT_EMBEDDING = 11; - /** - * Directional type LRO - */ - public static final int LEFT_TO_RIGHT_OVERRIDE = 12; - /** - * Directional type AL - */ - public static final int RIGHT_TO_LEFT_ARABIC = 13; - /** - * Directional type RLE - */ - public static final int RIGHT_TO_LEFT_EMBEDDING = 14; - /** - * Directional type RLO - */ - public static final int RIGHT_TO_LEFT_OVERRIDE = 15; - /** - * Directional type PDF - */ - public static final int POP_DIRECTIONAL_FORMAT = 16; - /** - * Directional type NSM - */ - public static final int DIR_NON_SPACING_MARK = 17; - /** - * Directional type BN - */ - public static final int 
BOUNDARY_NEUTRAL = 18; - /** - * Number of directional type - */ + // private constructor ========================================= + + /** + * Private constructor to prevent initialisation + */ + private UCharacterDirection() + { + } + + // public variable ============================================= + + /** + * Directional type L + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT = 0; + /** + * Directional type R + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT = 1; + /** + * Directional type EN + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER = 2; + /** + * Directional type ES + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER_SEPARATOR = 3; + /** + * Directional type ET + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER_TERMINATOR = 4; + /** + * Directional type AN + * @stable ICU 2.1 + */ + public static final int ARABIC_NUMBER = 5; + /** + * Directional type CS + * @stable ICU 2.1 + */ + public static final int COMMON_NUMBER_SEPARATOR = 6; + /** + * Directional type B + * @stable ICU 2.1 + */ + public static final int BLOCK_SEPARATOR = 7; + /** + * Directional type S + * @stable ICU 2.1 + */ + public static final int SEGMENT_SEPARATOR = 8; + /** + * Directional type WS + * @stable ICU 2.1 + */ + public static final int WHITE_SPACE_NEUTRAL = 9; + + // start of 11 --------------- + + /** + * Directional type ON + * @stable ICU 2.1 + */ + public static final int OTHER_NEUTRAL = 10; + /** + * Directional type LRE + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT_EMBEDDING = 11; + /** + * Directional type LRO + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT_OVERRIDE = 12; + /** + * Directional type AL + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_ARABIC = 13; + /** + * Directional type RLE + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_EMBEDDING = 14; + /** + * Directional type RLO + * @stable ICU 2.1 + */ + public 
static final int RIGHT_TO_LEFT_OVERRIDE = 15; + /** + * Directional type PDF + * @stable ICU 2.1 + */ + public static final int POP_DIRECTIONAL_FORMAT = 16; + /** + * Directional type NSM + * @stable ICU 2.1 + */ + public static final int DIR_NON_SPACING_MARK = 17; + /** + * Directional type BN + * @stable ICU 2.1 + */ + public static final int BOUNDARY_NEUTRAL = 18; + /** + * Number of directional type + * @stable ICU 2.1 + */ public static final int CHAR_DIRECTION_COUNT = 19; /** - * Gets the name of the argument direction - * @param dir direction type to retrieve name - * @return directional name - */ + * Gets the name of the argument direction + * @param dir direction type to retrieve name + * @return directional name + * @stable ICU 2.1 + */ public static String toString(int dir) { - switch(dir) - { - case LEFT_TO_RIGHT : - return "Left-to-Right"; - case RIGHT_TO_LEFT : - return "Right-to-Left"; - case EUROPEAN_NUMBER : - return "European Number"; - case EUROPEAN_NUMBER_SEPARATOR : - return "European Number Separator"; - case EUROPEAN_NUMBER_TERMINATOR : - return "European Number Terminator"; - case ARABIC_NUMBER : - return "Arabic Number"; - case COMMON_NUMBER_SEPARATOR : - return "Common Number Separator"; - case BLOCK_SEPARATOR : - return "Paragraph Separator"; - case SEGMENT_SEPARATOR : - return "Segment Separator"; - case WHITE_SPACE_NEUTRAL : - return "Whitespace"; - case OTHER_NEUTRAL : - return "Other Neutrals"; - case LEFT_TO_RIGHT_EMBEDDING : - return "Left-to-Right Embedding"; - case LEFT_TO_RIGHT_OVERRIDE : - return "Left-to-Right Override"; - case RIGHT_TO_LEFT_ARABIC : - return "Right-to-Left Arabic"; - case RIGHT_TO_LEFT_EMBEDDING : - return "Right-to-Left Embedding"; - case RIGHT_TO_LEFT_OVERRIDE : - return "Right-to-Left Override"; - case POP_DIRECTIONAL_FORMAT : - return "Pop Directional Format"; - case DIR_NON_SPACING_MARK : - return "Non-Spacing Mark"; - case BOUNDARY_NEUTRAL : - return "Boundary Neutral"; - } - return "Unassigned"; + 
switch(dir) + { + case LEFT_TO_RIGHT : + return "Left-to-Right"; + case RIGHT_TO_LEFT : + return "Right-to-Left"; + case EUROPEAN_NUMBER : + return "European Number"; + case EUROPEAN_NUMBER_SEPARATOR : + return "European Number Separator"; + case EUROPEAN_NUMBER_TERMINATOR : + return "European Number Terminator"; + case ARABIC_NUMBER : + return "Arabic Number"; + case COMMON_NUMBER_SEPARATOR : + return "Common Number Separator"; + case BLOCK_SEPARATOR : + return "Paragraph Separator"; + case SEGMENT_SEPARATOR : + return "Segment Separator"; + case WHITE_SPACE_NEUTRAL : + return "Whitespace"; + case OTHER_NEUTRAL : + return "Other Neutrals"; + case LEFT_TO_RIGHT_EMBEDDING : + return "Left-to-Right Embedding"; + case LEFT_TO_RIGHT_OVERRIDE : + return "Left-to-Right Override"; + case RIGHT_TO_LEFT_ARABIC : + return "Right-to-Left Arabic"; + case RIGHT_TO_LEFT_EMBEDDING : + return "Right-to-Left Embedding"; + case RIGHT_TO_LEFT_OVERRIDE : + return "Right-to-Left Override"; + case POP_DIRECTIONAL_FORMAT : + return "Pop Directional Format"; + case DIR_NON_SPACING_MARK : + return "Non-Spacing Mark"; + case BOUNDARY_NEUTRAL : + return "Boundary Neutral"; + } + return "Unassigned"; } } diff --git a/icu4j/src/com/ibm/icu/lang/UProperty.java b/icu4j/src/com/ibm/icu/lang/UProperty.java index 57f14b8f27..3f1557e969 100644 --- a/icu4j/src/com/ibm/icu/lang/UProperty.java +++ b/icu4j/src/com/ibm/icu/lang/UProperty.java @@ -6,8 +6,8 @@ * * $Source: * /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $ -* $Date: 2002/11/06 19:48:58 $ -* $Revision: 1.5 $ +* $Date: 2002/12/03 00:47:53 $ +* $Revision: 1.6 $ * ******************************************************************************* */ @@ -30,7 +30,7 @@ package com.ibm.icu.lang; * 3.2, then properties marked with "new" are not or not fully * available. Check UCharacter.getUnicodeVersion() to be sure.

* @author Syn Wee Quek - * @since March 8 2002 + * @draft ICU 2.1 * @see com.ibm.icu.lang.UCharacter */ public interface UProperty @@ -42,20 +42,24 @@ public interface UProperty *

Property for UCharacter.isUAlphabetic(), different from the property * in UCharacter.isalpha().

*

Lu + Ll + Lt + Lm + Lo + Nl + Other_Alphabetic.

+ * @draft ICU 2.1 */ public static final int ALPHABETIC = 0; /** * First constant for binary Unicode properties. + * @draft ICU 2.1 */ public static final int BINARY_START = ALPHABETIC; /** * Binary property ASCII_Hex_Digit (0-9 A-F a-f). + * @draft ICU 2.1 */ public static final int ASCII_HEX_DIGIT = 1; /** *

Binary property Bidi_Control.

*

Format controls which have specific functions in the Bidi Algorithm. *

+ * @draft ICU 2.1 */ public static final int BIDI_CONTROL = 2; /** @@ -63,11 +67,13 @@ public interface UProperty *

Characters that may change display in RTL text.

*

Property for UCharacter.isMirrored().

*

See Bidi Algorithm; UTR 9.

+ * @draft ICU 2.1 */ public static final int BIDI_MIRRORED = 3; /** *

Binary property Dash.

*

Variations of dashes.

+ * @draft ICU 2.1 */ public static final int DASH = 4; /** @@ -77,97 +83,114 @@ public interface UProperty *

*

Codepoints (2060..206F, FFF0..FFFB, E0000..E0FFF) + * Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)

+ * @draft ICU 2.1 */ public static final int DEFAULT_IGNORABLE_CODE_POINT = 5; /** *

Binary property Deprecated (new).

*

The usage of deprecated characters is strongly discouraged.

+ * @draft ICU 2.1 */ public static final int DEPRECATED = 6; /** *

Binary property Diacritic.

*

Characters that linguistically modify the meaning of another * character to which they apply.

+ * @draft ICU 2.1 */ public static final int DIACRITIC = 7; /** *

Binary property Extender.

*

Extend the value or shape of a preceding alphabetic character, e.g. * length and iteration marks.

+ * @draft ICU 2.1 */ public static final int EXTENDER = 8; /** *

Binary property Full_Composition_Exclusion.

*

CompositionExclusions.txt + Singleton Decompositions + * Non-Starter Decompositions.

+ * @draft ICU 2.1 */ public static final int FULL_COMPOSITION_EXCLUSION = 9; /** *

Binary property Grapheme_Base (new).

*

For programmatic determination of grapheme cluster boundaries. * [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ

+ * @draft ICU 2.1 */ public static final int GRAPHEME_BASE = 10; /** *

Binary property Grapheme_Extend (new).

*

For programmatic determination of grapheme cluster boundaries.

*

Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ

+ * @draft ICU 2.1 */ public static final int GRAPHEME_EXTEND = 11; /** *

Binary property Grapheme_Link (new).

*

For programmatic determination of grapheme cluster boundaries.

+ * @draft ICU 2.1 */ public static final int GRAPHEME_LINK = 12; /** *

Binary property Hex_Digit.

*

Characters commonly used for hexadecimal numbers.

+ * @draft ICU 2.1 */ public static final int HEX_DIGIT = 13; /** *

Binary property Hyphen.

*

Dashes used to mark connections between pieces of words, plus the * Katakana middle dot.

+ * @draft ICU 2.1 */ public static final int HYPHEN = 14; /** *

Binary property ID_Continue.

*

Characters that can continue an identifier.

*

ID_Start+Mn+Mc+Nd+Pc

+ * @draft ICU 2.1 */ public static final int ID_CONTINUE = 15; /** *

Binary property ID_Start.

*

Characters that can start an identifier.

*

Lu+Ll+Lt+Lm+Lo+Nl

+ * @draft ICU 2.1 */ public static final int ID_START = 16; /** *

Binary property Ideographic.

*

CJKV ideographs.

+ * @draft ICU 2.1 */ public static final int IDEOGRAPHIC = 17; /** *

Binary property IDS_Binary_Operator (new).

*

For programmatic determination of Ideographic Description Sequences. *

+ * @draft ICU 2.1 */ public static final int IDS_BINARY_OPERATOR = 18; /** *

Binary property IDS_Trinary_Operator (new).

* + * @draft ICU 2.1 */ public static final int IDS_TRINARY_OPERATOR = 19; /** *

Binary property Join_Control.

*

Format controls for cursive joining and ligation.

+ * @draft ICU 2.1 */ public static final int JOIN_CONTROL = 20; /** *

Binary property Logical_Order_Exception (new).

*

Characters that do not use logical order and require special * handling in most processing.

+ * @draft ICU 2.1 */ public static final int LOGICAL_ORDER_EXCEPTION = 21; /** @@ -175,44 +198,52 @@ public interface UProperty *

Same as UCharacter.isULowercase(), different from * UCharacter.islower().

*

Ll+Other_Lowercase

+ * @draft ICU 2.1 */ public static final int LOWERCASE = 22; /**

Binary property Math.

*

Sm+Other_Math

+ * @draft ICU 2.1 */ public static final int MATH = 23; /** *

Binary property Noncharacter_Code_Point.

*

Code points that are explicitly defined as illegal for the encoding * of characters.

+ * @draft ICU 2.1 */ public static final int NONCHARACTER_CODE_POINT = 24; /** *

Binary property Quotation_Mark.

+ * @draft ICU 2.1 */ public static final int QUOTATION_MARK = 25; /** *

Binary property Radical (new).

*

For programmatic determination of Ideographic Description * Sequences.

+ * @draft ICU 2.1 */ public static final int RADICAL = 26; /** *

Binary property Soft_Dotted (new).

*

Characters with a "soft dot", like i or j.

*

An accent placed on these characters causes the dot to disappear.

+ * @draft ICU 2.1 */ public static final int SOFT_DOTTED = 27; /** *

Binary property Terminal_Punctuation.

*

Punctuation characters that generally mark the end of textual * units.

+ * @draft ICU 2.1 */ public static final int TERMINAL_PUNCTUATION = 28; /** *

Binary property Unified_Ideograph (new).

*

For programmatic determination of Ideographic Description * Sequences.

+ * @draft ICU 2.1 */ public static final int UNIFIED_IDEOGRAPH = 29; /** @@ -220,6 +251,7 @@ public interface UProperty *

Same as UCharacter.isUUppercase(), different from * UCharacter.isUpperCase().

*

Lu+Other_Uppercase

+ * @draft ICU 2.1 */ public static final int UPPERCASE = 30; /** @@ -227,22 +259,26 @@ public interface UProperty *

Same as UCharacter.isUWhiteSpace(), different from * UCharacter.isSpace() and UCharacter.isWhitespace().

* Space characters+TAB+CR+LF-ZWSP-ZWNBSP

+ * @draft ICU 2.1 */ public static final int WHITE_SPACE = 31; /** *

Binary property XID_Continue.

*

ID_Continue modified to allow closure under normalization forms * NFKC and NFKD.

+ * @draft ICU 2.1 */ public static final int XID_CONTINUE = 32; /** *

Binary property XID_Start.

*

ID_Start modified to allow closure under normalization forms NFKC * and NFKD.

+ * @draft ICU 2.1 */ public static final int XID_START = 33; /** *

One more than the last constant for binary Unicode properties.

+ * @draft ICU 2.1 */ public static final int BINARY_LIMIT = 34; /** @@ -442,7 +478,7 @@ public interface UProperty * * @see UCharacter#getPropertyName * @see UCharacter#getPropertyValueName - * @since ICU 2.4 + * @draft ICU 2.4 */ public interface NameChoice { /**