ICU-3630 cover jsr 204 APIs where possible, also jb 3523 after a fashion

X-SVN-Rev: 14666
This commit is contained in:
Doug Felt 2004-03-10 02:21:38 +00:00
parent 7388e19127
commit ace86ed92b
4 changed files with 1157 additions and 627 deletions

View File

@ -1,19 +1,23 @@
/**
*******************************************************************************
* Copyright (C) 1996-2003, International Business Machines Corporation and *
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
* $Date: 2004/02/06 21:54:00 $
* $Revision: 1.86 $
* $Date: 2004/03/10 02:21:38 $
* $Revision: 1.87 $
*
*******************************************************************************
*/
package com.ibm.icu.lang;
import java.lang.ref.SoftReference;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.ValueIterator;
@ -27,6 +31,8 @@ import com.ibm.icu.impl.UCharacterName;
import com.ibm.icu.impl.UCharacterNameChoice;
import com.ibm.icu.impl.UPropertyAliases;
import com.ibm.icu.lang.UCharacterEnums.*;
/**
* <p>
* The UCharacter class provides extensions to the
@ -83,17 +89,10 @@ import com.ibm.icu.impl.UPropertyAliases;
* </p>
* @author Syn Wee Quek
* @stable ICU 2.1
* @see com.ibm.icu.lang.UCharacterCategory
* @see com.ibm.icu.lang.UCharacterDirection
* @see com.ibm.icu.lang.UCharacterEnums
*/
/*
* notes:
* 1) forDigit is not provided since there is no difference between the
* icu4c version and the jdk version
*/
public final class UCharacter
public final class UCharacter implements ECharacterCategory, ECharacterDirection
{
// public inner classes ----------------------------------------------
@ -1229,36 +1228,36 @@ public final class UCharacter
*/
public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
/** @draft ICU 2.6 */
public static final int LIMBU_ID = 111; /*[1900]*/
/** @draft ICU 2.6 */
public static final int TAI_LE_ID = 112; /*[1950]*/
/** @draft ICU 2.6 */
public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
/** @draft ICU 2.6 */
public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
/** @draft ICU 2.6 */
public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
/** @draft ICU 2.6 */
public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
/** @draft ICU 2.6 */
public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
/** @draft ICU 2.6 */
public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
/** @draft ICU 2.6 */
public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
/** @draft ICU 2.6 */
public static final int UGARITIC_ID = 120; /*[10380]*/
/** @draft ICU 2.6 */
public static final int SHAVIAN_ID = 121; /*[10450]*/
/** @draft ICU 2.6 */
public static final int OSMANYA_ID = 122; /*[10480]*/
/** @draft ICU 2.6 */
public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
/** @draft ICU 2.6 */
public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
/** @draft ICU 2.6 */
public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
/** @draft ICU 2.6 */
public static final int LIMBU_ID = 111; /*[1900]*/
/** @draft ICU 2.6 */
public static final int TAI_LE_ID = 112; /*[1950]*/
/** @draft ICU 2.6 */
public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
/** @draft ICU 2.6 */
public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
/** @draft ICU 2.6 */
public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
/** @draft ICU 2.6 */
public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
/** @draft ICU 2.6 */
public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
/** @draft ICU 2.6 */
public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
/** @draft ICU 2.6 */
public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
/** @draft ICU 2.6 */
public static final int UGARITIC_ID = 120; /*[10380]*/
/** @draft ICU 2.6 */
public static final int SHAVIAN_ID = 121; /*[10450]*/
/** @draft ICU 2.6 */
public static final int OSMANYA_ID = 122; /*[10480]*/
/** @draft ICU 2.6 */
public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
/** @draft ICU 2.6 */
public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
/** @draft ICU 2.6 */
public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
/**
* @draft ICU 2.4
*/
@ -1300,6 +1299,40 @@ public final class UCharacter
& BLOCK_MASK_) >> BLOCK_SHIFT_);
}
/**
 * Cover the JDK 1.5 API. Return the Unicode block with the
 * given name. <br/><b>Note</b>: Unlike JDK 1.5, this only matches
 * against the official UCD name and the Java block name
 * (ignoring case).
 * <p>Case is ignored by upper-casing both the stored names and the
 * requested name with a fixed locale, so the result does not depend
 * on the JVM default locale.</p>
 * @param blockName the name of the block to match
 * @return the UnicodeBlock with that name
 * @throws IllegalArgumentException if the blockName could not be matched
 * @throws NullPointerException if blockName is null
 * @draft ICU 3.0
 */
public static final UnicodeBlock forName(String blockName) {
    // The name table is held through a SoftReference, so it may be
    // cleared by the garbage collector and is rebuilt on demand.
    // NOTE(review): the cache field is read and written without
    // synchronization; concurrent callers may each build their own map.
    Map m = null;
    if (mref != null) {
        m = (Map)mref.get();
    }
    if (m == null) {
        m = new HashMap(BLOCKS_.length);
        for (int i = 0; i < BLOCKS_.length; ++i) {
            UnicodeBlock b = BLOCKS_[i];
            String name = getPropertyValueName(UProperty.BLOCK, b.getID(), UProperty.NameChoice.LONG);
            // Locale.ENGLISH keeps the keys stable: with the default
            // locale (e.g. Turkish) 'i' would upper-case to a dotted
            // capital I and never match an ASCII upper-case query.
            m.put(name.toUpperCase(Locale.ENGLISH), b);
            m.put(b.toString().toUpperCase(Locale.ENGLISH), b);
        }
        mref = new SoftReference(m);
    }
    UnicodeBlock b = (UnicodeBlock)m.get(blockName.toUpperCase(Locale.ENGLISH));
    if (b == null) {
        throw new IllegalArgumentException("Unknown Unicode block name: " + blockName);
    }
    return b;
}
private static SoftReference mref;
/**
* Returns the type ID of this Unicode block
* @return integer type ID of this Unicode block
@ -1954,42 +1987,42 @@ public final class UCharacter
};
/**
* Hangul Syllable Type constants.
*
* @see UProperty#HANGUL_SYLLABLE_TYPE
* @draft ICU 2.6
*/
public static interface HangulSyllableType
{
/**
* Hangul Syllable Type constants.
*
* @see UProperty#HANGUL_SYLLABLE_TYPE
* @draft ICU 2.6
*/
public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/
/**
public static interface HangulSyllableType
{
/**
* @draft ICU 2.6
*/
public static final int LEADING_JAMO = 1; /*[L]*/
/**
public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/
/**
* @draft ICU 2.6
*/
public static final int VOWEL_JAMO = 2; /*[V]*/
/**
public static final int LEADING_JAMO = 1; /*[L]*/
/**
* @draft ICU 2.6
*/
public static final int TRAILING_JAMO = 3; /*[T]*/
/**
public static final int VOWEL_JAMO = 2; /*[V]*/
/**
* @draft ICU 2.6
*/
public static final int LV_SYLLABLE = 4; /*[LV]*/
/**
public static final int TRAILING_JAMO = 3; /*[T]*/
/**
* @draft ICU 2.6
*/
public static final int LVT_SYLLABLE = 5; /*[LVT]*/
/**
public static final int LV_SYLLABLE = 4; /*[LV]*/
/**
* @draft ICU 2.6
*/
public static final int COUNT = 6;
}
public static final int LVT_SYLLABLE = 5; /*[LVT]*/
/**
* @draft ICU 2.6
*/
public static final int COUNT = 6;
}
// public data members -----------------------------------------------
@ -2020,7 +2053,7 @@ public final class UCharacter
* is no existing character.
* @stable ICU 2.1
*/
public static final int REPLACEMENT_CHAR = '\uFFFD';
public static final int REPLACEMENT_CHAR = '\uFFFD';
/**
* Special value that is returned by getUnicodeNumericValue(int) when no
@ -2064,7 +2097,7 @@ public final class UCharacter
}
// if props == 0, it will just fall through and return -1
if (isNotExceptionIndicator(props)) {
// not contained in exception data
// not contained in exception data
// getSignedValue is just shifting so we can check for the sign
// first
// Optimization
@ -2078,7 +2111,7 @@ public final class UCharacter
}
else {
int index = UCharacterProperty.getExceptionIndex(props);
if (PROPERTY_.hasExceptionValue(index,
if (PROPERTY_.hasExceptionValue(index,
UCharacterProperty.EXC_NUMERIC_VALUE_)) {
int result = PROPERTY_.getException(index,
UCharacterProperty.EXC_NUMERIC_VALUE_);
@ -2865,10 +2898,10 @@ public final class UCharacter
*/
public static int getCombiningClass(int ch)
{
if (ch < MIN_VALUE || ch > MAX_VALUE) {
throw new IllegalArgumentException("Codepoint out of bounds");
}
return NormalizerImpl.getCombiningClass(ch);
if (ch < MIN_VALUE || ch > MAX_VALUE) {
throw new IllegalArgumentException("Codepoint out of bounds");
}
return NormalizerImpl.getCombiningClass(ch);
}
/**
@ -2953,7 +2986,7 @@ public final class UCharacter
*/
public static String getName(int ch)
{
if(NAME_==null){
if(NAME_==null){
throw new RuntimeException("Could not load unames.icu");
}
return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
@ -2993,7 +3026,7 @@ public final class UCharacter
*/
public static String getName1_0(int ch)
{
if(NAME_==null){
if(NAME_==null){
throw new RuntimeException("Could not load unames.icu");
}
return NAME_.getName(ch,
@ -3020,7 +3053,7 @@ public final class UCharacter
*/
public static String getExtendedName(int ch)
{
if(NAME_==null){
if(NAME_==null){
throw new RuntimeException("Could not load unames.icu");
}
return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
@ -3080,7 +3113,7 @@ public final class UCharacter
*/
public static int getCharFromName1_0(String name)
{
if(NAME_==null){
if(NAME_==null){
throw new RuntimeException("Could not load unames.icu");
}
return NAME_.getCharFromName(
@ -3108,7 +3141,7 @@ public final class UCharacter
*/
public static int getCharFromExtendedName(String name)
{
if(NAME_==null){
if(NAME_==null){
throw new RuntimeException("Could not load unames.icu");
}
return NAME_.getCharFromName(
@ -3298,9 +3331,9 @@ public final class UCharacter
public static int getCodePoint(char lead, char trail)
{
if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
lead <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
lead <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
return UCharacterProperty.getRawSupplementary(lead, trail);
}
throw new IllegalArgumentException("Illegal surrogate characters");
@ -3379,9 +3412,9 @@ public final class UCharacter
*/
public static String toUpperCase(Locale locale, String str)
{
if (locale == null) {
locale = Locale.getDefault();
}
if (locale == null) {
locale = Locale.getDefault();
}
return PROPERTY_.toUpperCase(locale, str, 0, str.length());
}
@ -3395,11 +3428,11 @@ public final class UCharacter
*/
public static String toLowerCase(Locale locale, String str)
{
int length = str.length();
StringBuffer result = new StringBuffer(length);
if (locale == null) {
locale = Locale.getDefault();
}
int length = str.length();
StringBuffer result = new StringBuffer(length);
if (locale == null) {
locale = Locale.getDefault();
}
PROPERTY_.toLowerCase(locale, str, 0, length, result);
return result.toString();
}
@ -3427,9 +3460,9 @@ public final class UCharacter
BreakIterator breakiter)
{
if (breakiter == null) {
if (locale == null) {
locale = Locale.getDefault();
}
if (locale == null) {
locale = Locale.getDefault();
}
breakiter = BreakIterator.getWordInstance(locale);
}
return PROPERTY_.toTitleCase(locale, str, breakiter);
@ -3652,12 +3685,12 @@ public final class UCharacter
return result.toString();
}
/**
* Bit mask for getting just the options from a string compare options word
* that are relevant for case folding (of a single string or code point).
* @internal
*/
private static final int FOLD_CASE_OPTIONS_MASK = 0xff;
/**
* Bit mask for getting just the options from a string compare options word
* that are relevant for case folding (of a single string or code point).
* @internal
*/
private static final int FOLD_CASE_OPTIONS_MASK = 0xff;
/**
* Option value for case folding: use default mappings defined in CaseFolding.txt.
@ -3686,21 +3719,21 @@ public final class UCharacter
* @see #foldCase(String, boolean)
* @draft ICU 2.6
*/
/*
* Issue for canonical caseless match (UAX #21):
* Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
* canonical equivalence, unlike default-option casefolding.
* For example, I-grave and I + grave fold to strings that are not canonically
* equivalent.
* For more details, see the comment in Normalizer.compare()
* and the intermediate prototype changes for Jitterbug 2021.
* (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
*
* This did not get fixed because it appears that it is not possible to fix
* it for uppercase and lowercase characters (I-grave vs. i-grave)
* together in a way that they still fold to common result strings.
*/
public static int foldCase(int ch, int options)
/*
* Issue for canonical caseless match (UAX #21):
* Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
* canonical equivalence, unlike default-option casefolding.
* For example, I-grave and I + grave fold to strings that are not canonically
* equivalent.
* For more details, see the comment in Normalizer.compare()
* and the intermediate prototype changes for Jitterbug 2021.
* (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
*
* This did not get fixed because it appears that it is not possible to fix
* it for uppercase and lowercase characters (I-grave vs. i-grave)
* together in a way that they still fold to common result strings.
*/
public static int foldCase(int ch, int options)
{
int props = PROPERTY_.getProperty(ch);
if (isNotExceptionIndicator(props)) {
@ -3776,7 +3809,7 @@ public final class UCharacter
* @see #foldCase(int, boolean)
* @draft ICU 2.6
*/
public static final String foldCase(String str, int options){
public static final String foldCase(String str, int options){
int size = str.length();
StringBuffer result = new StringBuffer(size);
int offset = 0;
@ -3859,7 +3892,7 @@ public final class UCharacter
}
return result.toString();
}
}
/**
* Return numeric value of Han code points.
* <br> This returns the value of Han 'numeric' code points,
@ -3946,7 +3979,7 @@ public final class UCharacter
return new UCharacterTypeIterator(PROPERTY_);
}
/**
/**
* <p>Gets an iterator for character names, iterating over codepoints.</p>
* <p>This API only gets the iterator for the modern, most up-to-date
* Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
@ -3968,7 +4001,7 @@ public final class UCharacter
*/
public static ValueIterator getNameIterator()
{
if(NAME_==null){
if(NAME_==null){
throw new RuntimeException("Could not load unames.icu");
}
return new UCharacterNameIterator(NAME_,
@ -3996,7 +4029,7 @@ public final class UCharacter
*/
public static ValueIterator getName1_0Iterator()
{
if(NAME_==null){
if(NAME_==null){
throw new RuntimeException("Could not load unames.icu");
}
return new UCharacterNameIterator(NAME_,
@ -4024,7 +4057,7 @@ public final class UCharacter
*/
public static ValueIterator getExtendedNameIterator()
{
if(NAME_==null){
if(NAME_==null){
throw new RuntimeException("Could not load unames.icu");
}
return new UCharacterNameIterator(NAME_,
@ -4045,93 +4078,93 @@ public final class UCharacter
*/
public static VersionInfo getAge(int ch)
{
if (ch < MIN_VALUE || ch > MAX_VALUE) {
throw new IllegalArgumentException("Codepoint out of bounds");
}
return PROPERTY_.getAge(ch);
if (ch < MIN_VALUE || ch > MAX_VALUE) {
throw new IllegalArgumentException("Codepoint out of bounds");
}
return PROPERTY_.getAge(ch);
}
/**
* <p>Check a binary Unicode property for a code point.</p>
* <p>Unicode, especially in version 3.2, defines many more properties
* than the original set in UnicodeData.txt.</p>
* <p>This API is intended to reflect Unicode properties as defined in
* the Unicode Character Database (UCD) and Unicode Technical Reports
* (UTR).</p>
* <p>For details about the properties see
* <a href=http://www.unicode.org/>http://www.unicode.org/</a>.</p>
* <p>For names of Unicode properties see the UCD file
* PropertyAliases.txt.</p>
* <p>This API does not check the validity of the codepoint.</p>
* <p>Important: If ICU is built with UCD files from Unicode versions
* below 3.2, then properties marked with "new" are not or
* not fully available.</p>
* @param ch code point to test.
* @param property selector constant from com.ibm.icu.lang.UProperty,
* identifies which binary property to check.
* @return true or false according to the binary Unicode property value
* for ch. Also false if property is out of bounds or if the
* Unicode version does not have data for the property at all, or
* not for this code point.
* @see com.ibm.icu.lang.UProperty
* @stable ICU 2.6
*/
public static boolean hasBinaryProperty(int ch, int property)
{
if (ch < MIN_VALUE || ch > MAX_VALUE) {
throw new IllegalArgumentException("Codepoint out of bounds");
}
return PROPERTY_.hasBinaryProperty(ch, property);
}
* <p>Check a binary Unicode property for a code point.</p>
* <p>Unicode, especially in version 3.2, defines many more properties
* than the original set in UnicodeData.txt.</p>
* <p>This API is intended to reflect Unicode properties as defined in
* the Unicode Character Database (UCD) and Unicode Technical Reports
* (UTR).</p>
* <p>For details about the properties see
* <a href=http://www.unicode.org/>http://www.unicode.org/</a>.</p>
* <p>For names of Unicode properties see the UCD file
* PropertyAliases.txt.</p>
* <p>This API does not check the validity of the codepoint.</p>
* <p>Important: If ICU is built with UCD files from Unicode versions
* below 3.2, then properties marked with "new" are not or
* not fully available.</p>
* @param ch code point to test.
* @param property selector constant from com.ibm.icu.lang.UProperty,
* identifies which binary property to check.
* @return true or false according to the binary Unicode property value
* for ch. Also false if property is out of bounds or if the
* Unicode version does not have data for the property at all, or
* not for this code point.
* @see com.ibm.icu.lang.UProperty
* @stable ICU 2.6
*/
public static boolean hasBinaryProperty(int ch, int property)
{
if (ch < MIN_VALUE || ch > MAX_VALUE) {
throw new IllegalArgumentException("Codepoint out of bounds");
}
return PROPERTY_.hasBinaryProperty(ch, property);
}
/**
* <p>Check if a code point has the Alphabetic Unicode property.</p>
* <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).</p>
* <p>Different from UCharacter.isLetter(ch)!</p>
* @stable ICU 2.6
* @param ch codepoint to be tested
*/
public static boolean isUAlphabetic(int ch)
{
return hasBinaryProperty(ch, UProperty.ALPHABETIC);
}
/**
* <p>Check if a code point has the Alphabetic Unicode property.</p>
* <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).</p>
* <p>Different from UCharacter.isLetter(ch)!</p>
* @stable ICU 2.6
* @param ch codepoint to be tested
*/
public static boolean isUAlphabetic(int ch)
{
return hasBinaryProperty(ch, UProperty.ALPHABETIC);
}
/**
* <p>Check if a code point has the Lowercase Unicode property.</p>
* <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).</p>
* <p>This is different from UCharacter.isLowerCase(ch)!</p>
* @param ch codepoint to be tested
* @stable ICU 2.6
*/
public static boolean isULowercase(int ch)
{
return hasBinaryProperty(ch, UProperty.LOWERCASE);
}
/**
* <p>Check if a code point has the Lowercase Unicode property.</p>
* <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).</p>
* <p>This is different from UCharacter.isLowerCase(ch)!</p>
* @param ch codepoint to be tested
* @stable ICU 2.6
*/
public static boolean isULowercase(int ch)
{
return hasBinaryProperty(ch, UProperty.LOWERCASE);
}
/**
* <p>Check if a code point has the Uppercase Unicode property.</p>
* <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).</p>
* <p>This is different from UCharacter.isUpperCase(ch)!</p>
* @param ch codepoint to be tested
* @stable ICU 2.6
*/
public static boolean isUUppercase(int ch)
{
return hasBinaryProperty(ch, UProperty.UPPERCASE);
}
/**
* <p>Check if a code point has the Uppercase Unicode property.</p>
* <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).</p>
* <p>This is different from UCharacter.isUpperCase(ch)!</p>
* @param ch codepoint to be tested
* @stable ICU 2.6
*/
public static boolean isUUppercase(int ch)
{
return hasBinaryProperty(ch, UProperty.UPPERCASE);
}
/**
* <p>Check if a code point has the White_Space Unicode property.</p>
* <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).</p>
* <p>This is different from both UCharacter.isSpace(ch) and
* UCharacter.isWhitespace(ch)!</p>
* @param ch codepoint to be tested
* @stable ICU 2.6
*/
public static boolean isUWhiteSpace(int ch)
{
return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
}
/**
* <p>Check if a code point has the White_Space Unicode property.</p>
* <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).</p>
* <p>This is different from both UCharacter.isSpace(ch) and
* UCharacter.isWhitespace(ch)!</p>
* @param ch codepoint to be tested
* @stable ICU 2.6
*/
public static boolean isUWhiteSpace(int ch)
{
return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
}
/**
@ -4205,7 +4238,7 @@ public final class UCharacter
return (PROPERTY_.getAdditional(ch, 2)
& JOINING_GROUP_MASK_) >> JOINING_GROUP_SHIFT_;
case UProperty.JOINING_TYPE:
return (int)(PROPERTY_.getAdditional(ch, 2)& JOINING_TYPE_MASK_)>> JOINING_TYPE_SHIFT_;
return (int)(PROPERTY_.getAdditional(ch, 2)& JOINING_TYPE_MASK_)>> JOINING_TYPE_SHIFT_;
// ArabicShaping.txt:
// Note: Characters of joining type T and most characters of
// joining type U are not explicitly listed in this file.
@ -4225,7 +4258,7 @@ public final class UCharacter
return result;
*/
case UProperty.LINE_BREAK:
return (int)(PROPERTY_.getAdditional(ch, 0)& LINE_BREAK_MASK_)>>LINE_BREAK_SHIFT_;
return (int)(PROPERTY_.getAdditional(ch, 0)& LINE_BREAK_MASK_)>>LINE_BREAK_SHIFT_;
/*
* LineBreak.txt:
* - Assigned characters that are not listed explicitly are given the value
@ -4284,7 +4317,7 @@ public final class UCharacter
default:
return 0; /* undefined */
return 0; /* undefined */
}
} else if (type == UProperty.GENERAL_CATEGORY_MASK) {
return UCharacterProperty.getMask(getType(ch));
@ -4395,11 +4428,309 @@ public final class UCharacter
return -1; // undefined
}
/**
 * Convenience cover for <code>java.lang.Character.forDigit</code>.
 * The arguments are passed through unchanged and the JDK result is
 * returned as is.
 * @param digit the numeric value to convert to a character
 * @param radix the radix
 * @return the char produced by <code>java.lang.Character.forDigit</code>
 * @draft ICU 3.0
 */
public static char forDigit(int digit, int radix) {
    final char digitChar = java.lang.Character.forDigit(digit, radix);
    return digitChar;
}
// JDK 1.5 API coverage
/**
 * Cover the JDK 1.5 API, for convenience.
 * Smallest high (lead) surrogate value; same value as
 * <code>UTF16.LEAD_SURROGATE_MIN_VALUE</code>.
 * @see UTF16#LEAD_SURROGATE_MIN_VALUE
 * @draft ICU 3.0
 */
public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE;
/**
 * Cover the JDK 1.5 API, for convenience.
 * Largest high (lead) surrogate value; same value as
 * <code>UTF16.LEAD_SURROGATE_MAX_VALUE</code>.
 * @see UTF16#LEAD_SURROGATE_MAX_VALUE
 * @draft ICU 3.0
 */
public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE;
/**
 * Cover the JDK 1.5 API, for convenience.
 * Smallest low (trail) surrogate value; same value as
 * <code>UTF16.TRAIL_SURROGATE_MIN_VALUE</code>.
 * @see UTF16#TRAIL_SURROGATE_MIN_VALUE
 * @draft ICU 3.0
 */
public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE;
/**
 * Cover the JDK 1.5 API, for convenience.
 * Largest low (trail) surrogate value; same value as
 * <code>UTF16.TRAIL_SURROGATE_MAX_VALUE</code>.
 * @see UTF16#TRAIL_SURROGATE_MAX_VALUE
 * @draft ICU 3.0
 */
public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE;
/**
 * Cover the JDK 1.5 API, for convenience.
 * Smallest surrogate value of either kind; same value as
 * <code>UTF16.SURROGATE_MIN_VALUE</code>.
 * @see UTF16#SURROGATE_MIN_VALUE
 * @draft ICU 3.0
 */
public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE;
/**
 * Cover the JDK 1.5 API, for convenience.
 * Largest surrogate value of either kind; same value as
 * <code>UTF16.SURROGATE_MAX_VALUE</code>.
 * @see UTF16#SURROGATE_MAX_VALUE
 * @draft ICU 3.0
 */
public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE;
/**
 * Cover the JDK 1.5 API, for convenience.
 * Smallest supplementary code point; same value as
 * <code>UTF16.SUPPLEMENTARY_MIN_VALUE</code>.
 * @see UTF16#SUPPLEMENTARY_MIN_VALUE
 * @draft ICU 3.0
 */
public static final int MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE;
/**
 * Cover the JDK 1.5 API, for convenience.
 * Largest code point; same value as
 * <code>UTF16.CODEPOINT_MAX_VALUE</code>.
 * @see UTF16#CODEPOINT_MAX_VALUE
 * @draft ICU 3.0
 */
public static final int MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE;
/**
 * Cover the JDK 1.5 API, for convenience. A value is accepted when it
 * is non-negative and not greater than <code>MAX_CODE_POINT</code>.
 * @param cp the code point to check
 * @return true if cp is a valid code point
 * @draft ICU 3.0
 */
public static final boolean isValidCodePoint(int cp) {
    if (cp < 0) {
        // negative values are never code points
        return false;
    }
    return cp <= MAX_CODE_POINT;
}
/**
 * Cover the JDK 1.5 API, for convenience. A value is supplementary when
 * it lies between <code>UTF16.SUPPLEMENTARY_MIN_VALUE</code> and
 * <code>UTF16.CODEPOINT_MAX_VALUE</code>, both inclusive.
 * @param cp the code point to check
 * @return true if cp is a supplementary code point
 * @draft ICU 3.0
 */
public static final boolean isSupplementaryCodePoint(int cp) {
    if (cp < UTF16.SUPPLEMENTARY_MIN_VALUE) {
        // below the supplementary range
        return false;
    }
    return cp <= UTF16.CODEPOINT_MAX_VALUE;
}
/**
 * Cover the JDK 1.5 API, for convenience. A char is a high (lead)
 * surrogate when it lies between <code>MIN_HIGH_SURROGATE</code> and
 * <code>MAX_HIGH_SURROGATE</code>, both inclusive.
 * @param ch the char to check
 * @return true if ch is a high (lead) surrogate
 * @draft ICU 3.0
 */
public static boolean isHighSurrogate(char ch) {
    // The upper bound must be the last HIGH surrogate. Bounding by
    // MIN_LOW_SURROGATE would also accept the first low surrogate.
    return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
}
/**
 * Cover the JDK 1.5 API, for convenience. A char is a low (trail)
 * surrogate when it lies between <code>MIN_LOW_SURROGATE</code> and
 * <code>MAX_LOW_SURROGATE</code>, both inclusive.
 * @param ch the char to check
 * @return true if ch is a low (trail) surrogate
 * @draft ICU 3.0
 */
public static boolean isLowSurrogate(char ch) {
    // The upper bound must be the last LOW surrogate. Bounding by
    // MIN_HIGH_SURROGATE (which is below MIN_LOW_SURROGATE) makes the
    // range empty, so the test could never succeed.
    return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
}
/**
 * Cover the JDK 1.5 API, for convenience. Return true if the chars
 * form a valid surrogate pair, i.e. <code>high</code> passes
 * {@link #isHighSurrogate} and <code>low</code> passes
 * {@link #isLowSurrogate}.
 * @param high the high (lead) char
 * @param low the low (trail) char
 * @return true if high, low form a surrogate pair
 * @draft ICU 3.0
 */
public static final boolean isSurrogatePair(char high, char low) {
    if (!isHighSurrogate(high)) {
        // the low char is not examined when the high char fails
        return false;
    }
    return isLowSurrogate(low);
}
/**
 * Cover the JDK 1.5 API, for convenience. Return the number of chars needed
 * to represent the code point. This does not check the
 * code point for validity; the work is delegated to
 * <code>UTF16.getCharCount</code>.
 * @param cp the code point to check
 * @return the number of chars needed to represent the code point
 * @see UTF16#getCharCount
 * @draft ICU 3.0
 */
public static int charCount(int cp) {
    final int units = UTF16.getCharCount(cp);
    return units;
}
/**
 * Cover the JDK 1.5 API, for convenience. Return the code point represented by
 * the characters. This does not check the surrogate pair for validity;
 * the two chars are handed directly to
 * <code>UCharacterProperty.getRawSupplementary</code>.
 * @param high the high (lead) surrogate
 * @param low the low (trail) surrogate
 * @return the code point formed by the surrogate pair
 * @draft ICU 3.0
 */
public static final int toCodePoint(char high, char low) {
    final int codePoint = UCharacterProperty.getRawSupplementary(high, low);
    return codePoint;
}
/**
 * Cover the JDK 1.5 API, for convenience. Return the code point at index.
 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
 * API. This examines only the characters at index and index+1.
 * <p>If the char at index is a high surrogate and is immediately
 * followed by a low surrogate, the pair is combined; in every other
 * case the char at index is returned by itself.</p>
 * @param seq the characters to check
 * @param index the index of the first or only char forming the code point
 * @return the code point at the index
 * @draft ICU 3.0
 */
public static final int codePointAt(CharSequence seq, int index) {
    final char first = seq.charAt(index);
    final int next = index + 1;
    // A lone char, or a high surrogate at the very end, stands alone.
    if (!isHighSurrogate(first) || next >= seq.length()) {
        return first;
    }
    final char second = seq.charAt(next);
    if (isLowSurrogate(second)) {
        return toCodePoint(first, second);
    }
    return first;
}
/**
 * Cover the JDK 1.5 API, for convenience. Return the code point at index.
 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
 * API. This examines only the characters at index and index+1.
 * <p>A high surrogate at index that is directly followed by a low
 * surrogate yields the combined code point; otherwise the single char
 * at index is the result.</p>
 * @param text the characters to check
 * @param index the index of the first or only char forming the code point
 * @return the code point at the index
 * @draft ICU 3.0
 */
public static final int codePointAt(char[] text, int index) {
    final char lead = text[index];
    if (!isHighSurrogate(lead)) {
        return lead;
    }
    final int trailPos = index + 1;
    if (trailPos >= text.length) {
        // high surrogate is the last char of the array
        return lead;
    }
    final char trail = text[trailPos];
    if (!isLowSurrogate(trail)) {
        return lead;
    }
    return toCodePoint(lead, trail);
}
/**
 * Cover the JDK 1.5 API, for convenience. Return the code point before index.
 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
 * API. This examines only the characters at index-1 and index-2.
 * <p>If the char at index-1 is a low surrogate and is immediately
 * preceded by a high surrogate, the pair is combined; in every other
 * case the char at index-1 is returned by itself.</p>
 * @param seq the characters to check
 * @param index the index after the last or only char forming the code point
 * @return the code point before the index
 * @draft ICU 3.0
 */
public static final int codePointBefore(CharSequence seq, int index) {
    final int last = index - 1;
    final char trail = seq.charAt(last);
    // A lone char, or a low surrogate at position 0, stands alone.
    if (!isLowSurrogate(trail) || last <= 0) {
        return trail;
    }
    final char lead = seq.charAt(last - 1);
    if (isHighSurrogate(lead)) {
        return toCodePoint(lead, trail);
    }
    return trail;
}
/**
 * Cover the JDK 1.5 API, for convenience. Return the code point before index.
 * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
 * API. This examines only the characters at index-1 and index-2.
 * <p>A low surrogate at index-1 that is directly preceded by a high
 * surrogate yields the combined code point; otherwise the single char
 * at index-1 is the result.</p>
 * @param text the characters to check
 * @param index the index after the last or only char forming the code point
 * @return the code point before the index
 * @draft ICU 3.0
 */
public static final int codePointBefore(char[] text, int index) {
    final int trailPos = index - 1;
    final char trail = text[trailPos];
    if (!isLowSurrogate(trail)) {
        return trail;
    }
    if (trailPos <= 0) {
        // low surrogate is the first char of the array
        return trail;
    }
    final char lead = text[trailPos - 1];
    if (!isHighSurrogate(lead)) {
        return trail;
    }
    return toCodePoint(lead, trail);
}
/**
 * Cover the JDK 1.5 API, for convenience. Writes the chars representing the
 * code point into the destination at the given index. A code point below
 * <code>MIN_SUPPLEMENTARY_CODE_POINT</code> is stored as one char; any other
 * valid code point is stored as a lead surrogate followed by a trail
 * surrogate.
 * @param cp the code point to convert
 * @param dst the destination array into which to put the char(s) representing the code point
 * @param dstIndex the index at which to put the first (or only) char
 * @return the count of the number of chars written (1 or 2)
 * @throws IllegalArgumentException if cp is not a valid code point
 * @draft ICU 3.0
 */
public static final int toChars(int cp, char[] dst, int dstIndex) {
    if (cp < 0) {
        throw new IllegalArgumentException();
    }
    if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
        // fits in a single char
        dst[dstIndex] = (char)cp;
        return 1;
    }
    if (cp > MAX_CODE_POINT) {
        throw new IllegalArgumentException();
    }
    // supplementary: lead surrogate first, then trail surrogate
    dst[dstIndex] = UTF16.getLeadSurrogate(cp);
    dst[dstIndex + 1] = UTF16.getTrailSurrogate(cp);
    return 2;
}
/**
 * Cover the JDK 1.5 API, for convenience. Returns a char array
 * representing the code point: one element for a code point below
 * <code>MIN_SUPPLEMENTARY_CODE_POINT</code>, otherwise two elements
 * holding the lead and trail surrogates in that order.
 * @param cp the code point to convert
 * @return an array containing the char(s) representing the code point
 * @throws IllegalArgumentException if cp is not a valid code point
 * @draft ICU 3.0
 */
public static final char[] toChars(int cp) {
    if (cp < 0) {
        throw new IllegalArgumentException();
    }
    if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
        final char[] single = new char[1];
        single[0] = (char)cp;
        return single;
    }
    if (cp > MAX_CODE_POINT) {
        throw new IllegalArgumentException();
    }
    final char[] pair = new char[2];
    pair[0] = UTF16.getLeadSurrogate(cp);
    pair[1] = UTF16.getTrailSurrogate(cp);
    return pair;
}
/**
 * Cover the JDK API, for convenience. Return a byte representing the directionality of
 * the character. The value is the bidi field extracted from the character's
 * property word.
 * <br/><b>Note</b>: Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined or
 * out-of-bounds characters. <br/><b>Note</b>: The return value must be
 * tested using the constants defined in {@link UCharacterEnums.ECharacterDirection}
 * since the values are different from the ones defined by <code>java.lang.Character</code>.
 * @param cp the code point to check
 * @return the directionality of the code point
 * @see #getDirection
 * @draft ICU 3.0
 */
public static byte getDirectionality(int cp)
{
    // when cp is out of bounds getProperty == 0
    final int shifted = getProperty(cp) >> BIDI_SHIFT_;
    return (byte)(shifted & BIDI_MASK_AFTER_SHIFT_);
}
// protected data members --------------------------------------------
/**
* Database storing the sets of character name
*/
* Database storing the sets of character name
*/
static UCharacterName NAME_ = null;
/**
@ -4417,7 +4748,7 @@ public final class UCharacter
}
catch (Exception e)
{
e.printStackTrace();
e.printStackTrace();
//throw new RuntimeException(e.getMessage());
// DO NOT throw an exception
// we might be building ICU modularly without names.icu and pnames.icu
@ -4438,7 +4769,7 @@ public final class UCharacter
private static final int[] PROPERTY_DATA_;
private static final int PROPERTY_INITIAL_VALUE_;
// block to initialise character property database
// block to initialise character property database
static
{
try

View File

@ -1,19 +1,20 @@
/**
*******************************************************************************
* Copyright (C) 1996-2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
* $Date: 2004/01/07 20:06:24 $
* $Revision: 1.14 $
*
*******************************************************************************
*/
*******************************************************************************
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java,v $
* $Date: 2004/03/10 02:21:37 $
* $Revision: 1.15 $
*
*******************************************************************************
*/
package com.ibm.icu.lang;
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
/**
* Enumerated Unicode category types from the UnicodeData.txt file.
* Used as return results from <a href=UCharacter.html>UCharacter</a>
@ -32,197 +33,8 @@ package com.ibm.icu.lang;
* @stable ICU 2.1
*/
public final class UCharacterCategory
public final class UCharacterCategory implements ECharacterCategory
{
// public variable -----------------------------------------------------
/**
* Unassigned character type
* @stable ICU 2.1
*/
public static final int UNASSIGNED = 0;
/**
* Character type Cn
* Not Assigned (no characters in [UnicodeData.txt] have this property)
* @stable ICU 2.6
*/
public static final int GENERAL_OTHER_TYPES = 0;
/**
* Character type Lu
* @stable ICU 2.1
*/
public static final int UPPERCASE_LETTER = 1;
/**
* Character type Ll
* @stable ICU 2.1
*/
public static final int LOWERCASE_LETTER = 2;
/**
* Character type Lt
* @stable ICU 2.1
*/
public static final int TITLECASE_LETTER = 3;
/**
* Character type Lm
* @stable ICU 2.1
*/
public static final int MODIFIER_LETTER = 4;
/**
* Character type Lo
* @stable ICU 2.1
*/
public static final int OTHER_LETTER = 5;
/**
* Character type Mn
* @stable ICU 2.1
*/
public static final int NON_SPACING_MARK = 6;
/**
* Character type Me
* @stable ICU 2.1
*/
public static final int ENCLOSING_MARK = 7;
/**
* Character type Mc
* @stable ICU 2.1
*/
public static final int COMBINING_SPACING_MARK = 8;
/**
* Character type Nd
* @stable ICU 2.1
*/
public static final int DECIMAL_DIGIT_NUMBER = 9;
/**
* Character type Nl
* @stable ICU 2.1
*/
public static final int LETTER_NUMBER = 10;
// start of 11------------
/**
* Character type No
* @stable ICU 2.1
*/
public static final int OTHER_NUMBER = 11;
/**
* Character type Zs
* @stable ICU 2.1
*/
public static final int SPACE_SEPARATOR = 12;
/**
* Character type Zl
* @stable ICU 2.1
*/
public static final int LINE_SEPARATOR = 13;
/**
* Character type Zp
* @stable ICU 2.1
*/
public static final int PARAGRAPH_SEPARATOR = 14;
/**
* Character type Cc
* @stable ICU 2.1
*/
public static final int CONTROL = 15;
/**
* Character type Cf
* @stable ICU 2.1
*/
public static final int FORMAT = 16;
/**
* Character type Co
* @stable ICU 2.1
*/
public static final int PRIVATE_USE = 17;
/**
* Character type Cs
* @stable ICU 2.1
*/
public static final int SURROGATE = 18;
/**
* Character type Pd
* @stable ICU 2.1
*/
public static final int DASH_PUNCTUATION = 19;
/**
* Character type Ps
* @stable ICU 2.1
*/
public static final int START_PUNCTUATION = 20;
// start of 21 ------------
/**
* Character type Pe
* @stable ICU 2.1
*/
public static final int END_PUNCTUATION = 21;
/**
* Character type Pc
* @stable ICU 2.1
*/
public static final int CONNECTOR_PUNCTUATION = 22;
/**
* Character type Po
* @stable ICU 2.1
*/
public static final int OTHER_PUNCTUATION = 23;
/**
* Character type Sm
* @stable ICU 2.1
*/
public static final int MATH_SYMBOL = 24;
/**
* Character type Sc
* @stable ICU 2.1
*/
public static final int CURRENCY_SYMBOL = 25;
/**
* Character type Sk
* @stable ICU 2.1
*/
public static final int MODIFIER_SYMBOL = 26;
/**
* Character type So
* @stable ICU 2.1
*/
public static final int OTHER_SYMBOL = 27;
/**
* Character type Pi
* @see #INITIAL_QUOTE_PUNCTUATION
* @stable ICU 2.1
*/
public static final int INITIAL_PUNCTUATION = 28;
/**
* Character type Pi
* This name is compatible with java.lang.Character's name for this type.
* @see #INITIAL_PUNCTUATION
* @draft ICU 2.8
*/
public static final int INITIAL_QUOTE_PUNCTUATION = 28;
/**
* Character type Pf
* @see #FINAL_QUOTE_PUNCTUATION
* @stable ICU 2.1
*/
public static final int FINAL_PUNCTUATION = 29;
/**
* Character type Pf
* This name is compatible with java.lang.Character's name for this type.
* @see #FINAL_PUNCTUATION
* @draft ICU 2.8
*/
public static final int FINAL_QUOTE_PUNCTUATION = 29;
// start of 31 ------------
/**
* Character type count
* @stable ICU 2.1
*/
public static final int CHAR_CATEGORY_COUNT = 30;
/**
* Gets the name of the argument category
* @param category to retrieve name
@ -232,66 +44,66 @@ public final class UCharacterCategory
public static String toString(int category)
{
switch (category) {
case UPPERCASE_LETTER :
return "Letter, Uppercase";
case LOWERCASE_LETTER :
return "Letter, Lowercase";
case TITLECASE_LETTER :
return "Letter, Titlecase";
case MODIFIER_LETTER :
return "Letter, Modifier";
case OTHER_LETTER :
return "Letter, Other";
case NON_SPACING_MARK :
return "Mark, Non-Spacing";
case ENCLOSING_MARK :
return "Mark, Enclosing";
case COMBINING_SPACING_MARK :
return "Mark, Spacing Combining";
case DECIMAL_DIGIT_NUMBER :
return "Number, Decimal Digit";
case LETTER_NUMBER :
return "Number, Letter";
case OTHER_NUMBER :
return "Number, Other";
case SPACE_SEPARATOR :
return "Separator, Space";
case LINE_SEPARATOR :
return "Separator, Line";
case PARAGRAPH_SEPARATOR :
return "Separator, Paragraph";
case CONTROL :
return "Other, Control";
case FORMAT :
return "Other, Format";
case PRIVATE_USE :
return "Other, Private Use";
case SURROGATE :
return "Other, Surrogate";
case DASH_PUNCTUATION :
return "Punctuation, Dash";
case START_PUNCTUATION :
return "Punctuation, Open";
case END_PUNCTUATION :
return "Punctuation, Close";
case CONNECTOR_PUNCTUATION :
return "Punctuation, Connector";
case OTHER_PUNCTUATION :
return "Punctuation, Other";
case MATH_SYMBOL :
return "Symbol, Math";
case CURRENCY_SYMBOL :
return "Symbol, Currency";
case MODIFIER_SYMBOL :
return "Symbol, Modifier";
case OTHER_SYMBOL :
return "Symbol, Other";
case INITIAL_PUNCTUATION :
return "Punctuation, Initial quote";
case FINAL_PUNCTUATION :
return "Punctuation, Final quote";
}
return "Unassigned";
case UPPERCASE_LETTER :
return "Letter, Uppercase";
case LOWERCASE_LETTER :
return "Letter, Lowercase";
case TITLECASE_LETTER :
return "Letter, Titlecase";
case MODIFIER_LETTER :
return "Letter, Modifier";
case OTHER_LETTER :
return "Letter, Other";
case NON_SPACING_MARK :
return "Mark, Non-Spacing";
case ENCLOSING_MARK :
return "Mark, Enclosing";
case COMBINING_SPACING_MARK :
return "Mark, Spacing Combining";
case DECIMAL_DIGIT_NUMBER :
return "Number, Decimal Digit";
case LETTER_NUMBER :
return "Number, Letter";
case OTHER_NUMBER :
return "Number, Other";
case SPACE_SEPARATOR :
return "Separator, Space";
case LINE_SEPARATOR :
return "Separator, Line";
case PARAGRAPH_SEPARATOR :
return "Separator, Paragraph";
case CONTROL :
return "Other, Control";
case FORMAT :
return "Other, Format";
case PRIVATE_USE :
return "Other, Private Use";
case SURROGATE :
return "Other, Surrogate";
case DASH_PUNCTUATION :
return "Punctuation, Dash";
case START_PUNCTUATION :
return "Punctuation, Open";
case END_PUNCTUATION :
return "Punctuation, Close";
case CONNECTOR_PUNCTUATION :
return "Punctuation, Connector";
case OTHER_PUNCTUATION :
return "Punctuation, Other";
case MATH_SYMBOL :
return "Symbol, Math";
case CURRENCY_SYMBOL :
return "Symbol, Currency";
case MODIFIER_SYMBOL :
return "Symbol, Modifier";
case OTHER_SYMBOL :
return "Symbol, Other";
case INITIAL_PUNCTUATION :
return "Punctuation, Initial quote";
case FINAL_PUNCTUATION :
return "Punctuation, Final quote";
}
return "Unassigned";
}
// private constructor -----------------------------------------------

View File

@ -1,19 +1,21 @@
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterDirection.java $
* $Date: 2002/12/11 23:37:43 $
* $Revision: 1.7 $
* $Date: 2004/03/10 02:21:37 $
* $Revision: 1.8 $
*
*******************************************************************************
*/
package com.ibm.icu.lang;
import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
/**
* Enumerated Unicode character linguistic direction constants.
* Used as return results from <a href=UCharacter.html>UCharacter</a>
@ -24,8 +26,8 @@ package com.ibm.icu.lang;
* @stable ICU 2.1
*/
public final class UCharacterDirection
{
public final class UCharacterDirection implements ECharacterDirection {
// private constructor =========================================
///CLOVER:OFF
/**
@ -36,161 +38,54 @@ public final class UCharacterDirection
}
///CLOVER:ON
// public variable =============================================
/**
* Directional type L
* Gets the name of the argument direction
* @param dir direction type to retrieve name
* @return directional name
* @stable ICU 2.1
*/
public static final int LEFT_TO_RIGHT = 0;
/**
* Directional type R
* @stable ICU 2.1
*/
public static final int RIGHT_TO_LEFT = 1;
/**
* Directional type EN
* @stable ICU 2.1
*/
public static final int EUROPEAN_NUMBER = 2;
/**
* Directional type ES
* @stable ICU 2.1
*/
public static final int EUROPEAN_NUMBER_SEPARATOR = 3;
/**
* Directional type ET
* @stable ICU 2.1
*/
public static final int EUROPEAN_NUMBER_TERMINATOR = 4;
/**
* Directional type AN
* @stable ICU 2.1
*/
public static final int ARABIC_NUMBER = 5;
/**
* Directional type CS
* @stable ICU 2.1
*/
public static final int COMMON_NUMBER_SEPARATOR = 6;
/**
* Directional type B
* @stable ICU 2.1
*/
public static final int BLOCK_SEPARATOR = 7;
/**
* Directional type S
* @stable ICU 2.1
*/
public static final int SEGMENT_SEPARATOR = 8;
/**
* Directional type WS
* @stable ICU 2.1
*/
public static final int WHITE_SPACE_NEUTRAL = 9;
// start of 11 ---------------
/**
* Directional type ON
* @stable ICU 2.1
*/
public static final int OTHER_NEUTRAL = 10;
/**
* Directional type LRE
* @stable ICU 2.1
*/
public static final int LEFT_TO_RIGHT_EMBEDDING = 11;
/**
* Directional type LRO
* @stable ICU 2.1
*/
public static final int LEFT_TO_RIGHT_OVERRIDE = 12;
/**
* Directional type AL
* @stable ICU 2.1
*/
public static final int RIGHT_TO_LEFT_ARABIC = 13;
/**
* Directional type RLE
* @stable ICU 2.1
*/
public static final int RIGHT_TO_LEFT_EMBEDDING = 14;
/**
* Directional type RLO
* @stable ICU 2.1
*/
public static final int RIGHT_TO_LEFT_OVERRIDE = 15;
/**
* Directional type PDF
* @stable ICU 2.1
*/
public static final int POP_DIRECTIONAL_FORMAT = 16;
/**
* Directional type NSM
* @stable ICU 2.1
*/
public static final int DIR_NON_SPACING_MARK = 17;
/**
* Directional type BN
* @stable ICU 2.1
*/
public static final int BOUNDARY_NEUTRAL = 18;
/**
* Number of directional type
* @stable ICU 2.1
*/
public static final int CHAR_DIRECTION_COUNT = 19;
/**
 * Gets the name of the argument direction.
 * @param dir direction type to retrieve name
 * @return directional name, or "Unassigned" when <code>dir</code> is not
 *         one of the directional type constants handled below
 * @stable ICU 2.1
 */
public static String toString(int dir) {
    switch(dir)
    {
    case LEFT_TO_RIGHT :
        return "Left-to-Right";
    case RIGHT_TO_LEFT :
        return "Right-to-Left";
    case EUROPEAN_NUMBER :
        return "European Number";
    case EUROPEAN_NUMBER_SEPARATOR :
        return "European Number Separator";
    case EUROPEAN_NUMBER_TERMINATOR :
        return "European Number Terminator";
    case ARABIC_NUMBER :
        return "Arabic Number";
    case COMMON_NUMBER_SEPARATOR :
        return "Common Number Separator";
    case BLOCK_SEPARATOR :
        // directional type B is reported under its paragraph-separator name
        return "Paragraph Separator";
    case SEGMENT_SEPARATOR :
        return "Segment Separator";
    case WHITE_SPACE_NEUTRAL :
        return "Whitespace";
    case OTHER_NEUTRAL :
        return "Other Neutrals";
    case LEFT_TO_RIGHT_EMBEDDING :
        return "Left-to-Right Embedding";
    case LEFT_TO_RIGHT_OVERRIDE :
        return "Left-to-Right Override";
    case RIGHT_TO_LEFT_ARABIC :
        return "Right-to-Left Arabic";
    case RIGHT_TO_LEFT_EMBEDDING :
        return "Right-to-Left Embedding";
    case RIGHT_TO_LEFT_OVERRIDE :
        return "Right-to-Left Override";
    case POP_DIRECTIONAL_FORMAT :
        return "Pop Directional Format";
    case DIR_NON_SPACING_MARK :
        return "Non-Spacing Mark";
    case BOUNDARY_NEUTRAL :
        return "Boundary Neutral";
    }
    // any value without a case above
    return "Unassigned";
}
}

View File

@ -0,0 +1,492 @@
/**
*******************************************************************************
* Copyright (C) 2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterEnums.java,v $
* $Date: 2004/03/10 02:21:37 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.lang;
/**
* A container for the different 'enumerated types' used by UCharacter.
* @draft ICU 3.0
*/
public class UCharacterEnums {

    /**
     * 'Enum' for the CharacterCategory constants.  These constants are
     * compatible in name <b>but not in value</b> with those defined in
     * <code>java.lang.Character</code>.
     * @see UCharacterCategory
     * @draft ICU 3.0
     */
    public static interface ECharacterCategory {
        /**
         * Unassigned character type
         * @stable ICU 2.1
         */
        public static final int UNASSIGNED = 0;

        /**
         * Character type Cn
         * Not Assigned (no characters in [UnicodeData.txt] have this property).
         * Same value as {@link #UNASSIGNED}.
         * @stable ICU 2.6
         */
        public static final int GENERAL_OTHER_TYPES = 0;

        /**
         * Character type Lu
         * @stable ICU 2.1
         */
        public static final int UPPERCASE_LETTER = 1;

        /**
         * Character type Ll
         * @stable ICU 2.1
         */
        public static final int LOWERCASE_LETTER = 2;

        /**
         * Character type Lt
         * @stable ICU 2.1
         */
        public static final int TITLECASE_LETTER = 3;

        /**
         * Character type Lm
         * @stable ICU 2.1
         */
        public static final int MODIFIER_LETTER = 4;

        /**
         * Character type Lo
         * @stable ICU 2.1
         */
        public static final int OTHER_LETTER = 5;

        /**
         * Character type Mn
         * @stable ICU 2.1
         */
        public static final int NON_SPACING_MARK = 6;

        /**
         * Character type Me
         * @stable ICU 2.1
         */
        public static final int ENCLOSING_MARK = 7;

        /**
         * Character type Mc
         * @stable ICU 2.1
         */
        public static final int COMBINING_SPACING_MARK = 8;

        /**
         * Character type Nd
         * @stable ICU 2.1
         */
        public static final int DECIMAL_DIGIT_NUMBER = 9;

        /**
         * Character type Nl
         * @stable ICU 2.1
         */
        public static final int LETTER_NUMBER = 10;

        /**
         * Character type No
         * @stable ICU 2.1
         */
        public static final int OTHER_NUMBER = 11;

        /**
         * Character type Zs
         * @stable ICU 2.1
         */
        public static final int SPACE_SEPARATOR = 12;

        /**
         * Character type Zl
         * @stable ICU 2.1
         */
        public static final int LINE_SEPARATOR = 13;

        /**
         * Character type Zp
         * @stable ICU 2.1
         */
        public static final int PARAGRAPH_SEPARATOR = 14;

        /**
         * Character type Cc
         * @stable ICU 2.1
         */
        public static final int CONTROL = 15;

        /**
         * Character type Cf
         * @stable ICU 2.1
         */
        public static final int FORMAT = 16;

        /**
         * Character type Co
         * @stable ICU 2.1
         */
        public static final int PRIVATE_USE = 17;

        /**
         * Character type Cs
         * @stable ICU 2.1
         */
        public static final int SURROGATE = 18;

        /**
         * Character type Pd
         * @stable ICU 2.1
         */
        public static final int DASH_PUNCTUATION = 19;

        /**
         * Character type Ps
         * @stable ICU 2.1
         */
        public static final int START_PUNCTUATION = 20;

        /**
         * Character type Pe
         * @stable ICU 2.1
         */
        public static final int END_PUNCTUATION = 21;

        /**
         * Character type Pc
         * @stable ICU 2.1
         */
        public static final int CONNECTOR_PUNCTUATION = 22;

        /**
         * Character type Po
         * @stable ICU 2.1
         */
        public static final int OTHER_PUNCTUATION = 23;

        /**
         * Character type Sm
         * @stable ICU 2.1
         */
        public static final int MATH_SYMBOL = 24;

        /**
         * Character type Sc
         * @stable ICU 2.1
         */
        public static final int CURRENCY_SYMBOL = 25;

        /**
         * Character type Sk
         * @stable ICU 2.1
         */
        public static final int MODIFIER_SYMBOL = 26;

        /**
         * Character type So
         * @stable ICU 2.1
         */
        public static final int OTHER_SYMBOL = 27;

        /**
         * Character type Pi
         * @see #INITIAL_QUOTE_PUNCTUATION
         * @stable ICU 2.1
         */
        public static final int INITIAL_PUNCTUATION = 28;

        /**
         * Character type Pi
         * This name is compatible with java.lang.Character's name for this type.
         * @see #INITIAL_PUNCTUATION
         * @draft ICU 2.8
         */
        public static final int INITIAL_QUOTE_PUNCTUATION = 28;

        /**
         * Character type Pf
         * @see #FINAL_QUOTE_PUNCTUATION
         * @stable ICU 2.1
         */
        public static final int FINAL_PUNCTUATION = 29;

        /**
         * Character type Pf
         * This name is compatible with java.lang.Character's name for this type.
         * @see #FINAL_PUNCTUATION
         * @draft ICU 2.8
         */
        public static final int FINAL_QUOTE_PUNCTUATION = 29;

        /**
         * Character type count
         * @stable ICU 2.1
         */
        public static final int CHAR_CATEGORY_COUNT = 30;
    }

    /**
     * 'Enum' for the CharacterDirection constants.  There are two sets
     * of names, those used in ICU, and those used in the JDK.  The
     * JDK constants are compatible in name <b>but not in value</b>
     * with those defined in <code>java.lang.Character</code>.
     * @see UCharacterDirection
     * @draft ICU 3.0
     */
    public static interface ECharacterDirection {
        /**
         * Directional type L
         * @stable ICU 2.1
         */
        public static final int LEFT_TO_RIGHT = 0;

        /**
         * JDK-compatible synonym for LEFT_TO_RIGHT.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = (byte)LEFT_TO_RIGHT;

        /**
         * Directional type R
         * @stable ICU 2.1
         */
        public static final int RIGHT_TO_LEFT = 1;

        /**
         * JDK-compatible synonym for RIGHT_TO_LEFT.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = (byte)RIGHT_TO_LEFT;

        /**
         * Directional type EN
         * @stable ICU 2.1
         */
        public static final int EUROPEAN_NUMBER = 2;

        /**
         * JDK-compatible synonym for EUROPEAN_NUMBER.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = (byte)EUROPEAN_NUMBER;

        /**
         * Directional type ES
         * @stable ICU 2.1
         */
        public static final int EUROPEAN_NUMBER_SEPARATOR = 3;

        /**
         * JDK-compatible synonym for EUROPEAN_NUMBER_SEPARATOR.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = (byte)EUROPEAN_NUMBER_SEPARATOR;

        /**
         * Directional type ET
         * @stable ICU 2.1
         */
        public static final int EUROPEAN_NUMBER_TERMINATOR = 4;

        /**
         * JDK-compatible synonym for EUROPEAN_NUMBER_TERMINATOR.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = (byte)EUROPEAN_NUMBER_TERMINATOR;

        /**
         * Directional type AN
         * @stable ICU 2.1
         */
        public static final int ARABIC_NUMBER = 5;

        /**
         * JDK-compatible synonym for ARABIC_NUMBER.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_ARABIC_NUMBER = (byte)ARABIC_NUMBER;

        /**
         * Directional type CS
         * @stable ICU 2.1
         */
        public static final int COMMON_NUMBER_SEPARATOR = 6;

        /**
         * JDK-compatible synonym for COMMON_NUMBER_SEPARATOR.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = (byte)COMMON_NUMBER_SEPARATOR;

        /**
         * Directional type B
         * @stable ICU 2.1
         */
        public static final int BLOCK_SEPARATOR = 7;

        /**
         * JDK-compatible synonym for BLOCK_SEPARATOR.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = (byte)BLOCK_SEPARATOR;

        /**
         * Directional type S
         * @stable ICU 2.1
         */
        public static final int SEGMENT_SEPARATOR = 8;

        /**
         * JDK-compatible synonym for SEGMENT_SEPARATOR.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = (byte)SEGMENT_SEPARATOR;

        /**
         * Directional type WS
         * @stable ICU 2.1
         */
        public static final int WHITE_SPACE_NEUTRAL = 9;

        /**
         * JDK-compatible synonym for WHITE_SPACE_NEUTRAL.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_WHITESPACE = (byte)WHITE_SPACE_NEUTRAL;

        /**
         * Directional type ON
         * @stable ICU 2.1
         */
        public static final int OTHER_NEUTRAL = 10;

        /**
         * JDK-compatible synonym for OTHER_NEUTRAL.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_OTHER_NEUTRALS = (byte)OTHER_NEUTRAL;

        /**
         * Directional type LRE
         * @stable ICU 2.1
         */
        public static final int LEFT_TO_RIGHT_EMBEDDING = 11;

        /**
         * JDK-compatible synonym for LEFT_TO_RIGHT_EMBEDDING.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = (byte)LEFT_TO_RIGHT_EMBEDDING;

        /**
         * Directional type LRO
         * @stable ICU 2.1
         */
        public static final int LEFT_TO_RIGHT_OVERRIDE = 12;

        /**
         * JDK-compatible synonym for LEFT_TO_RIGHT_OVERRIDE.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = (byte)LEFT_TO_RIGHT_OVERRIDE;

        /**
         * Directional type AL
         * @stable ICU 2.1
         */
        public static final int RIGHT_TO_LEFT_ARABIC = 13;

        /**
         * JDK-compatible synonym for RIGHT_TO_LEFT_ARABIC.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = (byte)RIGHT_TO_LEFT_ARABIC;

        /**
         * Directional type RLE
         * @stable ICU 2.1
         */
        public static final int RIGHT_TO_LEFT_EMBEDDING = 14;

        /**
         * JDK-compatible synonym for RIGHT_TO_LEFT_EMBEDDING.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = (byte)RIGHT_TO_LEFT_EMBEDDING;

        /**
         * Directional type RLO
         * @stable ICU 2.1
         */
        public static final int RIGHT_TO_LEFT_OVERRIDE = 15;

        /**
         * JDK-compatible synonym for RIGHT_TO_LEFT_OVERRIDE.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = (byte)RIGHT_TO_LEFT_OVERRIDE;

        /**
         * Directional type PDF
         * @stable ICU 2.1
         */
        public static final int POP_DIRECTIONAL_FORMAT = 16;

        /**
         * JDK-compatible synonym for POP_DIRECTIONAL_FORMAT.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = (byte)POP_DIRECTIONAL_FORMAT;

        /**
         * Directional type NSM
         * @stable ICU 2.1
         */
        public static final int DIR_NON_SPACING_MARK = 17;

        /**
         * Synonym for DIR_NON_SPACING_MARK.  Note that this spelling does
         * <b>not</b> match the name used by <code>java.lang.Character</code>
         * (which has no underscore between NON and SPACING); it is kept so
         * that existing references continue to compile.
         * @see #DIRECTIONALITY_NONSPACING_MARK
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_NON_SPACING_MARK = (byte)DIR_NON_SPACING_MARK;

        /**
         * JDK-compatible synonym for DIR_NON_SPACING_MARK, spelled the way
         * <code>java.lang.Character</code> spells it.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_NONSPACING_MARK = (byte)DIR_NON_SPACING_MARK;

        /**
         * Directional type BN
         * @stable ICU 2.1
         */
        public static final int BOUNDARY_NEUTRAL = 18;

        /**
         * JDK-compatible synonym for BOUNDARY_NEUTRAL.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = (byte)BOUNDARY_NEUTRAL;

        /**
         * Number of directional types
         * @stable ICU 2.1
         */
        public static final int CHAR_DIRECTION_COUNT = 19;

        /**
         * Undefined bidirectional character type. Undefined <code>char</code>
         * values have undefined directionality in the Unicode specification.
         * @draft ICU 3.0
         */
        public static final byte DIRECTIONALITY_UNDEFINED = -1;
    }
}