ICU-1707
new extended name APIs X-SVN-Rev: 7677
This commit is contained in:
parent
6fdea6ffb4
commit
d882319b30
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $
|
||||
* $Date: 2002/02/08 23:44:17 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2002/02/15 02:53:32 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -555,16 +555,33 @@ public final class UCharacterTest extends TestFmwk
|
||||
*/
|
||||
public void TestNames()
|
||||
{
|
||||
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
|
||||
0x23456};
|
||||
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xd800, 0xdc00,
|
||||
0xff08, 0xffe5, 0xffff, 0x23456, 0x9};
|
||||
String name[] = {"LATIN SMALL LETTER A",
|
||||
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||
"CJK UNIFIED IDEOGRAPH-3401",
|
||||
"CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
|
||||
"HANGUL SYLLABLE HIH", "FULLWIDTH LEFT PARENTHESIS",
|
||||
"FULLWIDTH YEN SIGN", "CJK UNIFIED IDEOGRAPH-23456"};
|
||||
"HANGUL SYLLABLE HIH", "", "",
|
||||
"FULLWIDTH LEFT PARENTHESIS",
|
||||
"FULLWIDTH YEN SIGN", "", "CJK UNIFIED IDEOGRAPH-23456",
|
||||
""};
|
||||
String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
|
||||
"", "", "FULLWIDTH OPENING PARENTHESIS", "", ""};
|
||||
"", "", "", "", "FULLWIDTH OPENING PARENTHESIS", "",
|
||||
"", "", "HORIZONTAL TABULATION"};
|
||||
String extendedname[] = {"LATIN SMALL LETTER A",
|
||||
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||
"CJK UNIFIED IDEOGRAPH-3401",
|
||||
"CJK UNIFIED IDEOGRAPH-7FED",
|
||||
"HANGUL SYLLABLE GA",
|
||||
"HANGUL SYLLABLE HIH",
|
||||
"<lead surrogate-D800>",
|
||||
"<trail surrogate-DC00>",
|
||||
"FULLWIDTH LEFT PARENTHESIS",
|
||||
"FULLWIDTH YEN SIGN",
|
||||
"<noncharacter-FFFF>",
|
||||
"CJK UNIFIED IDEOGRAPH-23456",
|
||||
"HORIZONTAL TABULATION"};
|
||||
|
||||
int size = c.length;
|
||||
String str;
|
||||
int uc;
|
||||
@ -573,7 +590,8 @@ public final class UCharacterTest extends TestFmwk
|
||||
{
|
||||
// modern Unicode character name
|
||||
str = UCharacter.getName(c[i]);
|
||||
if (!str.equals(name[i]))
|
||||
if ((str == null && name[i].length() > 0) ||
|
||||
(str != null && !str.equals(name[i])))
|
||||
{
|
||||
errln("FAIL \\u" + hex(c[i]) + " expected name " +
|
||||
name[i]);
|
||||
@ -590,9 +608,18 @@ public final class UCharacterTest extends TestFmwk
|
||||
break;
|
||||
}
|
||||
|
||||
// extended character name
|
||||
str = UCharacter.getExtendedName(c[i]);
|
||||
if (str == null || !str.equals(extendedname[i]))
|
||||
{
|
||||
errln("FAIL \\u" + hex(c[i]) + " expected extended name " +
|
||||
extendedname[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
// retrieving unicode character from modern name
|
||||
uc = UCharacter.getCharFromName(name[i]);
|
||||
if (uc != c[i])
|
||||
if (uc != c[i] && name[i].length() != 0)
|
||||
{
|
||||
errln("FAIL " + name[i] + " expected character \\u" + hex(c[i]));
|
||||
break;
|
||||
@ -600,9 +627,17 @@ public final class UCharacterTest extends TestFmwk
|
||||
|
||||
//retrieving unicode character from 1.0 name
|
||||
uc = UCharacter.getCharFromName1_0(oldname[i]);
|
||||
if (uc != c[i] && oldname[i].length() != 0)
|
||||
{
|
||||
errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + hex(c[i]));
|
||||
break;
|
||||
}
|
||||
|
||||
//retrieving unicode character from extended name
|
||||
uc = UCharacter.getCharFromExtendedName(extendedname[i]);
|
||||
if (uc != c[i] && i != 0 && (i == 1 || i == 6))
|
||||
{
|
||||
errln("FAIL " + name[i] + " expected 1.0 character \\u" + hex(c[i]));
|
||||
errln("FAIL " + extendedname[i] + " expected extended character \\u" + hex(c[i]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1014,8 +1049,8 @@ public final class UCharacterTest extends TestFmwk
|
||||
try
|
||||
{
|
||||
UCharacterTest test = new UCharacterTest();
|
||||
//test.TestEnumeration();
|
||||
test.run(arg);
|
||||
test.TestNames();
|
||||
//test.run(arg);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
|
||||
* $Date: 2002/02/08 01:08:38 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2002/02/15 02:53:32 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -17,6 +17,8 @@ package com.ibm.text;
|
||||
import java.util.Locale;
|
||||
import com.ibm.util.Utility;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
import com.ibm.text.BreakIterator;
|
||||
import com.ibm.text.RuleBasedBreakIterator;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
@ -910,8 +912,7 @@ public final class UCharacter
|
||||
*/
|
||||
public static String getName(int ch)
|
||||
{
|
||||
return NAME_.getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -929,10 +930,33 @@ public final class UCharacter
|
||||
return NAME_.getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Retrieves a name for a valid codepoint. Unlike getName(int) and
|
||||
* getName1_0(int), this method will return a name even for codepoints that
|
||||
* are not assigned a name in UnicodeData.txt.
|
||||
* </p>
|
||||
* The names are returned in the following order.
|
||||
* <ul>
|
||||
* <li> Most current Unicode name if there is any
|
||||
* <li> Unicode 1.0 name if there is any
|
||||
* <li> Extended name in the form of "<codepoint_type-codepoint_hex_digits>".
|
||||
* E.g. <noncharacter-FFFE>
|
||||
* </ul>
|
||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||
* incurs a one-time initialisation cost to construct the name tables.
|
||||
* @param ch the code point for which to get the name
|
||||
* @return a name for the argument codepoint
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static String getExtendedName(int ch)
|
||||
{
|
||||
return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a Unicode code point by its most current Unicode name and return its
|
||||
* code point value.<br>
|
||||
* <p>Find a Unicode code point by its most current Unicode name and
|
||||
* return its code point value. All Unicode names are in uppercase.</p>
|
||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||
* incurs a one-time initialisation cost to construct the name tables.
|
||||
* @param name most current Unicode character name whose code point is to be
|
||||
@ -946,8 +970,8 @@ public final class UCharacter
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a Unicode character by its version 1.0 Unicode name and return its
|
||||
* code point value.<br>
|
||||
* <p>Find a Unicode character by its version 1.0 Unicode name and return
|
||||
* its code point value. All Unicode names are in uppercase.</p>
|
||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||
* incurs a one-time initialisation cost to construct the name tables.
|
||||
* @param name Unicode 1.0 code point name whose code point is to
|
||||
@ -959,6 +983,31 @@ public final class UCharacter
|
||||
return NAME_.getCharFromName(
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Find a Unicode character by any of its names and return its code
|
||||
* point value. All Unicode names are in uppercase.
|
||||
* Extended names are all lowercase except for numbers and are contained
|
||||
* within angle brackets.</p>
|
||||
* The names are searched in the following order
|
||||
* <ul>
|
||||
* <li> Most current Unicode name if there is any
|
||||
* <li> Unicode 1.0 name if there is any
|
||||
* <li> Extended name in the form of "<codepoint_type-codepoint_hex_digits>".
|
||||
* E.g. <noncharacter-FFFE>
|
||||
* </ul>
|
||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||
* incurs a one-time initialisation cost to construct the name tables.
|
||||
* @param name codepoint name
|
||||
* @return code point associated with the name or -1 if the name is not
|
||||
* found.
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static int getCharFromExtendedName(String name)
|
||||
{
|
||||
return NAME_.getCharFromName(
|
||||
UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a code point corresponding to the two UTF16 characters.<br>
|
||||
@ -1016,6 +1065,38 @@ public final class UCharacter
|
||||
{
|
||||
return toLowerCase(Locale.getDefault(), str);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets the titlecase version of the argument string.</p>
|
||||
* <p>Position for titlecasing is determined by the argument break
|
||||
* iterator, hence the user can customize the break iterator for
|
||||
* a specialized titlecasing. In this case only the forward iteration
|
||||
* needs to be implemented.
|
||||
* If the break iterator passed in is null, the default Unicode algorithm
|
||||
* will be used to determine the titlecase positions.
|
||||
* </p>
|
||||
* <p>Only positions returned by the break iterator will be title cased,
|
||||
* character in between the positions will all be in lower case.</p>
|
||||
* <p>Casing is dependent on the default locale and context-sensitive</p>
|
||||
* @param str source string to be performed on
|
||||
* @param breakiter break iterator to determine the positions in which
|
||||
* the character should be title cased.
|
||||
* @return titlecase version of the argument string
|
||||
*/
|
||||
public static String toTitleCase(String str, BreakIterator breakiter)
|
||||
{
|
||||
if (breakiter == null) {
|
||||
String rules = "$cased=[[:Lu:][:Lt:][:Ll:]];" +
|
||||
"$case_ignorable=[[:Mn:][:Me:][:Cf:][:Lm:][:Sk:]"
|
||||
+ " \\u0027\u00AD\u2019];" +
|
||||
"$not_cased=[^$cased$case_ignorable];" +
|
||||
"[$not_cased$case_ignorable]*/" +
|
||||
"$cased[$cased$case_ignorable]*$not_cased*;";
|
||||
breakiter = new RuleBasedBreakIterator(rules);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets uppercase version of the argument string.
|
||||
@ -1111,6 +1192,30 @@ public final class UCharacter
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets the titlecase version of the argument string.</p>
|
||||
* <p>Position for titlecasing is determined by the argument break
|
||||
* iterator, hence the user can customize the break iterator for
|
||||
* a specialized titlecasing. In this case only the forward iteration
|
||||
* needs to be implemented.
|
||||
* If the break iterator passed in is null, the default Unicode algorithm
|
||||
* will be used to determine the titlecase positions.
|
||||
* </p>
|
||||
* <p>Only positions returned by the break iterator will be title cased,
|
||||
* character in between the positions will all be in lower case.</p>
|
||||
* <p>Casing is dependent on the argument locale and context-sensitive</p>
|
||||
* @param locale which string is to be converted in
|
||||
* @param str source string to be performed on
|
||||
* @param breakiter break iterator to determine the positions in which
|
||||
* the character should be title cased.
|
||||
* @return titlecase version of the argument string
|
||||
*/
|
||||
public static String toTitleCase(Locale locale, String str,
|
||||
BreakIterator breakiter)
|
||||
{
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
* The given character is mapped to its case folding equivalent according to
|
||||
* UnicodeData.txt and CaseFolding.txt; if the character has no case folding
|
||||
|
@ -6,8 +6,8 @@
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
|
||||
* $Date: 2001/10/12 23:53:16 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2002/02/15 02:53:35 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -26,22 +26,19 @@ package com.ibm.text;
|
||||
|
||||
public class UCharacterCategory
|
||||
{
|
||||
// private constructor ===================================================
|
||||
|
||||
/**
|
||||
* Private constructor to prevent initialisation
|
||||
*/
|
||||
private UCharacterCategory()
|
||||
{
|
||||
}
|
||||
|
||||
// public variable =======================================================
|
||||
// public variable -----------------------------------------------------
|
||||
|
||||
/**
|
||||
* Unassigned character type
|
||||
*/
|
||||
public static final int UNASSIGNED = 0;
|
||||
/**
|
||||
* Character type Cn
|
||||
* Not Assigned (no characters in [UnicodeData.txt] have this property)
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static final int GENERAL_OTHER_TYPES = 0;
|
||||
/**
|
||||
* Character type Lu
|
||||
*/
|
||||
public static final int UPPERCASE_LETTER = 1;
|
||||
@ -163,17 +160,13 @@ public class UCharacterCategory
|
||||
* Character type Pf
|
||||
*/
|
||||
public static final int FINAL_PUNCTUATION = 29;
|
||||
/**
|
||||
* Character type Cn
|
||||
*/
|
||||
public static final int GENERAL_OTHER_TYPES = 30;
|
||||
|
||||
// start of 31 ------------
|
||||
|
||||
/**
|
||||
* Character type count
|
||||
*/
|
||||
public static final int CHAR_CATEGORY_COUNT = 31;
|
||||
public static final int CHAR_CATEGORY_COUNT = 30;
|
||||
|
||||
/**
|
||||
* Gets the name of the argument category
|
||||
@ -245,4 +238,72 @@ public class UCharacterCategory
|
||||
}
|
||||
return "Unassigned";
|
||||
}
|
||||
|
||||
// private constructor -----------------------------------------------
|
||||
|
||||
/**
|
||||
* Private constructor to prevent initialisation
|
||||
*/
|
||||
private UCharacterCategory()
|
||||
{
|
||||
}
|
||||
|
||||
// package private data members --------------------------------------
|
||||
|
||||
/**
|
||||
* Not a character type
|
||||
*/
|
||||
static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
|
||||
/**
|
||||
* Lead surrogate type
|
||||
*/
|
||||
static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
|
||||
/**
|
||||
* Trail surrogate type
|
||||
*/
|
||||
static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
|
||||
/**
|
||||
* Extended category count
|
||||
*/
|
||||
static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
|
||||
/**
|
||||
* Type names used for extended names
|
||||
*/
|
||||
static final String TYPE_NAMES_[] = {"unassigned",
|
||||
"uppercase letter",
|
||||
"lowercase letter",
|
||||
"titlecase letter",
|
||||
"modifier letter",
|
||||
"other letter",
|
||||
"non spacing mark",
|
||||
"enclosing mark",
|
||||
"combining spacing mark",
|
||||
"decimal digit number",
|
||||
"letter number",
|
||||
"other number",
|
||||
"space separator",
|
||||
"line separator",
|
||||
"paragraph separator",
|
||||
"control",
|
||||
"format",
|
||||
"private use area",
|
||||
"surrogate",
|
||||
"dash punctuation",
|
||||
"start punctuation",
|
||||
"end punctuation",
|
||||
"connector punctuation",
|
||||
"other punctuation",
|
||||
"math symbol",
|
||||
"currency symbol",
|
||||
"modifier symbol",
|
||||
"other symbol",
|
||||
"initial punctuation",
|
||||
"final punctuation",
|
||||
"noncharacter",
|
||||
"lead surrogate",
|
||||
"trail surrogate"};
|
||||
/**
|
||||
* Unknown type name
|
||||
*/
|
||||
static final String UNKNOWN_TYPE_NAME_ = "unknown";
|
||||
}
|
||||
|
@ -6,8 +6,8 @@
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
||||
* $Date: 2002/02/08 01:08:38 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2002/02/15 02:53:34 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -409,20 +409,29 @@ final class UCharacterName
|
||||
return null;
|
||||
}
|
||||
|
||||
int tempChoice = choice;
|
||||
if (tempChoice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
|
||||
}
|
||||
|
||||
String result = "";
|
||||
|
||||
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
||||
// the same as the modern ones, extension A was only introduced with
|
||||
// Unicode 3.0, and the Hangul syllable block was moved and changed around
|
||||
// Unicode 1.1.5.
|
||||
if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||
if (tempChoice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||
// try getting algorithmic name first
|
||||
result = getAlgName(ch);
|
||||
}
|
||||
|
||||
// getting normal character name
|
||||
if (result == null || result.length() == 0) {
|
||||
result = getGroupName(ch, choice);
|
||||
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
result = getExtendedName(ch);
|
||||
} else {
|
||||
result = getGroupName(ch, choice);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -442,26 +451,42 @@ final class UCharacterName
|
||||
name == null || name.length() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
String uppercasename = UCharacter.toUpperCase(Locale.ENGLISH, name);
|
||||
|
||||
// try extended names first
|
||||
int result = getExtendedChar(name, choice);
|
||||
if (result >= -1) {
|
||||
return result;
|
||||
}
|
||||
// try algorithmic names first, if fails then try group names
|
||||
// int result = getAlgorithmChar(choice, uppercasename);
|
||||
int tempChoice = choice;
|
||||
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
|
||||
}
|
||||
|
||||
// 1.0 has no algorithmic names
|
||||
String upperCaseName = UCharacter.toUpperCase(Locale.ENGLISH, name);
|
||||
// try algorithmic names now, 1.0 has no algorithmic names
|
||||
if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||
return getGroupChar(uppercasename, choice);
|
||||
return getGroupChar(upperCaseName, tempChoice);
|
||||
}
|
||||
int count = 0;
|
||||
if (m_algorithm_ != null) {
|
||||
count = m_algorithm_.length;
|
||||
}
|
||||
for (count --; count >= 0; count --) {
|
||||
int result = m_algorithm_[count].getAlgorithmChar(name);
|
||||
result = m_algorithm_[count].getAlgorithmChar(name);
|
||||
if (result >= 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return getGroupChar(uppercasename, choice);
|
||||
|
||||
result = getGroupChar(upperCaseName, tempChoice);
|
||||
if (result == -1 &&
|
||||
choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
result = getGroupChar(upperCaseName,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -943,4 +968,118 @@ final class UCharacterName
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the character with extended name of the form <....>.
|
||||
* @param name of the character to be found
|
||||
* @param choice name choice
|
||||
* @return character associated with the name, -1 if such character is not
|
||||
* found and -2 if we should continue with the search.
|
||||
*/
|
||||
private int getExtendedChar(String name, int choice)
|
||||
{
|
||||
if (name.charAt(0) == '<') {
|
||||
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
int endIndex = name.length() - 1;
|
||||
if (name.charAt(endIndex) == '>') {
|
||||
int startIndex = name.lastIndexOf('-');
|
||||
if (startIndex >= 0) { // We've got a category.
|
||||
startIndex ++;
|
||||
int result = -1;
|
||||
try {
|
||||
result = Integer.parseInt(
|
||||
name.substring(startIndex, endIndex),
|
||||
16);
|
||||
}
|
||||
catch (NumberFormatException e) {
|
||||
return -1;
|
||||
}
|
||||
// Now validate the category name. We could use a
|
||||
// binary search, or a trie, if we really wanted to.
|
||||
String type = name.substring(1, startIndex - 1);
|
||||
int length = UCharacterCategory.TYPE_NAMES_.length;
|
||||
for (int i = 0; i < length; ++ i) {
|
||||
if (type.compareToIgnoreCase(
|
||||
UCharacterCategory.TYPE_NAMES_[i]) == 0) {
|
||||
if (getType(result) == i) {
|
||||
return result;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character extended type
|
||||
* @param ch character to be tested
|
||||
* @return extended type it is associated with
|
||||
*/
|
||||
private int getType(int ch)
|
||||
{
|
||||
if ((ch & 0xFFFE) == 0xFFFE || (ch >= 0xFDD0 && ch <= 0xFDEF)) {
|
||||
// not a character we return a invalid category count
|
||||
return UCharacterCategory.NON_CHARACTER_;
|
||||
}
|
||||
// Undo ICU exceptions to the UCD when determining the category.
|
||||
int result;
|
||||
if (UCharacter.isISOControl(ch)) {
|
||||
result = UCharacterCategory.CONTROL;
|
||||
}
|
||||
else {
|
||||
result = UCharacter.getType(ch);
|
||||
if (result == UCharacterCategory.SURROGATE) {
|
||||
if (UTF16.isLeadSurrogate((char)ch)) {
|
||||
result = UCharacterCategory.LEAD_SURROGATE_;
|
||||
}
|
||||
else {
|
||||
result = UCharacterCategory.TRAIL_SURROGATE_;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the extended name
|
||||
*/
|
||||
private String getExtendedName(int ch)
|
||||
{
|
||||
String result = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
if (result == null) {
|
||||
if (getType(ch) == UCharacterCategory.CONTROL) {
|
||||
result = getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
if (result == null) {
|
||||
int type = getType(ch);
|
||||
// Return unknown if the table of names above is not up to
|
||||
// date.
|
||||
if (type >= UCharacterCategory.TYPE_NAMES_.length) {
|
||||
result = UCharacterCategory.UNKNOWN_TYPE_NAME_;
|
||||
}
|
||||
else {
|
||||
result = UCharacterCategory.TYPE_NAMES_[type];
|
||||
}
|
||||
StringBuffer tempResult = new StringBuffer(result);
|
||||
tempResult.insert(0, '<');
|
||||
tempResult.append('-');
|
||||
String chStr = Integer.toHexString(ch).toUpperCase();
|
||||
int zeros = 4 - chStr.length();
|
||||
while (zeros > 0) {
|
||||
tempResult.append('0');
|
||||
zeros --;
|
||||
}
|
||||
tempResult.append(chStr);
|
||||
tempResult.append('>');
|
||||
result = tempResult.toString();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -6,8 +6,8 @@
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
|
||||
* $Date: 2001/03/23 19:51:38 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2002/02/15 02:53:35 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -30,5 +30,6 @@ interface UCharacterNameChoice
|
||||
|
||||
static final int U_UNICODE_CHAR_NAME = 0;
|
||||
static final int U_UNICODE_10_CHAR_NAME = 1;
|
||||
static final int U_CHAR_NAME_CHOICE_COUNT = 2;
|
||||
static final int U_EXTENDED_CHAR_NAME = 2;
|
||||
static final int U_CHAR_NAME_CHOICE_COUNT = 3;
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UCharacterTest.java,v $
|
||||
* $Date: 2002/02/08 23:44:17 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2002/02/15 02:53:32 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -555,16 +555,33 @@ public final class UCharacterTest extends TestFmwk
|
||||
*/
|
||||
public void TestNames()
|
||||
{
|
||||
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
|
||||
0x23456};
|
||||
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xd800, 0xdc00,
|
||||
0xff08, 0xffe5, 0xffff, 0x23456, 0x9};
|
||||
String name[] = {"LATIN SMALL LETTER A",
|
||||
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||
"CJK UNIFIED IDEOGRAPH-3401",
|
||||
"CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
|
||||
"HANGUL SYLLABLE HIH", "FULLWIDTH LEFT PARENTHESIS",
|
||||
"FULLWIDTH YEN SIGN", "CJK UNIFIED IDEOGRAPH-23456"};
|
||||
"HANGUL SYLLABLE HIH", "", "",
|
||||
"FULLWIDTH LEFT PARENTHESIS",
|
||||
"FULLWIDTH YEN SIGN", "", "CJK UNIFIED IDEOGRAPH-23456",
|
||||
""};
|
||||
String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
|
||||
"", "", "FULLWIDTH OPENING PARENTHESIS", "", ""};
|
||||
"", "", "", "", "FULLWIDTH OPENING PARENTHESIS", "",
|
||||
"", "", "HORIZONTAL TABULATION"};
|
||||
String extendedname[] = {"LATIN SMALL LETTER A",
|
||||
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||
"CJK UNIFIED IDEOGRAPH-3401",
|
||||
"CJK UNIFIED IDEOGRAPH-7FED",
|
||||
"HANGUL SYLLABLE GA",
|
||||
"HANGUL SYLLABLE HIH",
|
||||
"<lead surrogate-D800>",
|
||||
"<trail surrogate-DC00>",
|
||||
"FULLWIDTH LEFT PARENTHESIS",
|
||||
"FULLWIDTH YEN SIGN",
|
||||
"<noncharacter-FFFF>",
|
||||
"CJK UNIFIED IDEOGRAPH-23456",
|
||||
"HORIZONTAL TABULATION"};
|
||||
|
||||
int size = c.length;
|
||||
String str;
|
||||
int uc;
|
||||
@ -573,7 +590,8 @@ public final class UCharacterTest extends TestFmwk
|
||||
{
|
||||
// modern Unicode character name
|
||||
str = UCharacter.getName(c[i]);
|
||||
if (!str.equals(name[i]))
|
||||
if ((str == null && name[i].length() > 0) ||
|
||||
(str != null && !str.equals(name[i])))
|
||||
{
|
||||
errln("FAIL \\u" + hex(c[i]) + " expected name " +
|
||||
name[i]);
|
||||
@ -590,9 +608,18 @@ public final class UCharacterTest extends TestFmwk
|
||||
break;
|
||||
}
|
||||
|
||||
// extended character name
|
||||
str = UCharacter.getExtendedName(c[i]);
|
||||
if (str == null || !str.equals(extendedname[i]))
|
||||
{
|
||||
errln("FAIL \\u" + hex(c[i]) + " expected extended name " +
|
||||
extendedname[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
// retrieving unicode character from modern name
|
||||
uc = UCharacter.getCharFromName(name[i]);
|
||||
if (uc != c[i])
|
||||
if (uc != c[i] && name[i].length() != 0)
|
||||
{
|
||||
errln("FAIL " + name[i] + " expected character \\u" + hex(c[i]));
|
||||
break;
|
||||
@ -600,9 +627,17 @@ public final class UCharacterTest extends TestFmwk
|
||||
|
||||
//retrieving unicode character from 1.0 name
|
||||
uc = UCharacter.getCharFromName1_0(oldname[i]);
|
||||
if (uc != c[i] && oldname[i].length() != 0)
|
||||
{
|
||||
errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + hex(c[i]));
|
||||
break;
|
||||
}
|
||||
|
||||
//retrieving unicode character from extended name
|
||||
uc = UCharacter.getCharFromExtendedName(extendedname[i]);
|
||||
if (uc != c[i] && i != 0 && (i == 1 || i == 6))
|
||||
{
|
||||
errln("FAIL " + name[i] + " expected 1.0 character \\u" + hex(c[i]));
|
||||
errln("FAIL " + extendedname[i] + " expected extended character \\u" + hex(c[i]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1014,8 +1049,8 @@ public final class UCharacterTest extends TestFmwk
|
||||
try
|
||||
{
|
||||
UCharacterTest test = new UCharacterTest();
|
||||
//test.TestEnumeration();
|
||||
test.run(arg);
|
||||
test.TestNames();
|
||||
//test.run(arg);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UCharacter.java,v $
|
||||
* $Date: 2002/02/08 01:08:38 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2002/02/15 02:53:32 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -17,6 +17,8 @@ package com.ibm.text;
|
||||
import java.util.Locale;
|
||||
import com.ibm.util.Utility;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
import com.ibm.text.BreakIterator;
|
||||
import com.ibm.text.RuleBasedBreakIterator;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
@ -910,8 +912,7 @@ public final class UCharacter
|
||||
*/
|
||||
public static String getName(int ch)
|
||||
{
|
||||
return NAME_.getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -929,10 +930,33 @@ public final class UCharacter
|
||||
return NAME_.getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Retrieves a name for a valid codepoint. Unlike getName(int) and
|
||||
* getName1_0(int), this method will return a name even for codepoints that
|
||||
* are not assigned a name in UnicodeData.txt.
|
||||
* </p>
|
||||
* The names are returned in the following order.
|
||||
* <ul>
|
||||
* <li> Most current Unicode name if there is any
|
||||
* <li> Unicode 1.0 name if there is any
|
||||
* <li> Extended name in the form of "<codepoint_type-codepoint_hex_digits>".
|
||||
* E.g. <noncharacter-FFFE>
|
||||
* </ul>
|
||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||
* incurs a one-time initialisation cost to construct the name tables.
|
||||
* @param ch the code point for which to get the name
|
||||
* @return a name for the argument codepoint
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static String getExtendedName(int ch)
|
||||
{
|
||||
return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a Unicode code point by its most current Unicode name and return its
|
||||
* code point value.<br>
|
||||
* <p>Find a Unicode code point by its most current Unicode name and
|
||||
* return its code point value. All Unicode names are in uppercase.</p>
|
||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||
* incurs a one-time initialisation cost to construct the name tables.
|
||||
* @param name most current Unicode character name whose code point is to be
|
||||
@ -946,8 +970,8 @@ public final class UCharacter
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a Unicode character by its version 1.0 Unicode name and return its
|
||||
* code point value.<br>
|
||||
* <p>Find a Unicode character by its version 1.0 Unicode name and return
|
||||
* its code point value. All Unicode names are in uppercase.</p>
|
||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||
* incurs a one-time initialisation cost to construct the name tables.
|
||||
* @param name Unicode 1.0 code point name whose code point is to
|
||||
@ -959,6 +983,31 @@ public final class UCharacter
|
||||
return NAME_.getCharFromName(
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Find a Unicode character by any of its names and return its code
|
||||
* point value. All Unicode names are in uppercase.
|
||||
* Extended names are all lowercase except for numbers and are contained
|
||||
* within angle brackets.</p>
|
||||
* The names are searched in the following order
|
||||
* <ul>
|
||||
* <li> Most current Unicode name if there is any
|
||||
* <li> Unicode 1.0 name if there is any
|
||||
* <li> Extended name in the form of "<codepoint_type-codepoint_hex_digits>".
|
||||
* E.g. <noncharacter-FFFE>
|
||||
* </ul>
|
||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||
* incurs a one-time initialisation cost to construct the name tables.
|
||||
* @param name codepoint name
|
||||
* @return code point associated with the name or -1 if the name is not
|
||||
* found.
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static int getCharFromExtendedName(String name)
|
||||
{
|
||||
return NAME_.getCharFromName(
|
||||
UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a code point corresponding to the two UTF16 characters.<br>
|
||||
@ -1016,6 +1065,38 @@ public final class UCharacter
|
||||
{
|
||||
return toLowerCase(Locale.getDefault(), str);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets the titlecase version of the argument string.</p>
|
||||
* <p>Position for titlecasing is determined by the argument break
|
||||
* iterator, hence the user can customized his break iterator for
|
||||
* a specialized titlecasing. In this case only the forward iteration
|
||||
* needs to be implemented.
|
||||
* If the break iterator passed in is null, the default Unicode algorithm
|
||||
* will be used to determine the titlecase positions.
|
||||
* </p>
|
||||
* <p>Only positions returned by the break iterator will be title cased,
|
||||
* character in between the positions will all be in lower case.</p>
|
||||
* <p>Casing is dependent on the default locale and context-sensitive</p>
|
||||
* @param str source string to be performed on
|
||||
* @param breakiter break iterator to determine the positions in which
|
||||
* the character should be title cased.
|
||||
* @return lowercase version of the argument string
|
||||
*/
|
||||
public static String toTitleCase(String str, BreakIterator breakiter)
|
||||
{
|
||||
if (breakiter == null) {
|
||||
String rules = "$cased=[[:Lu:][:Lt:][:Ll:]];" +
|
||||
"$case_ignorable=[[:Mn:][:Me:][:Cf:][:Lm:][:Sk:]"
|
||||
+ " \\u0027\u00AD\u2019];" +
|
||||
"$not_cased=[^$cased$case_ignorable];" +
|
||||
"[$not_cased$case_ignorable]*/" +
|
||||
"$cased[$cased$case_ignorable]*$not_cased*;";
|
||||
breakiter = new RuleBasedBreakIterator(rules);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets uppercase version of the argument string.
|
||||
@ -1111,6 +1192,30 @@ public final class UCharacter
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets the titlecase version of the argument string.</p>
|
||||
* <p>Position for titlecasing is determined by the argument break
|
||||
* iterator, hence the user can customized his break iterator for
|
||||
* a specialized titlecasing. In this case only the forward iteration
|
||||
* needs to be implemented.
|
||||
* If the break iterator passed in is null, the default Unicode algorithm
|
||||
* will be used to determine the titlecase positions.
|
||||
* </p>
|
||||
* <p>Only positions returned by the break iterator will be title cased,
|
||||
* character in between the positions will all be in lower case.</p>
|
||||
* <p>Casing is dependent on the argument locale and context-sensitive</p>
|
||||
* @param locale which string is to be converted in
|
||||
* @param str source string to be performed on
|
||||
* @param breakiter break iterator to determine the positions in which
|
||||
* the character should be title cased.
|
||||
* @return lowercase version of the argument string
|
||||
*/
|
||||
public static String toTitleCase(Locale locale, String str,
|
||||
BreakIterator breakiter)
|
||||
{
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
* The given character is mapped to its case folding equivalent according to
|
||||
* UnicodeData.txt and CaseFolding.txt; if the character has no case folding
|
||||
|
@ -6,8 +6,8 @@
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
|
||||
* $Date: 2001/10/12 23:53:16 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2002/02/15 02:53:35 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -26,22 +26,19 @@ package com.ibm.text;
|
||||
|
||||
public class UCharacterCategory
|
||||
{
|
||||
// private constructor ===================================================
|
||||
|
||||
/**
|
||||
* Private constructor to prevent initialisation
|
||||
*/
|
||||
private UCharacterCategory()
|
||||
{
|
||||
}
|
||||
|
||||
// public variable =======================================================
|
||||
// public variable -----------------------------------------------------
|
||||
|
||||
/**
|
||||
* Unassigned character type
|
||||
*/
|
||||
public static final int UNASSIGNED = 0;
|
||||
/**
|
||||
* Character type Cn
|
||||
* Not Assigned (no characters in [UnicodeData.txt] have this property)
|
||||
* @draft 2.1
|
||||
*/
|
||||
public static final int GENERAL_OTHER_TYPES = 0;
|
||||
/**
|
||||
* Character type Lu
|
||||
*/
|
||||
public static final int UPPERCASE_LETTER = 1;
|
||||
@ -163,17 +160,13 @@ public class UCharacterCategory
|
||||
* Character type Pf
|
||||
*/
|
||||
public static final int FINAL_PUNCTUATION = 29;
|
||||
/**
|
||||
* Character type Cn
|
||||
*/
|
||||
public static final int GENERAL_OTHER_TYPES = 30;
|
||||
|
||||
// start of 31 ------------
|
||||
|
||||
/**
|
||||
* Character type count
|
||||
*/
|
||||
public static final int CHAR_CATEGORY_COUNT = 31;
|
||||
public static final int CHAR_CATEGORY_COUNT = 30;
|
||||
|
||||
/**
|
||||
* Gets the name of the argument category
|
||||
@ -245,4 +238,72 @@ public class UCharacterCategory
|
||||
}
|
||||
return "Unassigned";
|
||||
}
|
||||
|
||||
// private constructor -----------------------------------------------
|
||||
|
||||
/**
|
||||
* Private constructor to prevent initialisation
|
||||
*/
|
||||
private UCharacterCategory()
|
||||
{
|
||||
}
|
||||
|
||||
// package private data members --------------------------------------
|
||||
|
||||
/**
|
||||
* Not a character type
|
||||
*/
|
||||
static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
|
||||
/**
|
||||
* Lead surrogate type
|
||||
*/
|
||||
static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
|
||||
/**
|
||||
* Trail surrogate type
|
||||
*/
|
||||
static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
|
||||
/**
|
||||
* Extended category count
|
||||
*/
|
||||
static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
|
||||
/**
|
||||
* Type names used for extended names
|
||||
*/
|
||||
static final String TYPE_NAMES_[] = {"unassigned",
|
||||
"uppercase letter",
|
||||
"lowercase letter",
|
||||
"titlecase letter",
|
||||
"modifier letter",
|
||||
"other letter",
|
||||
"non spacing mark",
|
||||
"enclosing mark",
|
||||
"combining spacing mark",
|
||||
"decimal digit number",
|
||||
"letter number",
|
||||
"other number",
|
||||
"space separator",
|
||||
"line separator",
|
||||
"paragraph separator",
|
||||
"control",
|
||||
"format",
|
||||
"private use area",
|
||||
"surrogate",
|
||||
"dash punctuation",
|
||||
"start punctuation",
|
||||
"end punctuation",
|
||||
"connector punctuation",
|
||||
"other punctuation",
|
||||
"math symbol",
|
||||
"currency symbol",
|
||||
"modifier symbol",
|
||||
"other symbol",
|
||||
"initial punctuation",
|
||||
"final punctuation",
|
||||
"noncharacter",
|
||||
"lead surrogate",
|
||||
"trail surrogate"};
|
||||
/**
|
||||
* Unknown type name
|
||||
*/
|
||||
static final String UNKNOWN_TYPE_NAME_ = "unknown";
|
||||
}
|
||||
|
@ -6,8 +6,8 @@
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
||||
* $Date: 2002/02/08 01:08:38 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2002/02/15 02:53:34 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -409,20 +409,29 @@ final class UCharacterName
|
||||
return null;
|
||||
}
|
||||
|
||||
int tempChoice = choice;
|
||||
if (tempChoice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
|
||||
}
|
||||
|
||||
String result = "";
|
||||
|
||||
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
||||
// the same as the modern ones, extension A was only introduced with
|
||||
// Unicode 3.0, and the Hangul syllable block was moved and changed around
|
||||
// Unicode 1.1.5.
|
||||
if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||
if (tempChoice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||
// try getting algorithmic name first
|
||||
result = getAlgName(ch);
|
||||
}
|
||||
|
||||
// getting normal character name
|
||||
if (result == null || result.length() == 0) {
|
||||
result = getGroupName(ch, choice);
|
||||
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
result = getExtendedName(ch);
|
||||
} else {
|
||||
result = getGroupName(ch, choice);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -442,26 +451,42 @@ final class UCharacterName
|
||||
name == null || name.length() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
String uppercasename = UCharacter.toUpperCase(Locale.ENGLISH, name);
|
||||
|
||||
// try extended names first
|
||||
int result = getExtendedChar(name, choice);
|
||||
if (result >= -1) {
|
||||
return result;
|
||||
}
|
||||
// try algorithmic names first, if fails then try group names
|
||||
// int result = getAlgorithmChar(choice, uppercasename);
|
||||
int tempChoice = choice;
|
||||
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
|
||||
}
|
||||
|
||||
// 1.0 has no algorithmic names
|
||||
String upperCaseName = UCharacter.toUpperCase(Locale.ENGLISH, name);
|
||||
// try algorithmic names now, 1.0 has no algorithmic names
|
||||
if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||
return getGroupChar(uppercasename, choice);
|
||||
return getGroupChar(upperCaseName, tempChoice);
|
||||
}
|
||||
int count = 0;
|
||||
if (m_algorithm_ != null) {
|
||||
count = m_algorithm_.length;
|
||||
}
|
||||
for (count --; count >= 0; count --) {
|
||||
int result = m_algorithm_[count].getAlgorithmChar(name);
|
||||
result = m_algorithm_[count].getAlgorithmChar(name);
|
||||
if (result >= 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return getGroupChar(uppercasename, choice);
|
||||
|
||||
result = getGroupChar(upperCaseName, tempChoice);
|
||||
if (result == -1 &&
|
||||
choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
result = getGroupChar(upperCaseName,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -943,4 +968,118 @@ final class UCharacterName
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the character with extended name of the form <....>.
|
||||
* @param name of the character to be found
|
||||
* @param choice name choice
|
||||
* @return character associated with the name, -1 if such character is not
|
||||
* found and -2 if we should continue with the search.
|
||||
*/
|
||||
private int getExtendedChar(String name, int choice)
|
||||
{
|
||||
if (name.charAt(0) == '<') {
|
||||
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||
int endIndex = name.length() - 1;
|
||||
if (name.charAt(endIndex) == '>') {
|
||||
int startIndex = name.lastIndexOf('-');
|
||||
if (startIndex >= 0) { // We've got a category.
|
||||
startIndex ++;
|
||||
int result = -1;
|
||||
try {
|
||||
result = Integer.parseInt(
|
||||
name.substring(startIndex, endIndex),
|
||||
16);
|
||||
}
|
||||
catch (NumberFormatException e) {
|
||||
return -1;
|
||||
}
|
||||
// Now validate the category name. We could use a
|
||||
// binary search, or a trie, if we really wanted to.
|
||||
String type = name.substring(1, startIndex - 1);
|
||||
int length = UCharacterCategory.TYPE_NAMES_.length;
|
||||
for (int i = 0; i < length; ++ i) {
|
||||
if (type.compareToIgnoreCase(
|
||||
UCharacterCategory.TYPE_NAMES_[i]) == 0) {
|
||||
if (getType(result) == i) {
|
||||
return result;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character extended type
|
||||
* @param ch character to be tested
|
||||
* @return extended type it is associated with
|
||||
*/
|
||||
private int getType(int ch)
|
||||
{
|
||||
if ((ch & 0xFFFE) == 0xFFFE || (ch >= 0xFDD0 && ch <= 0xFDEF)) {
|
||||
// not a character we return a invalid category count
|
||||
return UCharacterCategory.NON_CHARACTER_;
|
||||
}
|
||||
// Undo ICU exceptions to the UCD when determining the category.
|
||||
int result;
|
||||
if (UCharacter.isISOControl(ch)) {
|
||||
result = UCharacterCategory.CONTROL;
|
||||
}
|
||||
else {
|
||||
result = UCharacter.getType(ch);
|
||||
if (result == UCharacterCategory.SURROGATE) {
|
||||
if (UTF16.isLeadSurrogate((char)ch)) {
|
||||
result = UCharacterCategory.LEAD_SURROGATE_;
|
||||
}
|
||||
else {
|
||||
result = UCharacterCategory.TRAIL_SURROGATE_;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the extended name
|
||||
*/
|
||||
private String getExtendedName(int ch)
|
||||
{
|
||||
String result = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||
if (result == null) {
|
||||
if (getType(ch) == UCharacterCategory.CONTROL) {
|
||||
result = getName(ch,
|
||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||
}
|
||||
if (result == null) {
|
||||
int type = getType(ch);
|
||||
// Return unknown if the table of names above is not up to
|
||||
// date.
|
||||
if (type >= UCharacterCategory.TYPE_NAMES_.length) {
|
||||
result = UCharacterCategory.UNKNOWN_TYPE_NAME_;
|
||||
}
|
||||
else {
|
||||
result = UCharacterCategory.TYPE_NAMES_[type];
|
||||
}
|
||||
StringBuffer tempResult = new StringBuffer(result);
|
||||
tempResult.insert(0, '<');
|
||||
tempResult.append('-');
|
||||
String chStr = Integer.toHexString(ch).toUpperCase();
|
||||
int zeros = 4 - chStr.length();
|
||||
while (zeros > 0) {
|
||||
tempResult.append('0');
|
||||
zeros --;
|
||||
}
|
||||
tempResult.append(chStr);
|
||||
tempResult.append('>');
|
||||
result = tempResult.toString();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -6,8 +6,8 @@
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
|
||||
* $Date: 2001/03/23 19:51:38 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2002/02/15 02:53:35 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -30,5 +30,6 @@ interface UCharacterNameChoice
|
||||
|
||||
static final int U_UNICODE_CHAR_NAME = 0;
|
||||
static final int U_UNICODE_10_CHAR_NAME = 1;
|
||||
static final int U_CHAR_NAME_CHOICE_COUNT = 2;
|
||||
static final int U_EXTENDED_CHAR_NAME = 2;
|
||||
static final int U_CHAR_NAME_CHOICE_COUNT = 3;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user