ICU-1707
new extended name APIs X-SVN-Rev: 7677
This commit is contained in:
parent
6fdea6ffb4
commit
d882319b30
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $
|
||||||
* $Date: 2002/02/08 23:44:17 $
|
* $Date: 2002/02/15 02:53:32 $
|
||||||
* $Revision: 1.21 $
|
* $Revision: 1.22 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -555,16 +555,33 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
*/
|
*/
|
||||||
public void TestNames()
|
public void TestNames()
|
||||||
{
|
{
|
||||||
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
|
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xd800, 0xdc00,
|
||||||
0x23456};
|
0xff08, 0xffe5, 0xffff, 0x23456, 0x9};
|
||||||
String name[] = {"LATIN SMALL LETTER A",
|
String name[] = {"LATIN SMALL LETTER A",
|
||||||
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||||
"CJK UNIFIED IDEOGRAPH-3401",
|
"CJK UNIFIED IDEOGRAPH-3401",
|
||||||
"CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
|
"CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
|
||||||
"HANGUL SYLLABLE HIH", "FULLWIDTH LEFT PARENTHESIS",
|
"HANGUL SYLLABLE HIH", "", "",
|
||||||
"FULLWIDTH YEN SIGN", "CJK UNIFIED IDEOGRAPH-23456"};
|
"FULLWIDTH LEFT PARENTHESIS",
|
||||||
|
"FULLWIDTH YEN SIGN", "", "CJK UNIFIED IDEOGRAPH-23456",
|
||||||
|
""};
|
||||||
String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
|
String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
|
||||||
"", "", "FULLWIDTH OPENING PARENTHESIS", "", ""};
|
"", "", "", "", "FULLWIDTH OPENING PARENTHESIS", "",
|
||||||
|
"", "", "HORIZONTAL TABULATION"};
|
||||||
|
String extendedname[] = {"LATIN SMALL LETTER A",
|
||||||
|
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||||
|
"CJK UNIFIED IDEOGRAPH-3401",
|
||||||
|
"CJK UNIFIED IDEOGRAPH-7FED",
|
||||||
|
"HANGUL SYLLABLE GA",
|
||||||
|
"HANGUL SYLLABLE HIH",
|
||||||
|
"<lead surrogate-D800>",
|
||||||
|
"<trail surrogate-DC00>",
|
||||||
|
"FULLWIDTH LEFT PARENTHESIS",
|
||||||
|
"FULLWIDTH YEN SIGN",
|
||||||
|
"<noncharacter-FFFF>",
|
||||||
|
"CJK UNIFIED IDEOGRAPH-23456",
|
||||||
|
"HORIZONTAL TABULATION"};
|
||||||
|
|
||||||
int size = c.length;
|
int size = c.length;
|
||||||
String str;
|
String str;
|
||||||
int uc;
|
int uc;
|
||||||
@ -573,7 +590,8 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
{
|
{
|
||||||
// modern Unicode character name
|
// modern Unicode character name
|
||||||
str = UCharacter.getName(c[i]);
|
str = UCharacter.getName(c[i]);
|
||||||
if (!str.equals(name[i]))
|
if ((str == null && name[i].length() > 0) ||
|
||||||
|
(str != null && !str.equals(name[i])))
|
||||||
{
|
{
|
||||||
errln("FAIL \\u" + hex(c[i]) + " expected name " +
|
errln("FAIL \\u" + hex(c[i]) + " expected name " +
|
||||||
name[i]);
|
name[i]);
|
||||||
@ -590,9 +608,18 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extended character name
|
||||||
|
str = UCharacter.getExtendedName(c[i]);
|
||||||
|
if (str == null || !str.equals(extendedname[i]))
|
||||||
|
{
|
||||||
|
errln("FAIL \\u" + hex(c[i]) + " expected extended name " +
|
||||||
|
extendedname[i]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// retrieving unicode character from modern name
|
// retrieving unicode character from modern name
|
||||||
uc = UCharacter.getCharFromName(name[i]);
|
uc = UCharacter.getCharFromName(name[i]);
|
||||||
if (uc != c[i])
|
if (uc != c[i] && name[i].length() != 0)
|
||||||
{
|
{
|
||||||
errln("FAIL " + name[i] + " expected character \\u" + hex(c[i]));
|
errln("FAIL " + name[i] + " expected character \\u" + hex(c[i]));
|
||||||
break;
|
break;
|
||||||
@ -600,9 +627,17 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
|
|
||||||
//retrieving unicode character from 1.0 name
|
//retrieving unicode character from 1.0 name
|
||||||
uc = UCharacter.getCharFromName1_0(oldname[i]);
|
uc = UCharacter.getCharFromName1_0(oldname[i]);
|
||||||
|
if (uc != c[i] && oldname[i].length() != 0)
|
||||||
|
{
|
||||||
|
errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + hex(c[i]));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
//retrieving unicode character from extended name
|
||||||
|
uc = UCharacter.getCharFromExtendedName(extendedname[i]);
|
||||||
if (uc != c[i] && i != 0 && (i == 1 || i == 6))
|
if (uc != c[i] && i != 0 && (i == 1 || i == 6))
|
||||||
{
|
{
|
||||||
errln("FAIL " + name[i] + " expected 1.0 character \\u" + hex(c[i]));
|
errln("FAIL " + extendedname[i] + " expected extended character \\u" + hex(c[i]));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1014,8 +1049,8 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
UCharacterTest test = new UCharacterTest();
|
UCharacterTest test = new UCharacterTest();
|
||||||
//test.TestEnumeration();
|
test.TestNames();
|
||||||
test.run(arg);
|
//test.run(arg);
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
{
|
{
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
|
||||||
* $Date: 2002/02/08 01:08:38 $
|
* $Date: 2002/02/15 02:53:32 $
|
||||||
* $Revision: 1.21 $
|
* $Revision: 1.22 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -17,6 +17,8 @@ package com.ibm.text;
|
|||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import com.ibm.util.Utility;
|
import com.ibm.util.Utility;
|
||||||
import com.ibm.icu.util.RangeValueIterator;
|
import com.ibm.icu.util.RangeValueIterator;
|
||||||
|
import com.ibm.text.BreakIterator;
|
||||||
|
import com.ibm.text.RuleBasedBreakIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
@ -910,8 +912,7 @@ public final class UCharacter
|
|||||||
*/
|
*/
|
||||||
public static String getName(int ch)
|
public static String getName(int ch)
|
||||||
{
|
{
|
||||||
return NAME_.getName(ch,
|
return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||||
UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -929,10 +930,33 @@ public final class UCharacter
|
|||||||
return NAME_.getName(ch,
|
return NAME_.getName(ch,
|
||||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Retrieves a name for a valid codepoint. Unlike getName(int) and
|
||||||
|
* getName1_0(int), this method will return a name even for codepoints that
|
||||||
|
* are not assigned a name in UnicodeData.txt.
|
||||||
|
* </p>
|
||||||
|
* The names are returned in the following order.
|
||||||
|
* <ul>
|
||||||
|
* <li> Most current Unicode name if there is any
|
||||||
|
* <li> Unicode 1.0 name if there is any
|
||||||
|
* <li> Extended name in the form of "<codepoint_type-codepoint_hex_digits>".
|
||||||
|
* E.g. <noncharacter-FFFE>
|
||||||
|
* </ul>
|
||||||
|
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||||
|
* incurs a one-time initialisation cost to construct the name tables.
|
||||||
|
* @param ch the code point for which to get the name
|
||||||
|
* @return a name for the argument codepoint
|
||||||
|
* @draft 2.1
|
||||||
|
*/
|
||||||
|
public static String getExtendedName(int ch)
|
||||||
|
{
|
||||||
|
return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find a Unicode code point by its most current Unicode name and return its
|
* <p>Find a Unicode code point by its most current Unicode name and
|
||||||
* code point value.<br>
|
* return its code point value. All Unicode names are in uppercase.</p>
|
||||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||||
* incurs a one-time initialisation cost to construct the name tables.
|
* incurs a one-time initialisation cost to construct the name tables.
|
||||||
* @param name most current Unicode character name whose code point is to be
|
* @param name most current Unicode character name whose code point is to be
|
||||||
@ -946,8 +970,8 @@ public final class UCharacter
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find a Unicode character by its version 1.0 Unicode name and return its
|
* <p>Find a Unicode character by its version 1.0 Unicode name and return
|
||||||
* code point value.<br>
|
* its code point value. All Unicode names are in uppercase.</p>
|
||||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||||
* incurs a one-time initialisation cost to construct the name tables.
|
* incurs a one-time initialisation cost to construct the name tables.
|
||||||
* @param name Unicode 1.0 code point name whose code point is to
|
* @param name Unicode 1.0 code point name whose code point is to
|
||||||
@ -959,6 +983,31 @@ public final class UCharacter
|
|||||||
return NAME_.getCharFromName(
|
return NAME_.getCharFromName(
|
||||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
|
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Find a Unicode character by any of its names and return its code
|
||||||
|
* point value. All Unicode names are in uppercase.
|
||||||
|
* Extended names are all lowercase except for numbers and are contained
|
||||||
|
* within angle brackets.</p>
|
||||||
|
* The names are searched in the following order
|
||||||
|
* <ul>
|
||||||
|
* <li> Most current Unicode name if there is any
|
||||||
|
* <li> Unicode 1.0 name if there is any
|
||||||
|
* <li> Extended name in the form of "<codepoint_type-codepoint_hex_digits>".
|
||||||
|
* E.g. <noncharacter-FFFE>
|
||||||
|
* </ul>
|
||||||
|
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||||
|
* incurs a one-time initialisation cost to construct the name tables.
|
||||||
|
* @param name codepoint name
|
||||||
|
* @return code point associated with the name or -1 if the name is not
|
||||||
|
* found.
|
||||||
|
* @draft 2.1
|
||||||
|
*/
|
||||||
|
public static int getCharFromExtendedName(String name)
|
||||||
|
{
|
||||||
|
return NAME_.getCharFromName(
|
||||||
|
UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a code point corresponding to the two UTF16 characters.<br>
|
* Returns a code point corresponding to the two UTF16 characters.<br>
|
||||||
@ -1016,6 +1065,38 @@ public final class UCharacter
|
|||||||
{
|
{
|
||||||
return toLowerCase(Locale.getDefault(), str);
|
return toLowerCase(Locale.getDefault(), str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Gets the titlecase version of the argument string.</p>
|
||||||
|
* <p>Position for titlecasing is determined by the argument break
|
||||||
|
* iterator, hence the user can customize the break iterator for
|
||||||
|
* a specialized titlecasing. In this case only the forward iteration
|
||||||
|
* needs to be implemented.
|
||||||
|
* If the break iterator passed in is null, the default Unicode algorithm
|
||||||
|
* will be used to determine the titlecase positions.
|
||||||
|
* </p>
|
||||||
|
* <p>Only positions returned by the break iterator will be title cased,
|
||||||
|
* characters in between the positions will all be in lower case.</p>
|
||||||
|
* <p>Casing is dependent on the default locale and context-sensitive</p>
|
||||||
|
* @param str source string to be performed on
|
||||||
|
* @param breakiter break iterator to determine the positions in which
|
||||||
|
* the character should be title cased.
|
||||||
|
* @return titlecase version of the argument string
|
||||||
|
*/
|
||||||
|
public static String toTitleCase(String str, BreakIterator breakiter)
|
||||||
|
{
|
||||||
|
if (breakiter == null) {
|
||||||
|
String rules = "$cased=[[:Lu:][:Lt:][:Ll:]];" +
|
||||||
|
"$case_ignorable=[[:Mn:][:Me:][:Cf:][:Lm:][:Sk:]"
|
||||||
|
+ " \\u0027\u00AD\u2019];" +
|
||||||
|
"$not_cased=[^$cased$case_ignorable];" +
|
||||||
|
"[$not_cased$case_ignorable]*/" +
|
||||||
|
"$cased[$cased$case_ignorable]*$not_cased*;";
|
||||||
|
breakiter = new RuleBasedBreakIterator(rules);
|
||||||
|
}
|
||||||
|
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets uppercase version of the argument string.
|
* Gets uppercase version of the argument string.
|
||||||
@ -1111,6 +1192,30 @@ public final class UCharacter
|
|||||||
return result.toString();
|
return result.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Gets the titlecase version of the argument string.</p>
|
||||||
|
* <p>Position for titlecasing is determined by the argument break
|
||||||
|
* iterator, hence the user can customize the break iterator for
|
||||||
|
* a specialized titlecasing. In this case only the forward iteration
|
||||||
|
* needs to be implemented.
|
||||||
|
* If the break iterator passed in is null, the default Unicode algorithm
|
||||||
|
* will be used to determine the titlecase positions.
|
||||||
|
* </p>
|
||||||
|
* <p>Only positions returned by the break iterator will be title cased,
|
||||||
|
* characters in between the positions will all be in lower case.</p>
|
||||||
|
* <p>Casing is dependent on the argument locale and context-sensitive</p>
|
||||||
|
* @param locale locale in which the string is to be converted
|
||||||
|
* @param str source string to be performed on
|
||||||
|
* @param breakiter break iterator to determine the positions in which
|
||||||
|
* the character should be title cased.
|
||||||
|
* @return titlecase version of the argument string
|
||||||
|
*/
|
||||||
|
public static String toTitleCase(Locale locale, String str,
|
||||||
|
BreakIterator breakiter)
|
||||||
|
{
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The given character is mapped to its case folding equivalent according to
|
* The given character is mapped to its case folding equivalent according to
|
||||||
* UnicodeData.txt and CaseFolding.txt; if the character has no case folding
|
* UnicodeData.txt and CaseFolding.txt; if the character has no case folding
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
*
|
*
|
||||||
* $Source:
|
* $Source:
|
||||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
|
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
|
||||||
* $Date: 2001/10/12 23:53:16 $
|
* $Date: 2002/02/15 02:53:35 $
|
||||||
* $Revision: 1.3 $
|
* $Revision: 1.4 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -26,22 +26,19 @@ package com.ibm.text;
|
|||||||
|
|
||||||
public class UCharacterCategory
|
public class UCharacterCategory
|
||||||
{
|
{
|
||||||
// private constructor ===================================================
|
// public variable -----------------------------------------------------
|
||||||
|
|
||||||
/**
|
|
||||||
* Private constructor to prevent initialisation
|
|
||||||
*/
|
|
||||||
private UCharacterCategory()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
// public variable =======================================================
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unassigned character type
|
* Unassigned character type
|
||||||
*/
|
*/
|
||||||
public static final int UNASSIGNED = 0;
|
public static final int UNASSIGNED = 0;
|
||||||
/**
|
/**
|
||||||
|
* Character type Cn
|
||||||
|
* Not Assigned (no characters in [UnicodeData.txt] have this property)
|
||||||
|
* @draft 2.1
|
||||||
|
*/
|
||||||
|
public static final int GENERAL_OTHER_TYPES = 0;
|
||||||
|
/**
|
||||||
* Character type Lu
|
* Character type Lu
|
||||||
*/
|
*/
|
||||||
public static final int UPPERCASE_LETTER = 1;
|
public static final int UPPERCASE_LETTER = 1;
|
||||||
@ -163,17 +160,13 @@ public class UCharacterCategory
|
|||||||
* Character type Pf
|
* Character type Pf
|
||||||
*/
|
*/
|
||||||
public static final int FINAL_PUNCTUATION = 29;
|
public static final int FINAL_PUNCTUATION = 29;
|
||||||
/**
|
|
||||||
* Character type Cn
|
|
||||||
*/
|
|
||||||
public static final int GENERAL_OTHER_TYPES = 30;
|
|
||||||
|
|
||||||
// start of 31 ------------
|
// start of 31 ------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Character type count
|
* Character type count
|
||||||
*/
|
*/
|
||||||
public static final int CHAR_CATEGORY_COUNT = 31;
|
public static final int CHAR_CATEGORY_COUNT = 30;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the name of the argument category
|
* Gets the name of the argument category
|
||||||
@ -245,4 +238,72 @@ public class UCharacterCategory
|
|||||||
}
|
}
|
||||||
return "Unassigned";
|
return "Unassigned";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// private constructor -----------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private constructor to prevent instantiation
|
||||||
|
*/
|
||||||
|
private UCharacterCategory()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// package private data members --------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Not a character type
|
||||||
|
*/
|
||||||
|
static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
|
||||||
|
/**
|
||||||
|
* Lead surrogate type
|
||||||
|
*/
|
||||||
|
static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
|
||||||
|
/**
|
||||||
|
* Trail surrogate type
|
||||||
|
*/
|
||||||
|
static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
|
||||||
|
/**
|
||||||
|
* Extended category count
|
||||||
|
*/
|
||||||
|
static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
|
||||||
|
/**
|
||||||
|
* Type names used for extended names
|
||||||
|
*/
|
||||||
|
static final String TYPE_NAMES_[] = {"unassigned",
|
||||||
|
"uppercase letter",
|
||||||
|
"lowercase letter",
|
||||||
|
"titlecase letter",
|
||||||
|
"modifier letter",
|
||||||
|
"other letter",
|
||||||
|
"non spacing mark",
|
||||||
|
"enclosing mark",
|
||||||
|
"combining spacing mark",
|
||||||
|
"decimal digit number",
|
||||||
|
"letter number",
|
||||||
|
"other number",
|
||||||
|
"space separator",
|
||||||
|
"line separator",
|
||||||
|
"paragraph separator",
|
||||||
|
"control",
|
||||||
|
"format",
|
||||||
|
"private use area",
|
||||||
|
"surrogate",
|
||||||
|
"dash punctuation",
|
||||||
|
"start punctuation",
|
||||||
|
"end punctuation",
|
||||||
|
"connector punctuation",
|
||||||
|
"other punctuation",
|
||||||
|
"math symbol",
|
||||||
|
"currency symbol",
|
||||||
|
"modifier symbol",
|
||||||
|
"other symbol",
|
||||||
|
"initial punctuation",
|
||||||
|
"final punctuation",
|
||||||
|
"noncharacter",
|
||||||
|
"lead surrogate",
|
||||||
|
"trail surrogate"};
|
||||||
|
/**
|
||||||
|
* Unknown type name
|
||||||
|
*/
|
||||||
|
static final String UNKNOWN_TYPE_NAME_ = "unknown";
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
*
|
*
|
||||||
* $Source:
|
* $Source:
|
||||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
||||||
* $Date: 2002/02/08 01:08:38 $
|
* $Date: 2002/02/15 02:53:34 $
|
||||||
* $Revision: 1.6 $
|
* $Revision: 1.7 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -409,20 +409,29 @@ final class UCharacterName
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int tempChoice = choice;
|
||||||
|
if (tempChoice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
String result = "";
|
String result = "";
|
||||||
|
|
||||||
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
||||||
// the same as the modern ones, extension A was only introduced with
|
// the same as the modern ones, extension A was only introduced with
|
||||||
// Unicode 3.0, and the Hangul syllable block was moved and changed around
|
// Unicode 3.0, and the Hangul syllable block was moved and changed around
|
||||||
// Unicode 1.1.5.
|
// Unicode 1.1.5.
|
||||||
if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
if (tempChoice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||||
// try getting algorithmic name first
|
// try getting algorithmic name first
|
||||||
result = getAlgName(ch);
|
result = getAlgName(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
// getting normal character name
|
// getting normal character name
|
||||||
if (result == null || result.length() == 0) {
|
if (result == null || result.length() == 0) {
|
||||||
result = getGroupName(ch, choice);
|
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
result = getExtendedName(ch);
|
||||||
|
} else {
|
||||||
|
result = getGroupName(ch, choice);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@ -442,26 +451,42 @@ final class UCharacterName
|
|||||||
name == null || name.length() == 0) {
|
name == null || name.length() == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
String uppercasename = UCharacter.toUpperCase(Locale.ENGLISH, name);
|
// try extended names first
|
||||||
|
int result = getExtendedChar(name, choice);
|
||||||
|
if (result >= -1) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
// try algorithmic names first, if fails then try group names
|
// try algorithmic names first, if fails then try group names
|
||||||
// int result = getAlgorithmChar(choice, uppercasename);
|
// int result = getAlgorithmChar(choice, uppercasename);
|
||||||
|
int tempChoice = choice;
|
||||||
|
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
// 1.0 has no algorithmic names
|
String upperCaseName = UCharacter.toUpperCase(Locale.ENGLISH, name);
|
||||||
|
// try algorithmic names now, 1.0 has no algorithmic names
|
||||||
if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||||
return getGroupChar(uppercasename, choice);
|
return getGroupChar(upperCaseName, tempChoice);
|
||||||
}
|
}
|
||||||
int count = 0;
|
int count = 0;
|
||||||
if (m_algorithm_ != null) {
|
if (m_algorithm_ != null) {
|
||||||
count = m_algorithm_.length;
|
count = m_algorithm_.length;
|
||||||
}
|
}
|
||||||
for (count --; count >= 0; count --) {
|
for (count --; count >= 0; count --) {
|
||||||
int result = m_algorithm_[count].getAlgorithmChar(name);
|
result = m_algorithm_[count].getAlgorithmChar(name);
|
||||||
if (result >= 0) {
|
if (result >= 0) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return getGroupChar(uppercasename, choice);
|
|
||||||
|
result = getGroupChar(upperCaseName, tempChoice);
|
||||||
|
if (result == -1 &&
|
||||||
|
choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
result = getGroupChar(upperCaseName,
|
||||||
|
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -943,4 +968,118 @@ final class UCharacterName
|
|||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Getting the character with extended name of the form <....>.
|
||||||
|
* @param name of the character to be found
|
||||||
|
* @param choice name choice
|
||||||
|
* @return character associated with the name, -1 if such character is not
|
||||||
|
* found and -2 if we should continue with the search.
|
||||||
|
*/
|
||||||
|
private int getExtendedChar(String name, int choice)
|
||||||
|
{
|
||||||
|
if (name.charAt(0) == '<') {
|
||||||
|
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
int endIndex = name.length() - 1;
|
||||||
|
if (name.charAt(endIndex) == '>') {
|
||||||
|
int startIndex = name.lastIndexOf('-');
|
||||||
|
if (startIndex >= 0) { // We've got a category.
|
||||||
|
startIndex ++;
|
||||||
|
int result = -1;
|
||||||
|
try {
|
||||||
|
result = Integer.parseInt(
|
||||||
|
name.substring(startIndex, endIndex),
|
||||||
|
16);
|
||||||
|
}
|
||||||
|
catch (NumberFormatException e) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Now validate the category name. We could use a
|
||||||
|
// binary search, or a trie, if we really wanted to.
|
||||||
|
String type = name.substring(1, startIndex - 1);
|
||||||
|
int length = UCharacterCategory.TYPE_NAMES_.length;
|
||||||
|
for (int i = 0; i < length; ++ i) {
|
||||||
|
if (type.compareToIgnoreCase(
|
||||||
|
UCharacterCategory.TYPE_NAMES_[i]) == 0) {
|
||||||
|
if (getType(result) == i) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the character extended type
|
||||||
|
* @param ch character to be tested
|
||||||
|
* @return extended type it is associated with
|
||||||
|
*/
|
||||||
|
private int getType(int ch)
|
||||||
|
{
|
||||||
|
if ((ch & 0xFFFE) == 0xFFFE || (ch >= 0xFDD0 && ch <= 0xFDEF)) {
|
||||||
|
// not a character, so we return an invalid category count
|
||||||
|
return UCharacterCategory.NON_CHARACTER_;
|
||||||
|
}
|
||||||
|
// Undo ICU exceptions to the UCD when determining the category.
|
||||||
|
int result;
|
||||||
|
if (UCharacter.isISOControl(ch)) {
|
||||||
|
result = UCharacterCategory.CONTROL;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = UCharacter.getType(ch);
|
||||||
|
if (result == UCharacterCategory.SURROGATE) {
|
||||||
|
if (UTF16.isLeadSurrogate((char)ch)) {
|
||||||
|
result = UCharacterCategory.LEAD_SURROGATE_;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = UCharacterCategory.TRAIL_SURROGATE_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the extended name
|
||||||
|
*/
|
||||||
|
private String getExtendedName(int ch)
|
||||||
|
{
|
||||||
|
String result = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||||
|
if (result == null) {
|
||||||
|
if (getType(ch) == UCharacterCategory.CONTROL) {
|
||||||
|
result = getName(ch,
|
||||||
|
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||||
|
}
|
||||||
|
if (result == null) {
|
||||||
|
int type = getType(ch);
|
||||||
|
// Return unknown if the table of names above is not up to
|
||||||
|
// date.
|
||||||
|
if (type >= UCharacterCategory.TYPE_NAMES_.length) {
|
||||||
|
result = UCharacterCategory.UNKNOWN_TYPE_NAME_;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = UCharacterCategory.TYPE_NAMES_[type];
|
||||||
|
}
|
||||||
|
StringBuffer tempResult = new StringBuffer(result);
|
||||||
|
tempResult.insert(0, '<');
|
||||||
|
tempResult.append('-');
|
||||||
|
String chStr = Integer.toHexString(ch).toUpperCase();
|
||||||
|
int zeros = 4 - chStr.length();
|
||||||
|
while (zeros > 0) {
|
||||||
|
tempResult.append('0');
|
||||||
|
zeros --;
|
||||||
|
}
|
||||||
|
tempResult.append(chStr);
|
||||||
|
tempResult.append('>');
|
||||||
|
result = tempResult.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
*
|
*
|
||||||
* $Source:
|
* $Source:
|
||||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
|
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
|
||||||
* $Date: 2001/03/23 19:51:38 $
|
* $Date: 2002/02/15 02:53:35 $
|
||||||
* $Revision: 1.2 $
|
* $Revision: 1.3 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -30,5 +30,6 @@ interface UCharacterNameChoice
|
|||||||
|
|
||||||
static final int U_UNICODE_CHAR_NAME = 0;
|
static final int U_UNICODE_CHAR_NAME = 0;
|
||||||
static final int U_UNICODE_10_CHAR_NAME = 1;
|
static final int U_UNICODE_10_CHAR_NAME = 1;
|
||||||
static final int U_CHAR_NAME_CHOICE_COUNT = 2;
|
static final int U_EXTENDED_CHAR_NAME = 2;
|
||||||
|
static final int U_CHAR_NAME_CHOICE_COUNT = 3;
|
||||||
}
|
}
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UCharacterTest.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UCharacterTest.java,v $
|
||||||
* $Date: 2002/02/08 23:44:17 $
|
* $Date: 2002/02/15 02:53:32 $
|
||||||
* $Revision: 1.21 $
|
* $Revision: 1.22 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -555,16 +555,33 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
*/
|
*/
|
||||||
public void TestNames()
|
public void TestNames()
|
||||||
{
|
{
|
||||||
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
|
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xd800, 0xdc00,
|
||||||
0x23456};
|
0xff08, 0xffe5, 0xffff, 0x23456, 0x9};
|
||||||
String name[] = {"LATIN SMALL LETTER A",
|
String name[] = {"LATIN SMALL LETTER A",
|
||||||
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||||
"CJK UNIFIED IDEOGRAPH-3401",
|
"CJK UNIFIED IDEOGRAPH-3401",
|
||||||
"CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
|
"CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
|
||||||
"HANGUL SYLLABLE HIH", "FULLWIDTH LEFT PARENTHESIS",
|
"HANGUL SYLLABLE HIH", "", "",
|
||||||
"FULLWIDTH YEN SIGN", "CJK UNIFIED IDEOGRAPH-23456"};
|
"FULLWIDTH LEFT PARENTHESIS",
|
||||||
|
"FULLWIDTH YEN SIGN", "", "CJK UNIFIED IDEOGRAPH-23456",
|
||||||
|
""};
|
||||||
String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
|
String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
|
||||||
"", "", "FULLWIDTH OPENING PARENTHESIS", "", ""};
|
"", "", "", "", "FULLWIDTH OPENING PARENTHESIS", "",
|
||||||
|
"", "", "HORIZONTAL TABULATION"};
|
||||||
|
String extendedname[] = {"LATIN SMALL LETTER A",
|
||||||
|
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||||
|
"CJK UNIFIED IDEOGRAPH-3401",
|
||||||
|
"CJK UNIFIED IDEOGRAPH-7FED",
|
||||||
|
"HANGUL SYLLABLE GA",
|
||||||
|
"HANGUL SYLLABLE HIH",
|
||||||
|
"<lead surrogate-D800>",
|
||||||
|
"<trail surrogate-DC00>",
|
||||||
|
"FULLWIDTH LEFT PARENTHESIS",
|
||||||
|
"FULLWIDTH YEN SIGN",
|
||||||
|
"<noncharacter-FFFF>",
|
||||||
|
"CJK UNIFIED IDEOGRAPH-23456",
|
||||||
|
"HORIZONTAL TABULATION"};
|
||||||
|
|
||||||
int size = c.length;
|
int size = c.length;
|
||||||
String str;
|
String str;
|
||||||
int uc;
|
int uc;
|
||||||
@ -573,7 +590,8 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
{
|
{
|
||||||
// modern Unicode character name
|
// modern Unicode character name
|
||||||
str = UCharacter.getName(c[i]);
|
str = UCharacter.getName(c[i]);
|
||||||
if (!str.equals(name[i]))
|
if ((str == null && name[i].length() > 0) ||
|
||||||
|
(str != null && !str.equals(name[i])))
|
||||||
{
|
{
|
||||||
errln("FAIL \\u" + hex(c[i]) + " expected name " +
|
errln("FAIL \\u" + hex(c[i]) + " expected name " +
|
||||||
name[i]);
|
name[i]);
|
||||||
@ -590,9 +608,18 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extended character name
|
||||||
|
str = UCharacter.getExtendedName(c[i]);
|
||||||
|
if (str == null || !str.equals(extendedname[i]))
|
||||||
|
{
|
||||||
|
errln("FAIL \\u" + hex(c[i]) + " expected extended name " +
|
||||||
|
extendedname[i]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// retrieving unicode character from modern name
|
// retrieving unicode character from modern name
|
||||||
uc = UCharacter.getCharFromName(name[i]);
|
uc = UCharacter.getCharFromName(name[i]);
|
||||||
if (uc != c[i])
|
if (uc != c[i] && name[i].length() != 0)
|
||||||
{
|
{
|
||||||
errln("FAIL " + name[i] + " expected character \\u" + hex(c[i]));
|
errln("FAIL " + name[i] + " expected character \\u" + hex(c[i]));
|
||||||
break;
|
break;
|
||||||
@ -600,9 +627,17 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
|
|
||||||
//retrieving unicode character from 1.0 name
|
//retrieving unicode character from 1.0 name
|
||||||
uc = UCharacter.getCharFromName1_0(oldname[i]);
|
uc = UCharacter.getCharFromName1_0(oldname[i]);
|
||||||
|
if (uc != c[i] && oldname[i].length() != 0)
|
||||||
|
{
|
||||||
|
errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + hex(c[i]));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
//retrieving unicode character from extended name
|
||||||
|
uc = UCharacter.getCharFromExtendedName(extendedname[i]);
|
||||||
if (uc != c[i] && i != 0 && (i == 1 || i == 6))
|
if (uc != c[i] && i != 0 && (i == 1 || i == 6))
|
||||||
{
|
{
|
||||||
errln("FAIL " + name[i] + " expected 1.0 character \\u" + hex(c[i]));
|
errln("FAIL " + extendedname[i] + " expected extended character \\u" + hex(c[i]));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1014,8 +1049,8 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
UCharacterTest test = new UCharacterTest();
|
UCharacterTest test = new UCharacterTest();
|
||||||
//test.TestEnumeration();
|
test.TestNames();
|
||||||
test.run(arg);
|
//test.run(arg);
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
{
|
{
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UCharacter.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UCharacter.java,v $
|
||||||
* $Date: 2002/02/08 01:08:38 $
|
* $Date: 2002/02/15 02:53:32 $
|
||||||
* $Revision: 1.21 $
|
* $Revision: 1.22 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -17,6 +17,8 @@ package com.ibm.text;
|
|||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import com.ibm.util.Utility;
|
import com.ibm.util.Utility;
|
||||||
import com.ibm.icu.util.RangeValueIterator;
|
import com.ibm.icu.util.RangeValueIterator;
|
||||||
|
import com.ibm.text.BreakIterator;
|
||||||
|
import com.ibm.text.RuleBasedBreakIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
@ -910,8 +912,7 @@ public final class UCharacter
|
|||||||
*/
|
*/
|
||||||
public static String getName(int ch)
|
public static String getName(int ch)
|
||||||
{
|
{
|
||||||
return NAME_.getName(ch,
|
return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||||
UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -929,10 +930,33 @@ public final class UCharacter
|
|||||||
return NAME_.getName(ch,
|
return NAME_.getName(ch,
|
||||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Retrieves a name for a valid codepoint. Unlike getName(int) and
|
||||||
|
* getName1_0(int), this method will return a name even for codepoints that
|
||||||
|
* are not assigned a name in UnicodeData.txt.
|
||||||
|
* </p>
|
||||||
|
* The names are returned in the following order.
|
||||||
|
* <ul>
|
||||||
|
* <li> Most current Unicode name if there is any
|
||||||
|
* <li> Unicode 1.0 name if there is any
|
||||||
|
* <li> Extended name in the form of "<codepoint_type-codepoint_hex_digits>".
|
||||||
|
* E.g. <noncharacter-FFFE>
|
||||||
|
* </ul>
|
||||||
|
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||||
|
* incurs a one-time initialisation cost to construct the name tables.
|
||||||
|
* @param ch the code point for which to get the name
|
||||||
|
* @return a name for the argument codepoint
|
||||||
|
* @draft 2.1
|
||||||
|
*/
|
||||||
|
public static String getExtendedName(int ch)
|
||||||
|
{
|
||||||
|
return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find a Unicode code point by its most current Unicode name and return its
|
* <p>Find a Unicode code point by its most current Unicode name and
|
||||||
* code point value.<br>
|
* return its code point value. All Unicode names are in uppercase.</p>
|
||||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||||
* incurs a one-time initialisation cost to construct the name tables.
|
* incurs a one-time initialisation cost to construct the name tables.
|
||||||
* @param name most current Unicode character name whose code point is to be
|
* @param name most current Unicode character name whose code point is to be
|
||||||
@ -946,8 +970,8 @@ public final class UCharacter
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find a Unicode character by its version 1.0 Unicode name and return its
|
* <p>Find a Unicode character by its version 1.0 Unicode name and return
|
||||||
* code point value.<br>
|
* its code point value. All Unicode names are in uppercase.</p>
|
||||||
* Note calling any methods related to code point names, e.g. get*Name*()
|
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||||
* incurs a one-time initialisation cost to construct the name tables.
|
* incurs a one-time initialisation cost to construct the name tables.
|
||||||
* @param name Unicode 1.0 code point name whose code point is to
|
* @param name Unicode 1.0 code point name whose code point is to
|
||||||
@ -959,6 +983,31 @@ public final class UCharacter
|
|||||||
return NAME_.getCharFromName(
|
return NAME_.getCharFromName(
|
||||||
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
|
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Find a Unicode character by any of its names and return its code
|
||||||
|
* point value. All Unicode names are in uppercase.
|
||||||
|
* Extended names are all lowercase except for numbers and are contained
|
||||||
|
* within angle brackets.</p>
|
||||||
|
* The names are searched in the following order
|
||||||
|
* <ul>
|
||||||
|
* <li> Most current Unicode name if there is any
|
||||||
|
* <li> Unicode 1.0 name if there is any
|
||||||
|
* <li> Extended name in the form of "<codepoint_type-codepoint_hex_digits>".
|
||||||
|
* E.g. <noncharacter-FFFE>
|
||||||
|
* </ul>
|
||||||
|
* Note calling any methods related to code point names, e.g. get*Name*()
|
||||||
|
* incurs a one-time initialisation cost to construct the name tables.
|
||||||
|
* @param name codepoint name
|
||||||
|
* @return code point associated with the name or -1 if the name is not
|
||||||
|
* found.
|
||||||
|
* @draft 2.1
|
||||||
|
*/
|
||||||
|
public static int getCharFromExtendedName(String name)
|
||||||
|
{
|
||||||
|
return NAME_.getCharFromName(
|
||||||
|
UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a code point corresponding to the two UTF16 characters.<br>
|
* Returns a code point corresponding to the two UTF16 characters.<br>
|
||||||
@ -1016,6 +1065,38 @@ public final class UCharacter
|
|||||||
{
|
{
|
||||||
return toLowerCase(Locale.getDefault(), str);
|
return toLowerCase(Locale.getDefault(), str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Gets the titlecase version of the argument string.</p>
|
||||||
|
* <p>Position for titlecasing is determined by the argument break
|
||||||
|
* iterator, hence the user can customize the break iterator for
|
||||||
|
* a specialized titlecasing. In this case only the forward iteration
|
||||||
|
* needs to be implemented.
|
||||||
|
* If the break iterator passed in is null, the default Unicode algorithm
|
||||||
|
* will be used to determine the titlecase positions.
|
||||||
|
* </p>
|
||||||
|
* <p>Only positions returned by the break iterator will be title cased,
|
||||||
|
* character in between the positions will all be in lower case.</p>
|
||||||
|
* <p>Casing is dependent on the default locale and context-sensitive</p>
|
||||||
|
* @param str source string to be performed on
|
||||||
|
* @param breakiter break iterator to determine the positions in which
|
||||||
|
* the character should be title cased.
|
||||||
|
* @return titlecase version of the argument string
|
||||||
|
*/
|
||||||
|
public static String toTitleCase(String str, BreakIterator breakiter)
|
||||||
|
{
|
||||||
|
if (breakiter == null) {
|
||||||
|
String rules = "$cased=[[:Lu:][:Lt:][:Ll:]];" +
|
||||||
|
"$case_ignorable=[[:Mn:][:Me:][:Cf:][:Lm:][:Sk:]"
|
||||||
|
+ " \\u0027\u00AD\u2019];" +
|
||||||
|
"$not_cased=[^$cased$case_ignorable];" +
|
||||||
|
"[$not_cased$case_ignorable]*/" +
|
||||||
|
"$cased[$cased$case_ignorable]*$not_cased*;";
|
||||||
|
breakiter = new RuleBasedBreakIterator(rules);
|
||||||
|
}
|
||||||
|
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets uppercase version of the argument string.
|
* Gets uppercase version of the argument string.
|
||||||
@ -1111,6 +1192,30 @@ public final class UCharacter
|
|||||||
return result.toString();
|
return result.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Gets the titlecase version of the argument string.</p>
|
||||||
|
* <p>Position for titlecasing is determined by the argument break
|
||||||
|
* iterator, hence the user can customize the break iterator for
|
||||||
|
* a specialized titlecasing. In this case only the forward iteration
|
||||||
|
* needs to be implemented.
|
||||||
|
* If the break iterator passed in is null, the default Unicode algorithm
|
||||||
|
* will be used to determine the titlecase positions.
|
||||||
|
* </p>
|
||||||
|
* <p>Only positions returned by the break iterator will be title cased,
|
||||||
|
* character in between the positions will all be in lower case.</p>
|
||||||
|
* <p>Casing is dependent on the argument locale and context-sensitive</p>
|
||||||
|
* @param locale which string is to be converted in
|
||||||
|
* @param str source string to be performed on
|
||||||
|
* @param breakiter break iterator to determine the positions in which
|
||||||
|
* the character should be title cased.
|
||||||
|
* @return titlecase version of the argument string
|
||||||
|
*/
|
||||||
|
public static String toTitleCase(Locale locale, String str,
|
||||||
|
BreakIterator breakiter)
|
||||||
|
{
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The given character is mapped to its case folding equivalent according to
|
* The given character is mapped to its case folding equivalent according to
|
||||||
* UnicodeData.txt and CaseFolding.txt; if the character has no case folding
|
* UnicodeData.txt and CaseFolding.txt; if the character has no case folding
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
*
|
*
|
||||||
* $Source:
|
* $Source:
|
||||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
|
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $
|
||||||
* $Date: 2001/10/12 23:53:16 $
|
* $Date: 2002/02/15 02:53:35 $
|
||||||
* $Revision: 1.3 $
|
* $Revision: 1.4 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -26,22 +26,19 @@ package com.ibm.text;
|
|||||||
|
|
||||||
public class UCharacterCategory
|
public class UCharacterCategory
|
||||||
{
|
{
|
||||||
// private constructor ===================================================
|
// public variable -----------------------------------------------------
|
||||||
|
|
||||||
/**
|
|
||||||
* Private constructor to prevent initialisation
|
|
||||||
*/
|
|
||||||
private UCharacterCategory()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
// public variable =======================================================
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unassigned character type
|
* Unassigned character type
|
||||||
*/
|
*/
|
||||||
public static final int UNASSIGNED = 0;
|
public static final int UNASSIGNED = 0;
|
||||||
/**
|
/**
|
||||||
|
* Character type Cn
|
||||||
|
* Not Assigned (no characters in [UnicodeData.txt] have this property)
|
||||||
|
* @draft 2.1
|
||||||
|
*/
|
||||||
|
public static final int GENERAL_OTHER_TYPES = 0;
|
||||||
|
/**
|
||||||
* Character type Lu
|
* Character type Lu
|
||||||
*/
|
*/
|
||||||
public static final int UPPERCASE_LETTER = 1;
|
public static final int UPPERCASE_LETTER = 1;
|
||||||
@ -163,17 +160,13 @@ public class UCharacterCategory
|
|||||||
* Character type Pf
|
* Character type Pf
|
||||||
*/
|
*/
|
||||||
public static final int FINAL_PUNCTUATION = 29;
|
public static final int FINAL_PUNCTUATION = 29;
|
||||||
/**
|
|
||||||
* Character type Cn
|
|
||||||
*/
|
|
||||||
public static final int GENERAL_OTHER_TYPES = 30;
|
|
||||||
|
|
||||||
// start of 31 ------------
|
// start of 31 ------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Character type count
|
* Character type count
|
||||||
*/
|
*/
|
||||||
public static final int CHAR_CATEGORY_COUNT = 31;
|
public static final int CHAR_CATEGORY_COUNT = 30;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the name of the argument category
|
* Gets the name of the argument category
|
||||||
@ -245,4 +238,72 @@ public class UCharacterCategory
|
|||||||
}
|
}
|
||||||
return "Unassigned";
|
return "Unassigned";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// private constructor -----------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private constructor to prevent initialisation
|
||||||
|
*/
|
||||||
|
private UCharacterCategory()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// package private data members --------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Not a character type
|
||||||
|
*/
|
||||||
|
static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
|
||||||
|
/**
|
||||||
|
* Lead surrogate type
|
||||||
|
*/
|
||||||
|
static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
|
||||||
|
/**
|
||||||
|
* Trail surrogate type
|
||||||
|
*/
|
||||||
|
static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
|
||||||
|
/**
|
||||||
|
* Extended category count
|
||||||
|
*/
|
||||||
|
static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
|
||||||
|
/**
|
||||||
|
* Type names used for extended names
|
||||||
|
*/
|
||||||
|
static final String TYPE_NAMES_[] = {"unassigned",
|
||||||
|
"uppercase letter",
|
||||||
|
"lowercase letter",
|
||||||
|
"titlecase letter",
|
||||||
|
"modifier letter",
|
||||||
|
"other letter",
|
||||||
|
"non spacing mark",
|
||||||
|
"enclosing mark",
|
||||||
|
"combining spacing mark",
|
||||||
|
"decimal digit number",
|
||||||
|
"letter number",
|
||||||
|
"other number",
|
||||||
|
"space separator",
|
||||||
|
"line separator",
|
||||||
|
"paragraph separator",
|
||||||
|
"control",
|
||||||
|
"format",
|
||||||
|
"private use area",
|
||||||
|
"surrogate",
|
||||||
|
"dash punctuation",
|
||||||
|
"start punctuation",
|
||||||
|
"end punctuation",
|
||||||
|
"connector punctuation",
|
||||||
|
"other punctuation",
|
||||||
|
"math symbol",
|
||||||
|
"currency symbol",
|
||||||
|
"modifier symbol",
|
||||||
|
"other symbol",
|
||||||
|
"initial punctuation",
|
||||||
|
"final punctuation",
|
||||||
|
"noncharacter",
|
||||||
|
"lead surrogate",
|
||||||
|
"trail surrogate"};
|
||||||
|
/**
|
||||||
|
* Unknown type name
|
||||||
|
*/
|
||||||
|
static final String UNKNOWN_TYPE_NAME_ = "unknown";
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
*
|
*
|
||||||
* $Source:
|
* $Source:
|
||||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
||||||
* $Date: 2002/02/08 01:08:38 $
|
* $Date: 2002/02/15 02:53:34 $
|
||||||
* $Revision: 1.6 $
|
* $Revision: 1.7 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -409,20 +409,29 @@ final class UCharacterName
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int tempChoice = choice;
|
||||||
|
if (tempChoice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
String result = "";
|
String result = "";
|
||||||
|
|
||||||
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
||||||
// the same as the modern ones, extension A was only introduced with
|
// the same as the modern ones, extension A was only introduced with
|
||||||
// Unicode 3.0, and the Hangul syllable block was moved and changed around
|
// Unicode 3.0, and the Hangul syllable block was moved and changed around
|
||||||
// Unicode 1.1.5.
|
// Unicode 1.1.5.
|
||||||
if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
if (tempChoice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||||
// try getting algorithmic name first
|
// try getting algorithmic name first
|
||||||
result = getAlgName(ch);
|
result = getAlgName(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
// getting normal character name
|
// getting normal character name
|
||||||
if (result == null || result.length() == 0) {
|
if (result == null || result.length() == 0) {
|
||||||
result = getGroupName(ch, choice);
|
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
result = getExtendedName(ch);
|
||||||
|
} else {
|
||||||
|
result = getGroupName(ch, choice);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@ -442,26 +451,42 @@ final class UCharacterName
|
|||||||
name == null || name.length() == 0) {
|
name == null || name.length() == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
String uppercasename = UCharacter.toUpperCase(Locale.ENGLISH, name);
|
// try extended names first
|
||||||
|
int result = getExtendedChar(name, choice);
|
||||||
|
if (result >= -1) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
// try algorithmic names first, if fails then try group names
|
// try algorithmic names first, if fails then try group names
|
||||||
// int result = getAlgorithmChar(choice, uppercasename);
|
// int result = getAlgorithmChar(choice, uppercasename);
|
||||||
|
int tempChoice = choice;
|
||||||
|
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
// 1.0 has no algorithmic names
|
String upperCaseName = UCharacter.toUpperCase(Locale.ENGLISH, name);
|
||||||
|
// try algorithmic names now, 1.0 has no algorithmic names
|
||||||
if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
|
||||||
return getGroupChar(uppercasename, choice);
|
return getGroupChar(upperCaseName, tempChoice);
|
||||||
}
|
}
|
||||||
int count = 0;
|
int count = 0;
|
||||||
if (m_algorithm_ != null) {
|
if (m_algorithm_ != null) {
|
||||||
count = m_algorithm_.length;
|
count = m_algorithm_.length;
|
||||||
}
|
}
|
||||||
for (count --; count >= 0; count --) {
|
for (count --; count >= 0; count --) {
|
||||||
int result = m_algorithm_[count].getAlgorithmChar(name);
|
result = m_algorithm_[count].getAlgorithmChar(name);
|
||||||
if (result >= 0) {
|
if (result >= 0) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return getGroupChar(uppercasename, choice);
|
|
||||||
|
result = getGroupChar(upperCaseName, tempChoice);
|
||||||
|
if (result == -1 &&
|
||||||
|
choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
result = getGroupChar(upperCaseName,
|
||||||
|
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -943,4 +968,118 @@ final class UCharacterName
|
|||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Getting the character with extended name of the form <....>.
|
||||||
|
* @param name of the character to be found
|
||||||
|
* @param choice name choice
|
||||||
|
* @return character associated with the name, -1 if such character is not
|
||||||
|
* found and -2 if we should continue with the search.
|
||||||
|
*/
|
||||||
|
private int getExtendedChar(String name, int choice)
|
||||||
|
{
|
||||||
|
if (name.charAt(0) == '<') {
|
||||||
|
if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
|
||||||
|
int endIndex = name.length() - 1;
|
||||||
|
if (name.charAt(endIndex) == '>') {
|
||||||
|
int startIndex = name.lastIndexOf('-');
|
||||||
|
if (startIndex >= 0) { // We've got a category.
|
||||||
|
startIndex ++;
|
||||||
|
int result = -1;
|
||||||
|
try {
|
||||||
|
result = Integer.parseInt(
|
||||||
|
name.substring(startIndex, endIndex),
|
||||||
|
16);
|
||||||
|
}
|
||||||
|
catch (NumberFormatException e) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Now validate the category name. We could use a
|
||||||
|
// binary search, or a trie, if we really wanted to.
|
||||||
|
String type = name.substring(1, startIndex - 1);
|
||||||
|
int length = UCharacterCategory.TYPE_NAMES_.length;
|
||||||
|
for (int i = 0; i < length; ++ i) {
|
||||||
|
if (type.compareToIgnoreCase(
|
||||||
|
UCharacterCategory.TYPE_NAMES_[i]) == 0) {
|
||||||
|
if (getType(result) == i) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the character extended type
|
||||||
|
* @param ch character to be tested
|
||||||
|
* @return extended type it is associated with
|
||||||
|
*/
|
||||||
|
private int getType(int ch)
|
||||||
|
{
|
||||||
|
if ((ch & 0xFFFE) == 0xFFFE || (ch >= 0xFDD0 && ch <= 0xFDEF)) {
|
||||||
|
// not a character, so we return an invalid category count
|
||||||
|
return UCharacterCategory.NON_CHARACTER_;
|
||||||
|
}
|
||||||
|
// Undo ICU exceptions to the UCD when determining the category.
|
||||||
|
int result;
|
||||||
|
if (UCharacter.isISOControl(ch)) {
|
||||||
|
result = UCharacterCategory.CONTROL;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = UCharacter.getType(ch);
|
||||||
|
if (result == UCharacterCategory.SURROGATE) {
|
||||||
|
if (UTF16.isLeadSurrogate((char)ch)) {
|
||||||
|
result = UCharacterCategory.LEAD_SURROGATE_;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = UCharacterCategory.TRAIL_SURROGATE_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the extended name
|
||||||
|
*/
|
||||||
|
private String getExtendedName(int ch)
|
||||||
|
{
|
||||||
|
String result = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
|
||||||
|
if (result == null) {
|
||||||
|
if (getType(ch) == UCharacterCategory.CONTROL) {
|
||||||
|
result = getName(ch,
|
||||||
|
UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
|
||||||
|
}
|
||||||
|
if (result == null) {
|
||||||
|
int type = getType(ch);
|
||||||
|
// Return unknown if the table of names above is not up to
|
||||||
|
// date.
|
||||||
|
if (type >= UCharacterCategory.TYPE_NAMES_.length) {
|
||||||
|
result = UCharacterCategory.UNKNOWN_TYPE_NAME_;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = UCharacterCategory.TYPE_NAMES_[type];
|
||||||
|
}
|
||||||
|
StringBuffer tempResult = new StringBuffer(result);
|
||||||
|
tempResult.insert(0, '<');
|
||||||
|
tempResult.append('-');
|
||||||
|
String chStr = Integer.toHexString(ch).toUpperCase();
|
||||||
|
int zeros = 4 - chStr.length();
|
||||||
|
while (zeros > 0) {
|
||||||
|
tempResult.append('0');
|
||||||
|
zeros --;
|
||||||
|
}
|
||||||
|
tempResult.append(chStr);
|
||||||
|
tempResult.append('>');
|
||||||
|
result = tempResult.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
*
|
*
|
||||||
* $Source:
|
* $Source:
|
||||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
|
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
|
||||||
* $Date: 2001/03/23 19:51:38 $
|
* $Date: 2002/02/15 02:53:35 $
|
||||||
* $Revision: 1.2 $
|
* $Revision: 1.3 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -30,5 +30,6 @@ interface UCharacterNameChoice
|
|||||||
|
|
||||||
static final int U_UNICODE_CHAR_NAME = 0;
|
static final int U_UNICODE_CHAR_NAME = 0;
|
||||||
static final int U_UNICODE_10_CHAR_NAME = 1;
|
static final int U_UNICODE_10_CHAR_NAME = 1;
|
||||||
static final int U_CHAR_NAME_CHOICE_COUNT = 2;
|
static final int U_EXTENDED_CHAR_NAME = 2;
|
||||||
|
static final int U_CHAR_NAME_CHOICE_COUNT = 3;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user