ICU-1707

new extended name APIs X-SVN-Rev: 7677
2002-02-15 02:53:35 +00:00 · 2002-02-15 02:53:35 +00:00 · d882319b30
commit d882319b30
parent 6fdea6ffb4
10 changed files with 782 additions and 100 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $ 
-* $Date: 2002/02/08 23:44:17 $ 
-* $Revision: 1.21 $
+* $Date: 2002/02/15 02:53:32 $ 
+* $Revision: 1.22 $
 *
 *******************************************************************************
 */
@ -555,16 +555,33 @@ public final class UCharacterTest extends TestFmwk
  */
  public void TestNames()
  {
-    int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
-               0x23456};
+    int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xd800, 0xdc00, 
+               0xff08, 0xffe5, 0xffff, 0x23456, 0x9};
    String name[] = {"LATIN SMALL LETTER A", 
                     "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 
                     "CJK UNIFIED IDEOGRAPH-3401", 
                     "CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA", 
-                     "HANGUL SYLLABLE HIH", "FULLWIDTH LEFT PARENTHESIS",
-                     "FULLWIDTH YEN SIGN", "CJK UNIFIED IDEOGRAPH-23456"};
+                     "HANGUL SYLLABLE HIH", "", "",
+                     "FULLWIDTH LEFT PARENTHESIS",
+                     "FULLWIDTH YEN SIGN", "", "CJK UNIFIED IDEOGRAPH-23456",
+                     ""};
    String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
-                        "", "", "FULLWIDTH OPENING PARENTHESIS", "", ""};
+                        "", "", "", "", "FULLWIDTH OPENING PARENTHESIS", "", 
+                        "", "", "HORIZONTAL TABULATION"};
+    String extendedname[] = {"LATIN SMALL LETTER A", 
+                             "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
+                             "CJK UNIFIED IDEOGRAPH-3401",
+                             "CJK UNIFIED IDEOGRAPH-7FED",
+                             "HANGUL SYLLABLE GA",
+                             "HANGUL SYLLABLE HIH",
+                             "<lead surrogate-D800>",
+                             "<trail surrogate-DC00>",
+                             "FULLWIDTH LEFT PARENTHESIS",
+                             "FULLWIDTH YEN SIGN",
+                             "<noncharacter-FFFF>",
+                             "CJK UNIFIED IDEOGRAPH-23456", 
+                             "HORIZONTAL TABULATION"};
+                             
    int size = c.length;
    String str;
    int uc;
@ -573,7 +590,8 @@ public final class UCharacterTest extends TestFmwk
    {
      // modern Unicode character name
      str = UCharacter.getName(c[i]);
-      if (!str.equals(name[i]))
+      if ((str == null && name[i].length() > 0) || 
+          (str != null && !str.equals(name[i])))
      {
        errln("FAIL \\u" + hex(c[i]) + " expected name " +
              name[i]);
@ -590,9 +608,18 @@ public final class UCharacterTest extends TestFmwk
        break;
      }
      
+      // extended character name
+      str = UCharacter.getExtendedName(c[i]);
+      if (str == null || !str.equals(extendedname[i]))
+      {
+        errln("FAIL \\u" + hex(c[i]) + " expected extended name " +
+              extendedname[i]);
+        break;
+      }
+      
      // retrieving unicode character from modern name
      uc = UCharacter.getCharFromName(name[i]);
-      if (uc != c[i])
+      if (uc != c[i] && name[i].length() != 0)
      {
        errln("FAIL " + name[i] + " expected character \\u" + hex(c[i]));
        break;
@ -600,9 +627,17 @@ public final class UCharacterTest extends TestFmwk
      
      //retrieving unicode character from 1.0 name
      uc = UCharacter.getCharFromName1_0(oldname[i]);
+      if (uc != c[i] && oldname[i].length() != 0)
+      {
+        errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + hex(c[i]));
+        break;
+      }
+      
+      //retrieving unicode character from extended name
+      uc = UCharacter.getCharFromExtendedName(extendedname[i]);
      if (uc != c[i] && i != 0 && (i == 1 || i == 6))
      {
-        errln("FAIL " + name[i] + " expected 1.0 character \\u" + hex(c[i]));
+        errln("FAIL " + extendedname[i] + " expected extended character \\u" + hex(c[i]));
        break;
      }
    }
@ -1014,8 +1049,8 @@ public final class UCharacterTest extends TestFmwk
    try
    {
      UCharacterTest test = new UCharacterTest();
-      //test.TestEnumeration();
-      test.run(arg);
+      test.TestNames();
+      //test.run(arg);
    }
    catch (Exception e)
    {
--- a/icu4j/src/com/ibm/icu/lang/UCharacter.java
+++ b/icu4j/src/com/ibm/icu/lang/UCharacter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $ 
-* $Date: 2002/02/08 01:08:38 $ 
-* $Revision: 1.21 $
+* $Date: 2002/02/15 02:53:32 $ 
+* $Revision: 1.22 $
 *
 *******************************************************************************
 */
@ -17,6 +17,8 @@ package com.ibm.text;
 import java.util.Locale;
 import com.ibm.util.Utility;
 import com.ibm.icu.util.RangeValueIterator;
+import com.ibm.text.BreakIterator;
+import com.ibm.text.RuleBasedBreakIterator;

 /**
 * <p>
@ -910,8 +912,7 @@ public final class UCharacter
    */
    public static String getName(int ch)
    {
-        return NAME_.getName(ch, 
-                                    UCharacterNameChoice.U_UNICODE_CHAR_NAME);
+        return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
    }
      
    /**
@ -929,10 +930,33 @@ public final class UCharacter
        return NAME_.getName(ch, 
                             UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
    }
+    
+    /**
+    * <p>Retrieves a name for a valid codepoint. Unlike getName(int) and
+    * getName1_0(int), this method will return a name even for codepoints that
+    * are not assigned a name in UnicodeData.txt.
+    * </p>
+    * The names are tried in the following order.
+    * <ul>
+    * <li> Most current Unicode name if there is any
+    * <li> Unicode 1.0 name if there is any (tried for control characters)
+    * <li> Extended name in the form of 
+    *      "&lt;codepoint_type-codepoint_hex_digits&gt;", with uppercase hex 
+    *      digits padded to at least 4 digits. E.g. &lt;noncharacter-FFFE&gt;
+    * </ul>
+    * Note calling any methods related to code point names, e.g. get*Name*() 
+    * incurs a one-time initialisation cost to construct the name tables.
+    * @param ch the code point for which to get the name
+    * @return a name for the argument codepoint
+    * @draft 2.1
+    */
+    public static String getExtendedName(int ch) 
+    {
+        return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
+    }
      
    /**
-    * Find a Unicode code point by its most current Unicode name and return its 
-    * code point value.<br>
+    * <p>Find a Unicode code point by its most current Unicode name and 
+    * return its code point value. All Unicode names are in uppercase.</p>
    * Note calling any methods related to code point names, e.g. get*Name*() 
    * incurs a one-time initialisation cost to construct the name tables.
    * @param name most current Unicode character name whose code point is to be 
@ -946,8 +970,8 @@ public final class UCharacter
    }
      
    /**
-    * Find a Unicode character by its version 1.0 Unicode name and return its 
-    * code point value.<br>
+    * <p>Find a Unicode character by its version 1.0 Unicode name and return 
+    * its code point value. All Unicode names are in uppercase.</p>
    * Note calling any methods related to code point names, e.g. get*Name*() 
    * incurs a one-time initialisation cost to construct the name tables.
    * @param name Unicode 1.0 code point name whose code point is to 
@ -959,6 +983,31 @@ public final class UCharacter
        return NAME_.getCharFromName(
                         UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
    }
+    
+    /**
+    * <p>Find a Unicode character by any of its names and return its code 
+    * point value. All Unicode names are in uppercase. 
+    * Extended names are all lowercase except for numbers and are contained
+    * within angle brackets.</p>
+    * The names are intended to be searched in the following order
+    * <ul>
+    * <li> Most current Unicode name if there is any
+    * <li> Unicode 1.0 name if there is any
+    * <li> Extended name in the form of 
+    *      "&lt;codepoint_type-codepoint_hex_digits&gt;". 
+    *      E.g. &lt;noncharacter-FFFE&gt;
+    * </ul>
+    * NOTE(review): in UCharacterName.getCharFromName a name starting with 
+    * '&lt;' is resolved as an extended name before anything else, and for 
+    * the extended choice the lookup appears to return right after the 
+    * modern-name group search, so algorithmic and Unicode 1.0 names may not
+    * be reached - confirm against the implementation.<br>
+    * Note calling any methods related to code point names, e.g. get*Name*() 
+    * incurs a one-time initialisation cost to construct the name tables.
+    * @param name codepoint name
+    * @return code point associated with the name or -1 if the name is not
+    *         found.
+    * @draft 2.1
+    */
+    public static int getCharFromExtendedName(String name)
+    {
+        return NAME_.getCharFromName(
+                            UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
+    }
      
    /**
    * Returns a code pointcorresponding to the two UTF16 characters.<br>
@ -1016,6 +1065,38 @@ public final class UCharacter
    {
        return toLowerCase(Locale.getDefault(), str);
    }
+    
+    /**
+    * <p>Gets the titlecase version of the argument string.</p>
+    * <p>Position for titlecasing is determined by the argument break 
+    * iterator, hence the user can customize the break iterator for 
+    * a specialized titlecasing. In this case only the forward iteration 
+    * needs to be implemented.
+    * If the break iterator passed in is null, a default 
+    * RuleBasedBreakIterator built from the rules in the method body
+    * will be used to determine the titlecase positions.
+    * </p>
+    * <p>Only positions returned by the break iterator will be title cased,
+    * characters in between the positions will all be in lower case.</p>
+    * <p>Casing is dependent on the default locale and context-sensitive</p>
+    * <p>NOTE: in this revision the method is a stub. It creates the default
+    * break iterator when none is given, but performs no casing and returns 
+    * the argument string unchanged.</p>
+    * @param str source string to be performed on
+    * @param breakiter break iterator to determine the positions in which
+    *        the character should be title cased.
+    * @return titlecase version of the argument string (currently the 
+    *         argument string itself, see the note above)
+    */
+    public static String toTitleCase(String str, BreakIterator breakiter)
+    {
+        if (breakiter == null) {
+            String rules = "$cased=[[:Lu:][:Lt:][:Ll:]];" +  
+                           "$case_ignorable=[[:Mn:][:Me:][:Cf:][:Lm:][:Sk:]" 
+                                            + " \\u0027\u00AD\u2019];" +
+                           "$not_cased=[^$cased$case_ignorable];" +
+                           "[$not_cased$case_ignorable]*/" + 
+                           "$cased[$cased$case_ignorable]*$not_cased*;";
+            breakiter = new RuleBasedBreakIterator(rules);
+        }
+        
+        return str;
+    }
      
    /**
    * Gets uppercase version of the argument string. 
@ -1111,6 +1192,30 @@ public final class UCharacter
        return result.toString();
    }
    
+    /**
+    * <p>Gets the titlecase version of the argument string.</p>
+    * <p>Position for titlecasing is determined by the argument break 
+    * iterator, hence the user can customize the break iterator for 
+    * a specialized titlecasing. In this case only the forward iteration 
+    * needs to be implemented.
+    * If the break iterator passed in is null, the default Unicode algorithm
+    * is intended to be used to determine the titlecase positions.
+    * </p>
+    * <p>Only positions returned by the break iterator will be title cased,
+    * characters in between the positions will all be in lower case.</p>
+    * <p>Casing is dependent on the argument locale and context-sensitive</p>
+    * <p>NOTE: in this revision the method is a stub. It ignores the locale
+    * and the break iterator and returns the argument string unchanged.</p>
+    * @param locale which string is to be converted in
+    * @param str source string to be performed on
+    * @param breakiter break iterator to determine the positions in which
+    *        the character should be title cased.
+    * @return titlecase version of the argument string (currently the 
+    *         argument string itself, see the note above)
+    */
+    public static String toTitleCase(Locale locale, String str, 
+                                     BreakIterator breakiter)
+    {
+        return str;
+    }
+    
    /**
    * The given character is mapped to its case folding equivalent according to
    * UnicodeData.txt and CaseFolding.txt; if the character has no case folding 
--- a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java
+++ b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java
@ -6,8 +6,8 @@
 *
 * $Source: 
 *      /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $ 
-* $Date: 2001/10/12 23:53:16 $ 
-* $Revision: 1.3 $
+* $Date: 2002/02/15 02:53:35 $ 
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -26,22 +26,19 @@ package com.ibm.text;

 public class UCharacterCategory
 {
-  // private constructor ===================================================
-  
-  /**
-  * Private constructor to prevent initialisation
-  */
-  private UCharacterCategory()
-  {
-  }
-  
-  // public variable =======================================================
+  // public variable -----------------------------------------------------
  
  /**
  * Unassigned character type
  */
  public static final int UNASSIGNED              = 0; 
  /**
+  * Character type Cn
+  * Not Assigned (no characters in [UnicodeData.txt] have this property) 
+  * @draft 2.1
+  */
+  public static final int GENERAL_OTHER_TYPES     = 0;
+  /**
  * Character type Lu
  */
  public static final int UPPERCASE_LETTER        = 1;
@ -163,17 +160,13 @@ public class UCharacterCategory
  * Character type Pf
  */
 	public static final int FINAL_PUNCTUATION       = 29;
-	/**
-  * Character type Cn
-  */
-	public static final int GENERAL_OTHER_TYPES     = 30;
 	
 	// start of 31 ------------

 	/**
  * Character type count
  */
-	public static final int CHAR_CATEGORY_COUNT     = 31;
+	public static final int CHAR_CATEGORY_COUNT     = 30;
 	
 	/**
 	* Gets the name of the argument category
@ -245,4 +238,72 @@ public class UCharacterCategory
 	  }
 	  return "Unassigned";
 	}
+	
+	// private constructor -----------------------------------------------
+  
+    /**
+    * Private constructor to prevent initialisation
+    */
+    private UCharacterCategory()
+    {
+    }
+    
+	// package private data members --------------------------------------
+	
+	/**
+	* Not a character type
+	*/
+	static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
+	/**
+	* Lead surrogate type
+	*/
+	static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
+    /**
+	* Trail surrogate type
+	*/
+	static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
+	/**
+	* Extended category count
+	*/
+	static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
+	/**
+    * Type names used for extended names.
+    * The array is indexed by category value: entries 0 to 29 follow the 
+    * public category constants of this class (UNASSIGNED to 
+    * FINAL_PUNCTUATION), and the last three entries correspond to 
+    * NON_CHARACTER_, LEAD_SURROGATE_ and TRAIL_SURROGATE_ in that order.
+    * UCharacterName uses the extended type of a code point directly as an
+    * index into this array, so the order here has to stay in sync with the
+    * constant values.
+    */
+    static final String TYPE_NAMES_[] = {"unassigned",
+                                                 "uppercase letter",
+                                                 "lowercase letter",
+                                                 "titlecase letter",
+                                                 "modifier letter",
+                                                 "other letter",
+                                                 "non spacing mark",
+                                                 "enclosing mark",
+                                                 "combining spacing mark",
+                                                 "decimal digit number",
+                                                 "letter number",
+                                                 "other number",
+                                                 "space separator",
+                                                 "line separator",
+                                                 "paragraph separator",
+                                                 "control",
+                                                 "format",
+                                                 "private use area",
+                                                 "surrogate",
+                                                 "dash punctuation",   
+                                                 "start punctuation",
+                                                 "end punctuation",
+                                                 "connector punctuation",
+                                                 "other punctuation",
+                                                 "math symbol",
+                                                 "currency symbol",
+                                                 "modifier symbol",
+                                                 "other symbol",
+                                                 "initial punctuation",
+                                                 "final punctuation",
+                                                 "noncharacter",
+                                                 "lead surrogate",
+                                                 "trail surrogate"};
+   /**
+   * Unknown type name
+   */
+   static final String UNKNOWN_TYPE_NAME_ = "unknown";
 }
--- a/icu4j/src/com/ibm/icu/lang/UCharacterName.java
+++ b/icu4j/src/com/ibm/icu/lang/UCharacterName.java
@ -6,8 +6,8 @@
 *
 * $Source: 
 *     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $ 
-* $Date: 2002/02/08 01:08:38 $ 
-* $Revision: 1.6 $
+* $Date: 2002/02/15 02:53:34 $ 
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -409,20 +409,29 @@ final class UCharacterName
            return null;
        }
          
+        int tempChoice = choice;
+        if (tempChoice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+            tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
+        }
+            
        String result = "";
        
        // Do not write algorithmic Unicode 1.0 names because Unihan names are 
        // the same as the modern ones, extension A was only introduced with 
        // Unicode 3.0, and the Hangul syllable block was moved and changed around 
        // Unicode 1.1.5.
-        if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
+        if (tempChoice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
        // try getting algorithmic name first
            result = getAlgName(ch);
        }
        
        // getting normal character name
        if (result == null || result.length() == 0) {
-            result = getGroupName(ch, choice);
+            if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {	  
+                result = getExtendedName(ch);	
+            } else {
+                result = getGroupName(ch, choice);
+            }
        }
          
        return result;
@ -442,26 +451,42 @@ final class UCharacterName
            name == null || name.length() == 0) {
            return -1;
        }
-       
-        String uppercasename = UCharacter.toUpperCase(Locale.ENGLISH, name);
+        
+        // try extended names first  
+        int result = getExtendedChar(name, choice);
+        if (result >= -1) {
+            return result;
+        }
        // try algorithmic names first, if fails then try group names
        // int result = getAlgorithmChar(choice, uppercasename);
+        int tempChoice = choice;    
+        if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+            tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
+        }
        
-        // 1.0 has no algorithmic names
+        String upperCaseName = UCharacter.toUpperCase(Locale.ENGLISH, name);
+        // try algorithmic names now, 1.0 has no algorithmic names
        if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
-            return getGroupChar(uppercasename, choice);
+            return getGroupChar(upperCaseName, tempChoice);
        }
        int count = 0;
        if (m_algorithm_ != null) {
            count = m_algorithm_.length;
        }
        for (count --; count >= 0; count --) {
-            int result = m_algorithm_[count].getAlgorithmChar(name); 
+            result = m_algorithm_[count].getAlgorithmChar(name); 
            if (result >= 0) {
                return result;
            }
        }
-       return getGroupChar(uppercasename, choice);
+        
+        result = getGroupChar(upperCaseName, tempChoice);
+        if (result == -1 && 
+            choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+            result = getGroupChar(upperCaseName, 
+                                UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);        
+        } 
+        return result;
    }
    
    /**
@ -943,4 +968,118 @@ final class UCharacterName
        }
        return -1;
    }
+    
+    /**
+    * Getting the character with extended name of the form 
+    * &lt;type-hexdigits&gt;, e.g. &lt;noncharacter-FFFF&gt;.
+    * <p>A name is accepted only if it starts with '&lt;', ends with '&gt;',
+    * the text after the last '-' parses as a hexadecimal number that lies
+    * in the code point range 0..0x10FFFF, and the text between '&lt;' and 
+    * that '-' matches, ignoring case, the entry of 
+    * UCharacterCategory.TYPE_NAMES_ whose index equals the extended type
+    * (see getType(int)) of the parsed code point.</p>
+    * @param name of the character to be found, must not be empty since the
+    *        first character is inspected unconditionally
+    * @param choice name choice, names in extended form are only resolved 
+    *        for UCharacterNameChoice.U_EXTENDED_CHAR_NAME
+    * @return character associated with the name, -1 if such character is not
+    *                   found and -2 if we should continue with the search.
+    */
+    private int getExtendedChar(String name, int choice)
+    {
+        if (name.charAt(0) != '<') {
+            // not in extended form, the caller continues with other tables
+            return -2;
+        }
+        int endIndex = name.length() - 1;
+        if (choice != UCharacterNameChoice.U_EXTENDED_CHAR_NAME
+            || name.charAt(endIndex) != '>') {
+            return -1;
+        }
+        int startIndex = name.lastIndexOf('-');
+        if (startIndex < 0) {
+            // no separator between the category and the hex digits
+            return -1;
+        }
+        startIndex ++; // We've got a category, move to the first hex digit.
+        int result;
+        try {
+            result = Integer.parseInt(name.substring(startIndex, endIndex), 
+                                      16);
+        }
+        catch (NumberFormatException e) {
+            return -1;
+        }
+        // Reject values outside the Unicode code point range. Without this
+        // check a name such as <noncharacter-7FFFFFFF> would pass the type
+        // validation below and an invalid code point would be returned.
+        if (result < 0 || result > 0x10FFFF) {
+            return -1;
+        }
+        // Now validate the category name. We could use a 
+        // binary search, or a trie, if we really wanted to. 
+        String type = name.substring(1, startIndex - 1);
+        int length = UCharacterCategory.TYPE_NAMES_.length;
+        for (int i = 0; i < length; ++ i) {
+            if (type.compareToIgnoreCase(
+                                  UCharacterCategory.TYPE_NAMES_[i]) == 0) {
+                if (getType(result) == i) {
+                    return result;
+                }
+                // known category, but not the one of this code point
+                break;
+            }
+        }
+        return -1;
+    }
+    
+    /**
+    * Gets the character extended type.
+    * The extended type is the general category of the code point with the
+    * following adjustments:
+    * <ul>
+    * <li> code points whose low 16 bits are FFFE or FFFF, and code points
+    *      in the range FDD0..FDEF, are reported as NON_CHARACTER_
+    * <li> ISO control code points are always reported as CONTROL
+    * <li> surrogates are split into LEAD_SURROGATE_ and TRAIL_SURROGATE_
+    * </ul>
+    * The result can be used as an index into 
+    * UCharacterCategory.TYPE_NAMES_.
+    * @param ch character to be tested
+    * @return extended type it is associated with
+    */
+    private int getType(int ch)
+    {
+        if ((ch & 0xFFFE) == 0xFFFE || (ch >= 0xFDD0 && ch <= 0xFDEF)) {  
+            // noncharacter, we return the first value past the public category count
+            return UCharacterCategory.NON_CHARACTER_;    
+        }    
+        // Undo ICU exceptions to the UCD when determining the category.  
+        int result;   
+        if (UCharacter.isISOControl(ch)) {        
+            result = UCharacterCategory.CONTROL;    
+        } 
+        else {        
+            result = UCharacter.getType(ch);
+            if (result == UCharacterCategory.SURROGATE) {            
+                if (UTF16.isLeadSurrogate((char)ch)) {
+                    result = UCharacterCategory.LEAD_SURROGATE_;
+                }
+                else {
+                    result = UCharacterCategory.TRAIL_SURROGATE_;
+                }
+            }    
+        }    
+        return result;
+    }
+    
+    /**
+    * Retrieves the extended name.
+    * The modern Unicode name is used if there is one. Otherwise, for 
+    * code points whose extended type is CONTROL, the Unicode 1.0 name is 
+    * tried. If there is still no name, a name of the form 
+    * &lt;type-HEX&gt; is built, where type is the entry of 
+    * UCharacterCategory.TYPE_NAMES_ for the extended type of the code point
+    * ("unknown" if the type lies outside of that table) and HEX is the 
+    * code point in uppercase hexadecimal, padded with leading zeros to at 
+    * least 4 digits.
+    * @param ch code point whose extended name is to be retrieved
+    * @return the extended name of the code point
+    */
+    private String getExtendedName(int ch) 
+    {    
+        String result = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);    
+        if (result == null) {        
+            if (getType(ch) == UCharacterCategory.CONTROL) {            
+                result = getName(ch, 
+                                 UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);        
+            }        
+            if (result == null) {            
+                int type = getType(ch);    
+                // Return unknown if the table of names above is not up to 
+                // date.
+                if (type >= UCharacterCategory.TYPE_NAMES_.length) {       
+                    result = UCharacterCategory.UNKNOWN_TYPE_NAME_;    
+                } 
+                else {        
+                    result = UCharacterCategory.TYPE_NAMES_[type];    
+                }
+                StringBuffer tempResult = new StringBuffer(result);
+                tempResult.insert(0, '<');
+                tempResult.append('-');
+                String chStr = Integer.toHexString(ch).toUpperCase();
+                int zeros = 4 - chStr.length();
+                while (zeros > 0) {
+                    tempResult.append('0');
+                    zeros --;
+                }
+                tempResult.append(chStr);
+                tempResult.append('>');
+                result = tempResult.toString();
+            }
+        }    
+        return result;
+    }
 }
--- a/icu4j/src/com/ibm/icu/lang/UCharacterNameChoice.java
+++ b/icu4j/src/com/ibm/icu/lang/UCharacterNameChoice.java
@ -6,8 +6,8 @@
 *
 * $Source: 
 *     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $ 
-* $Date: 2001/03/23 19:51:38 $ 
-* $Revision: 1.2 $
+* $Date: 2002/02/15 02:53:35 $ 
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -30,5 +30,6 @@ interface UCharacterNameChoice
  
  static final int U_UNICODE_CHAR_NAME = 0;
  static final int U_UNICODE_10_CHAR_NAME = 1;
-  static final int U_CHAR_NAME_CHOICE_COUNT = 2;
+  static final int U_EXTENDED_CHAR_NAME = 2;
+  static final int U_CHAR_NAME_CHOICE_COUNT = 3;
 }
--- a/icu4j/src/com/ibm/icu/test/text/UCharacterTest.java
+++ b/icu4j/src/com/ibm/icu/test/text/UCharacterTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UCharacterTest.java,v $ 
-* $Date: 2002/02/08 23:44:17 $ 
-* $Revision: 1.21 $
+* $Date: 2002/02/15 02:53:32 $ 
+* $Revision: 1.22 $
 *
 *******************************************************************************
 */
@ -555,16 +555,33 @@ public final class UCharacterTest extends TestFmwk
  */
  public void TestNames()
  {
-    int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
-               0x23456};
+    int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xd800, 0xdc00, 
+               0xff08, 0xffe5, 0xffff, 0x23456, 0x9};
    String name[] = {"LATIN SMALL LETTER A", 
                     "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 
                     "CJK UNIFIED IDEOGRAPH-3401", 
                     "CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA", 
-                     "HANGUL SYLLABLE HIH", "FULLWIDTH LEFT PARENTHESIS",
-                     "FULLWIDTH YEN SIGN", "CJK UNIFIED IDEOGRAPH-23456"};
+                     "HANGUL SYLLABLE HIH", "", "",
+                     "FULLWIDTH LEFT PARENTHESIS",
+                     "FULLWIDTH YEN SIGN", "", "CJK UNIFIED IDEOGRAPH-23456",
+                     ""};
    String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
-                        "", "", "FULLWIDTH OPENING PARENTHESIS", "", ""};
+                        "", "", "", "", "FULLWIDTH OPENING PARENTHESIS", "", 
+                        "", "", "HORIZONTAL TABULATION"};
+    String extendedname[] = {"LATIN SMALL LETTER A", 
+                             "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
+                             "CJK UNIFIED IDEOGRAPH-3401",
+                             "CJK UNIFIED IDEOGRAPH-7FED",
+                             "HANGUL SYLLABLE GA",
+                             "HANGUL SYLLABLE HIH",
+                             "<lead surrogate-D800>",
+                             "<trail surrogate-DC00>",
+                             "FULLWIDTH LEFT PARENTHESIS",
+                             "FULLWIDTH YEN SIGN",
+                             "<noncharacter-FFFF>",
+                             "CJK UNIFIED IDEOGRAPH-23456", 
+                             "HORIZONTAL TABULATION"};
+                             
    int size = c.length;
    String str;
    int uc;
@ -573,7 +590,8 @@ public final class UCharacterTest extends TestFmwk
    {
      // modern Unicode character name
      str = UCharacter.getName(c[i]);
-      if (!str.equals(name[i]))
+      if ((str == null && name[i].length() > 0) || 
+          (str != null && !str.equals(name[i])))
      {
        errln("FAIL \\u" + hex(c[i]) + " expected name " +
              name[i]);
@ -590,9 +608,18 @@ public final class UCharacterTest extends TestFmwk
        break;
      }
      
+      // extended character name
+      str = UCharacter.getExtendedName(c[i]);
+      if (str == null || !str.equals(extendedname[i]))
+      {
+        errln("FAIL \\u" + hex(c[i]) + " expected extended name " +
+              extendedname[i]);
+        break;
+      }
+      
      // retrieving unicode character from modern name
      uc = UCharacter.getCharFromName(name[i]);
-      if (uc != c[i])
+      if (uc != c[i] && name[i].length() != 0)
      {
        errln("FAIL " + name[i] + " expected character \\u" + hex(c[i]));
        break;
@ -600,9 +627,17 @@ public final class UCharacterTest extends TestFmwk
      
      //retrieving unicode character from 1.0 name
      uc = UCharacter.getCharFromName1_0(oldname[i]);
+      if (uc != c[i] && oldname[i].length() != 0)
+      {
+        errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + hex(c[i]));
+        break;
+      }
+      
+      //retrieving unicode character from extended name
+      uc = UCharacter.getCharFromExtendedName(extendedname[i]);
      if (uc != c[i] && i != 0 && (i == 1 || i == 6))
      {
-        errln("FAIL " + name[i] + " expected 1.0 character \\u" + hex(c[i]));
+        errln("FAIL " + extendedname[i] + " expected extended character \\u" + hex(c[i]));
        break;
      }
    }
@ -1014,8 +1049,8 @@ public final class UCharacterTest extends TestFmwk
    try
    {
      UCharacterTest test = new UCharacterTest();
-      //test.TestEnumeration();
-      test.run(arg);
+      test.TestNames();
+      //test.run(arg);
    }
    catch (Exception e)
    {
--- a/icu4j/src/com/ibm/text/UCharacter.java
+++ b/icu4j/src/com/ibm/text/UCharacter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UCharacter.java,v $ 
-* $Date: 2002/02/08 01:08:38 $ 
-* $Revision: 1.21 $
+* $Date: 2002/02/15 02:53:32 $ 
+* $Revision: 1.22 $
 *
 *******************************************************************************
 */
@ -17,6 +17,8 @@ package com.ibm.text;
 import java.util.Locale;
 import com.ibm.util.Utility;
 import com.ibm.icu.util.RangeValueIterator;
+import com.ibm.text.BreakIterator;
+import com.ibm.text.RuleBasedBreakIterator;

 /**
 * <p>
@ -910,8 +912,7 @@ public final class UCharacter
    */
    public static String getName(int ch)
    {
-        return NAME_.getName(ch, 
-                                    UCharacterNameChoice.U_UNICODE_CHAR_NAME);
+        return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
    }
      
    /**
@ -929,10 +930,33 @@ public final class UCharacter
        return NAME_.getName(ch, 
                             UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
    }
+    
+    /**
+    * <p>Retrieves a name for a valid codepoint. Unlike getName(int) and
+    * getName1_0(int), this method will return a name even for codepoints that
+    * are not assigned a name in UnicodeData.txt.
+    * </p>
+    * The names are returned in the following order.
+    * <ul>
+    * <li> Most current Unicode name if there is any
+    * <li> Unicode 1.0 name if there is any
+    * <li> Extended name in the form of 
+    *      "&lt;codepoint_type-codepoint_hex_digits&gt;". 
+    *      E.g. &lt;noncharacter-FFFE&gt;
+    * </ul>
+    * Note calling any methods related to code point names, e.g. get*Name*() 
+    * incurs a one-time initialisation cost to construct the name tables.
+    * @param ch the code point for which to get the name
+    * @return a name for the argument codepoint
+    * @draft 2.1
+    */
+    public static String getExtendedName(int ch) 
+    {
+        return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
+    }
      
    /**
-    * Find a Unicode code point by its most current Unicode name and return its 
-    * code point value.<br>
+    * <p>Find a Unicode code point by its most current Unicode name and 
+    * return its code point value. All Unicode names are in uppercase.</p>
    * Note calling any methods related to code point names, e.g. get*Name*() 
    * incurs a one-time initialisation cost to construct the name tables.
    * @param name most current Unicode character name whose code point is to be 
@ -946,8 +970,8 @@ public final class UCharacter
    }
      
    /**
-    * Find a Unicode character by its version 1.0 Unicode name and return its 
-    * code point value.<br>
+    * <p>Find a Unicode character by its version 1.0 Unicode name and return 
+    * its code point value. All Unicode names are in uppercase.</p>
    * Note calling any methods related to code point names, e.g. get*Name*() 
    * incurs a one-time initialisation cost to construct the name tables.
    * @param name Unicode 1.0 code point name whose code point is to 
@ -959,6 +983,31 @@ public final class UCharacter
        return NAME_.getCharFromName(
                         UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
    }
+    
+    /**
+    * <p>Finds a Unicode code point by any of its names and returns its code 
+    * point value. All Unicode names are in uppercase. 
+    * Extended names are all lowercase except for numbers and are contained
+    * within angle brackets.</p>
+    * The names are searched in the following order
+    * <ul>
+    * <li> Most current Unicode name if there is any
+    * <li> Unicode 1.0 name if there is any
+    * <li> Extended name in the form of 
+    *      "&lt;codepoint_type-codepoint_hex_digits&gt;". 
+    *      E.g. &lt;noncharacter-FFFE&gt;
+    * </ul>
+    * Note calling any methods related to code point names, e.g. get*Name*() 
+    * incurs a one-time initialisation cost to construct the name tables.
+    * @param name codepoint name
+    * @return code point associated with the name or -1 if the name is not
+    *         found.
+    * @draft 2.1
+    */
+    public static int getCharFromExtendedName(String name)
+    {
+        return NAME_.getCharFromName(
+                            UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
+    }
      
    /**
    * Returns a code pointcorresponding to the two UTF16 characters.<br>
@ -1016,6 +1065,38 @@ public final class UCharacter
    {
        return toLowerCase(Locale.getDefault(), str);
    }
+    
+    /**
+    * <p>Gets the titlecase version of the argument string.</p>
+    * <p>Position for titlecasing is determined by the argument break 
+    * iterator, hence the user can customize the break iterator for 
+    * a specialized titlecasing. In this case only the forward iteration 
+    * needs to be implemented.
+    * If the break iterator passed in is null, the default Unicode algorithm
+    * will be used to determine the titlecase positions.
+    * </p>
+    * <p>Only positions returned by the break iterator will be title cased,
+    * characters in between the positions will all be in lower case.</p>
+    * <p>Casing is dependent on the default locale and context-sensitive</p>
+    * <p>NOTE: this implementation is a placeholder; it only builds the 
+    * default rule based break iterator when none is supplied and then 
+    * returns the argument string unchanged.</p>
+    * @param str source string to be performed on
+    * @param breakiter break iterator to determine the positions in which
+    *        the character should be title cased.
+    * @return titlecase version of the argument string (currently the 
+    *         argument string itself, unchanged)
+    */
+    public static String toTitleCase(String str, BreakIterator breakiter)
+    {
+        if (breakiter == null) {
+            String rules = "$cased=[[:Lu:][:Lt:][:Ll:]];" +  
+                           "$case_ignorable=[[:Mn:][:Me:][:Cf:][:Lm:][:Sk:]" 
+                                            + " \\u0027\u00AD\u2019];" +
+                           "$not_cased=[^$cased$case_ignorable];" +
+                           "[$not_cased$case_ignorable]*/" + 
+                           "$cased[$cased$case_ignorable]*$not_cased*;";
+            breakiter = new RuleBasedBreakIterator(rules);
+        }
+        
+        return str;
+    }
      
    /**
    * Gets uppercase version of the argument string. 
@ -1111,6 +1192,30 @@ public final class UCharacter
        return result.toString();
    }
    
+    /**
+    * <p>Gets the titlecase version of the argument string.</p>
+    * <p>Position for titlecasing is determined by the argument break 
+    * iterator, hence the user can customize the break iterator for 
+    * a specialized titlecasing. In this case only the forward iteration 
+    * needs to be implemented.
+    * If the break iterator passed in is null, the default Unicode algorithm
+    * will be used to determine the titlecase positions.
+    * </p>
+    * <p>Only positions returned by the break iterator will be title cased,
+    * characters in between the positions will all be in lower case.</p>
+    * <p>Casing is dependent on the argument locale and context-sensitive</p>
+    * <p>NOTE: this implementation is a placeholder; it ignores the locale 
+    * and the break iterator and returns the argument string unchanged.</p>
+    * @param locale locale in which the string is to be converted
+    * @param str source string to be performed on
+    * @param breakiter break iterator to determine the positions in which
+    *        the character should be title cased.
+    * @return titlecase version of the argument string (currently the 
+    *         argument string itself, unchanged)
+    */
+    public static String toTitleCase(Locale locale, String str, 
+                                     BreakIterator breakiter)
+    {
+        return str;
+    }
+    
    /**
    * The given character is mapped to its case folding equivalent according to
    * UnicodeData.txt and CaseFolding.txt; if the character has no case folding 
--- a/icu4j/src/com/ibm/text/UCharacterCategory.java
+++ b/icu4j/src/com/ibm/text/UCharacterCategory.java
@ -6,8 +6,8 @@
 *
 * $Source: 
 *      /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $ 
-* $Date: 2001/10/12 23:53:16 $ 
-* $Revision: 1.3 $
+* $Date: 2002/02/15 02:53:35 $ 
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -26,22 +26,19 @@ package com.ibm.text;

 public class UCharacterCategory
 {
-  // private constructor ===================================================
-  
-  /**
-  * Private constructor to prevent initialisation
-  */
-  private UCharacterCategory()
-  {
-  }
-  
-  // public variable =======================================================
+  // public variable -----------------------------------------------------
  
  /**
  * Unassigned character type
  */
  public static final int UNASSIGNED              = 0; 
  /**
+  * Character type Cn
+  * Not Assigned (no characters in [UnicodeData.txt] have this property) 
+  * @draft 2.1
+  */
+  public static final int GENERAL_OTHER_TYPES     = 0;
+  /**
  * Character type Lu
  */
  public static final int UPPERCASE_LETTER        = 1;
@ -163,17 +160,13 @@ public class UCharacterCategory
  * Character type Pf
  */
 	public static final int FINAL_PUNCTUATION       = 29;
-	/**
-  * Character type Cn
-  */
-	public static final int GENERAL_OTHER_TYPES     = 30;
 	
 	// start of 31 ------------

 	/**
  * Character type count
  */
-	public static final int CHAR_CATEGORY_COUNT     = 31;
+	public static final int CHAR_CATEGORY_COUNT     = 30;
 	
 	/**
 	* Gets the name of the argument category
@ -245,4 +238,72 @@ public class UCharacterCategory
 	  }
 	  return "Unassigned";
 	}
+	
+	// private constructor -----------------------------------------------
+  
+    /**
+    * Private constructor to prevent initialisation
+    */
+    private UCharacterCategory()
+    {
+    }
+    
+	// package private data members --------------------------------------
+	
+	/**
+	* Not a character type
+	*/
+	static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
+	/**
+	* Lead surrogate type
+	*/
+	static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
+    /**
+	* Trail surrogate type
+	*/
+	static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
+	/**
+	* Extended category count
+	*/
+	static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
+	/**
+    * Type names used for extended names
+    */
+    static final String TYPE_NAMES_[] = {"unassigned",
+                                                 "uppercase letter",
+                                                 "lowercase letter",
+                                                 "titlecase letter",
+                                                 "modifier letter",
+                                                 "other letter",
+                                                 "non spacing mark",
+                                                 "enclosing mark",
+                                                 "combining spacing mark",
+                                                 "decimal digit number",
+                                                 "letter number",
+                                                 "other number",
+                                                 "space separator",
+                                                 "line separator",
+                                                 "paragraph separator",
+                                                 "control",
+                                                 "format",
+                                                 "private use area",
+                                                 "surrogate",
+                                                 "dash punctuation",   
+                                                 "start punctuation",
+                                                 "end punctuation",
+                                                 "connector punctuation",
+                                                 "other punctuation",
+                                                 "math symbol",
+                                                 "currency symbol",
+                                                 "modifier symbol",
+                                                 "other symbol",
+                                                 "initial punctuation",
+                                                 "final punctuation",
+                                                 "noncharacter",
+                                                 "lead surrogate",
+                                                 "trail surrogate"};
+   /**
+   * Unknown type name
+   */
+   static final String UNKNOWN_TYPE_NAME_ = "unknown";
 }
--- a/icu4j/src/com/ibm/text/UCharacterName.java
+++ b/icu4j/src/com/ibm/text/UCharacterName.java
@ -6,8 +6,8 @@
 *
 * $Source: 
 *     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $ 
-* $Date: 2002/02/08 01:08:38 $ 
-* $Revision: 1.6 $
+* $Date: 2002/02/15 02:53:34 $ 
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -409,20 +409,29 @@ final class UCharacterName
            return null;
        }
          
+        int tempChoice = choice;
+        if (tempChoice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+            tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
+        }
+            
        String result = "";
        
        // Do not write algorithmic Unicode 1.0 names because Unihan names are 
        // the same as the modern ones, extension A was only introduced with 
        // Unicode 3.0, and the Hangul syllable block was moved and changed around 
        // Unicode 1.1.5.
-        if (choice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
+        if (tempChoice == UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
        // try getting algorithmic name first
            result = getAlgName(ch);
        }
        
        // getting normal character name
        if (result == null || result.length() == 0) {
-            result = getGroupName(ch, choice);
+            if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {	  
+                result = getExtendedName(ch);	
+            } else {
+                result = getGroupName(ch, choice);
+            }
        }
          
        return result;
@ -442,26 +451,42 @@ final class UCharacterName
            name == null || name.length() == 0) {
            return -1;
        }
-       
-        String uppercasename = UCharacter.toUpperCase(Locale.ENGLISH, name);
+        
+        // try extended names first  
+        int result = getExtendedChar(name, choice);
+        if (result >= -1) {
+            return result;
+        }
        // try algorithmic names first, if fails then try group names
        // int result = getAlgorithmChar(choice, uppercasename);
+        int tempChoice = choice;    
+        if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+            tempChoice = UCharacterNameChoice.U_UNICODE_CHAR_NAME;
+        }
        
-        // 1.0 has no algorithmic names
+        String upperCaseName = UCharacter.toUpperCase(Locale.ENGLISH, name);
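+        // try algorithmic names now; 1.0 has no algorithmic names, so for it
+        // only the group names are searched.
+        // NOTE(review): the test below is also true for U_EXTENDED_CHAR_NAME,
+        // so extended lookups return here and never reach the algorithmic-name
+        // loop or the Unicode 1.0 fallback further down -- confirm intent.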
        if (choice != UCharacterNameChoice.U_UNICODE_CHAR_NAME) {
-            return getGroupChar(uppercasename, choice);
+            return getGroupChar(upperCaseName, tempChoice);
        }
        int count = 0;
        if (m_algorithm_ != null) {
            count = m_algorithm_.length;
        }
        for (count --; count >= 0; count --) {
-            int result = m_algorithm_[count].getAlgorithmChar(name); 
+            result = m_algorithm_[count].getAlgorithmChar(name); 
            if (result >= 0) {
                return result;
            }
        }
-       return getGroupChar(uppercasename, choice);
+        
+        result = getGroupChar(upperCaseName, tempChoice);
+        if (result == -1 && 
+            choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+            result = getGroupChar(upperCaseName, 
+                                UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);        
+        } 
+        return result;
    }
    
    /**
@ -943,4 +968,118 @@ final class UCharacterName
        }
        return -1;
    }
+    
+    /**
+    * Gets the character with an extended name of the form 
+    * "<type-hexdigits>", e.g. "<noncharacter-FFFE>".
+    * Only names starting with '<' are handled here. Such a name is resolved 
+    * only when the choice is U_EXTENDED_CHAR_NAME, the name ends with '>', 
+    * the text after the last '-' parses as a hexadecimal number, the type 
+    * text matches an entry of UCharacterCategory.TYPE_NAMES_ (ignoring 
+    * case) and that entry's index equals the extended type of the parsed 
+    * code point.
+    * The name is indexed at position 0 without a length check, so callers
+    * are expected to pass a non-empty string.
+    * @param name of the character to be found
+    * @param choice name choice
+    * @return character associated with the name, -1 if the name starts with
+    *         '<' but no such character is found, and -2 if the name does 
+    *         not start with '<' and we should continue with the search.
+    */
+    private int getExtendedChar(String name, int choice)
+    {
+        if (name.charAt(0) == '<') {        
+            if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {            
+                int endIndex = name.length() - 1;
+                if (name.charAt(endIndex) == '>') {
+                    int startIndex = name.lastIndexOf('-');
+                    if (startIndex >= 0) { // We've got a category.     
+                        startIndex ++;
+                        int result = -1;
+                        try {
+                            result = Integer.parseInt(
+                                        name.substring(startIndex, endIndex), 
+                                        16);
+                        }
+                        catch (NumberFormatException e) {
+                            return -1;     
+                        } 
+                        // Now validate the category name. We could use a 
+                        // binary search, or a trie, if we really wanted to. 
+                        String type = name.substring(1, startIndex - 1);
+                        int length = UCharacterCategory.TYPE_NAMES_.length;
+                        for (int i = 0; i < length; ++ i) {             
+                            if (type.compareToIgnoreCase(
+                                   UCharacterCategory.TYPE_NAMES_[i]) == 0) { 
+                                if (getType(result) == i) { 
+                                    return result;     
+                                }  
+                                break;          
+                            } 
+                        }
+                    }
+                }
+            }            
+            return -1; 
+        }    
+        return -2;
+    }
+    
+    /**
+    * Gets the extended type of a code point, as used for extended names.
+    * Code points whose low 16 bits are 0xFFFE or 0xFFFF, and those in the
+    * range 0xFDD0..0xFDEF, are reported as NON_CHARACTER_; ISO controls are
+    * reported as CONTROL; surrogates are reported as LEAD_SURROGATE_ or 
+    * TRAIL_SURROGATE_; every other code point gets the type returned by 
+    * UCharacter.getType(int).
+    * @param ch character to be tested
+    * @return extended type it is associated with
+    */
+    private int getType(int ch)
+    {
+        if ((ch & 0xFFFE) == 0xFFFE || (ch >= 0xFDD0 && ch <= 0xFDEF)) {  
+            // a noncharacter: return the extended noncharacter type
+            return UCharacterCategory.NON_CHARACTER_;    
+        }    
+        // Undo ICU exceptions to the UCD when determining the category.  
+        int result;   
+        if (UCharacter.isISOControl(ch)) {        
+            result = UCharacterCategory.CONTROL;    
+        } 
+        else {        
+            result = UCharacter.getType(ch);
+            if (result == UCharacterCategory.SURROGATE) {            
+                if (UTF16.isLeadSurrogate((char)ch)) {
+                    result = UCharacterCategory.LEAD_SURROGATE_;
+                }
+                else {
+                    result = UCharacterCategory.TRAIL_SURROGATE_;
+                }
+            }    
+        }    
+        return result;
+    }
+    
+    /**
+    * Retrieves the extended name of a code point.
+    * The modern Unicode name is looked up first. If that lookup yields null
+    * and the extended type of the code point is CONTROL, the Unicode 1.0 
+    * name is looked up. If the result is still null, a name of the form 
+    * "<type-HHHH>" is built from the extended type name (or "unknown" when 
+    * the type has no entry in UCharacterCategory.TYPE_NAMES_) and the 
+    * uppercase hexadecimal code point value, zero-padded to at least 4 
+    * digits.
+    * @param ch code point whose extended name is to be retrieved
+    * @return extended name of the code point
+    */
+    private String getExtendedName(int ch) 
+    {    
+        String result = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);    
+        if (result == null) {        
+            if (getType(ch) == UCharacterCategory.CONTROL) {            
+                result = getName(ch, 
+                                 UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);        
+            }        
+            if (result == null) {            
+                int type = getType(ch);    
+                // Return unknown if the table of names above is not up to 
+                // date.
+                if (type >= UCharacterCategory.TYPE_NAMES_.length) {       
+                    result = UCharacterCategory.UNKNOWN_TYPE_NAME_;    
+                } 
+                else {        
+                    result = UCharacterCategory.TYPE_NAMES_[type];    
+                }
+                StringBuffer tempResult = new StringBuffer(result);
+                tempResult.insert(0, '<');
+                tempResult.append('-');
+                String chStr = Integer.toHexString(ch).toUpperCase();
+                int zeros = 4 - chStr.length();
+                while (zeros > 0) {
+                    tempResult.append('0');
+                    zeros --;
+                }
+                tempResult.append(chStr);
+                tempResult.append('>');
+                result = tempResult.toString();
+            }
+        }    
+        return result;
+    }
 }
--- a/icu4j/src/com/ibm/text/UCharacterNameChoice.java
+++ b/icu4j/src/com/ibm/text/UCharacterNameChoice.java
@ -6,8 +6,8 @@
 *
 * $Source: 
 *     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $ 
-* $Date: 2001/03/23 19:51:38 $ 
-* $Revision: 1.2 $
+* $Date: 2002/02/15 02:53:35 $ 
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -30,5 +30,6 @@ interface UCharacterNameChoice
  
  static final int U_UNICODE_CHAR_NAME = 0;
  static final int U_UNICODE_10_CHAR_NAME = 1;
-  static final int U_CHAR_NAME_CHOICE_COUNT = 2;
+  static final int U_EXTENDED_CHAR_NAME = 2;
+  static final int U_CHAR_NAME_CHOICE_COUNT = 3;
 }