ICU-1429

Addition of SpecialCasing tests. X-SVN-Rev: 6626
2001-11-06 00:04:10 +00:00 · 2001-11-06 00:04:10 +00:00 · a2a32dbecb
commit a2a32dbecb
parent b4e203bee7
5 changed files with 485 additions and 61 deletions
--- a/icu4j/readme.html
+++ b/icu4j/readme.html
@ -378,7 +378,7 @@ Completed in 19 seconds</pre>
 file.</p>

 <p>After doing a build it is a good idea to run all the tests by typing <br>&quot;java
-classpath $Root/classes -DUnicodeData=$Root/src/data/unicode/UnicodeData.txt com.ibm.test.TestAll&quot;. </p>
+-classpath $Root/classes -DUnicodeData=$Root/src/data/unicode com.ibm.test.TestAll&quot;. </p>

 <p>(As an alternative to using Ant, you can build simply by running javac and javadoc
 directly. This is not recommended, but a Windows batch file &quot;buildall.bat&quot;
--- a/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $ 
-* $Date: 2001/10/23 17:08:13 $ 
-* $Revision: 1.16 $
+* $Date: 2001/11/06 00:02:13 $ 
+* $Revision: 1.17 $
 *
 *******************************************************************************
 */
@ -17,11 +17,14 @@ import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.FileNotFoundException;
 import java.util.Locale;
+import java.io.File;
+import java.util.Vector;
 import com.ibm.test.TestFmwk;
 import com.ibm.text.UCharacter;
 import com.ibm.text.UCharacterCategory;
 import com.ibm.text.UCharacterDirection;
 import com.ibm.text.UTF16;
+import com.ibm.util.Utility;

 /**
 * Testing class for UCharacter
@ -417,9 +420,10 @@ public final class UCharacterTest extends TestFmwk
  /**
  * Tests for the character types, direction.<br>
  * This method reads in UnicodeData.txt file for testing purposes. A default 
-  * path is provided relative to the class path, however if the user could 
-  * set a system property to change the path.<br>
-  * e.g. java -DUnicodeData="anyfile.dat" com.ibm.test.text.UCharacterTest
+  * path is provided relative to the src path, however the user could 
+  * set a system property to change the directory path.<br>
+  * e.g. java -DUnicodeData="data_directory_path" 
+  * com.ibm.test.text.UCharacterTest
  */
  public void TestUnicodeData()
  {
@ -438,9 +442,19 @@ public final class UCharacterTest extends TestFmwk
    // unicode data file path system name
    final String UNICODE_DATA_SYSTEM_NAME = "UnicodeData";
    String s = System.getProperty(UNICODE_DATA_SYSTEM_NAME);
-    if (s == null)
+    if (s == null) {
    // assuming runtime directory is on the same level as the source
      s = System.getProperty("user.dir") + "//..//" + UNICODE_DATA_FILE;
+    }
+    else {
+      StringBuffer tempfilename = new StringBuffer(s);
+      if (tempfilename.charAt(tempfilename.length() - 1) != 
+          File.pathSeparatorChar) {
+        tempfilename.append(File.separatorChar);
+      }
+      tempfilename.append("UnicodeData.txt");
+      s = tempfilename.toString();
+    }
    
    final int LASTUNICODECHAR = 0xFFFD;
    int ch = 0,
@ -463,8 +477,7 @@ public final class UCharacterTest extends TestFmwk
        index = s.indexOf(';', 5);
        String t = s.substring(index + 1, index + 3);
        index += 4;
-        byte cc = (byte)(Integer.parseInt(s.substring(index, 
-                                                      s.indexOf(';', index))));
+        int cc = Integer.parseInt(s.substring(index, s.indexOf(';', index)));
        index = s.indexOf(';', index);
        String d = s.substring(index + 1, s.indexOf(';', index + 1));
        
@ -521,7 +534,7 @@ public final class UCharacterTest extends TestFmwk
    {
      errln("FAIL UnicodeData.txt not found\n" +
            "Configure the system setting UnicodeData to the right path\n" +
-            "e.g. java -DUnicodeData=\"anyfile.dat\" " +
+            "e.g. java -DUnicodeData=\"data_dir_path\" " +
            "com.ibm.icu.test.text.UCharacterTest");
    }
    catch (Exception e)
@ -538,6 +551,7 @@ public final class UCharacterTest extends TestFmwk
  /**
  * Test for the character names
  */
+  /*
  public void TestNames()
  {
    int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
@ -735,6 +749,200 @@ public final class UCharacterTest extends TestFmwk
    */
  }
  
+  /**
+  * Tests the case mappings listed in the file SpecialCasing.txt.<br>
+  * Every data line is turned into a source string together with its 
+  * expected lower and upper case mappings. For lines carrying a condition 
+  * list the context characters named by the conditions are added to the 
+  * source and to the expected strings before the mappings are compared.<br>
+  * A default path is provided relative to the src path, however the user 
+  * could set a system property to change the directory path.<br>
+  * e.g. java -DUnicodeData="data_dir_path" com.ibm.test.text.UCharacterTest
+  */
+  public void TestSpecialCasing()
+  {
+    // default unicode data file name
+    final String SPECIALCASING_FILE = "src//data//unicode//SpecialCasing.txt";
+    
+    // unicode data file path system name
+    final String UNICODE_DATA_SYSTEM_NAME = "UnicodeData";
+    String s = System.getProperty(UNICODE_DATA_SYSTEM_NAME);
+    if (s == null || s.length() == 0) {
+      // assuming runtime directory is on the same level as the source
+      s = System.getProperty("user.dir") + "//..//" + SPECIALCASING_FILE;
+    }
+    else {
+      // the property names the data directory, File joins it with the file 
+      // name whether or not the directory ends with a separator
+      s = new File(s, "SpecialCasing.txt").getPath();
+    }
+    
+    BufferedReader input = null;
+    try
+    {
+      // reading in the SpecialCasing file
+      input = new BufferedReader(new FileReader(s));
+      
+      while (true)
+      {
+        s = input.readLine();
+        if (s == null) {
+            break;
+        }
+        if (s.length() == 0 || s.charAt(0) == '#') {
+            continue;
+        }
+        // fields used: 0 source, 1 lower case, 3 upper case, 4 conditions
+        String chstr[] = getUnicodeStrings(s);
+        StringBuffer strbuffer   = new StringBuffer(chstr[0]);
+        StringBuffer lowerbuffer = new StringBuffer(chstr[1]); 
+        StringBuffer upperbuffer = new StringBuffer(chstr[3]); 
+        // stays null for lines that are not restricted to a language
+        Locale locale = null;
+        
+        if (chstr.length == 5) {
+            String condition = chstr[4];
+            // build the context the conditions ask for
+            if (condition.indexOf("AFTER_i NOT_MORE_ABOVE") != -1) {
+                strbuffer.insert(0, 'i');
+                lowerbuffer.insert(0, strbuffer.toString());
+                upperbuffer.insert(0, (char)(0x130));
+            } 
+            else {
+                if (condition.indexOf("MORE_ABOVE") != -1) {
+                    strbuffer.append((char)0x300);
+                    lowerbuffer.append((char)0x300);
+                    upperbuffer.append((char)0x300);
+                }
+                if (condition.indexOf("AFTER_i") != -1) {
+                    strbuffer.insert(0, 'i');
+                    lowerbuffer.insert(0, 'i');
+                    upperbuffer.insert(0, 'I');
+                }
+                if (condition.indexOf("FINAL_SIGMA") != -1) {
+                    strbuffer.insert(0, 'c');
+                    lowerbuffer.insert(0, 'c');
+                    upperbuffer.insert(0, 'C');
+                }
+            }
+            // a condition list starting with a lower case letter starts 
+            // with a two letter language code
+            if (UCharacter.isLowerCase(condition.charAt(0))) {
+                locale = new Locale(condition.substring(0, 2), "");
+            }
+        }
+        
+        String source = strbuffer.toString();
+        // text placed in the failure messages behind "for "
+        String description = "character ";
+        if (locale != null) {
+            description = "locale " + locale + ", character ";
+        }
+        
+        String expected = lowerbuffer.toString();
+        String result;
+        if (locale != null) {
+            result = UCharacter.toLowerCase(locale, source);
+        }
+        else {
+            result = UCharacter.toLowerCase(source);
+        }
+        if (!result.equals(expected)) {
+            errln(s);
+            errln("Fail: toLowerCase for " + description + 
+                  Utility.escape(source) + ", expected " + 
+                  Utility.escape(expected) + " but resulted in " + 
+                  Utility.escape(result));
+        }
+        
+        expected = upperbuffer.toString();
+        if (locale != null) {
+            result = UCharacter.toUpperCase(locale, source);
+        }
+        else {
+            result = UCharacter.toUpperCase(source);
+        }
+        if (!result.equals(expected)) {
+            errln(s);
+            errln("Fail: toUpperCase for " + description + 
+                  Utility.escape(source) + ", expected " + 
+                  Utility.escape(expected) + " but resulted in " + 
+                  Utility.escape(result));
+        }
+      }
+    }
+    catch (FileNotFoundException e)
+    {
+      errln("FAIL SpecialCasing.txt not found\n" +
+            "Configure the system setting UnicodeData to the right path\n" +
+            "e.g. java -DUnicodeData=\"data_dir_path\" " +
+            "com.ibm.icu.test.text.UCharacterTest");
+    }
+    catch (Exception e)
+    {
+      // the rest of the file has not been tested, so this has to be 
+      // reported as a failure instead of only being printed
+      e.printStackTrace();
+      errln("FAIL SpecialCasing.txt could not be completely tested: " + e);
+    }
+    finally
+    {
+      // release the file no matter how the loop has been left
+      if (input != null) {
+        try {
+          input.close();
+        }
+        catch (Exception ignored) {
+          // nothing sensible left to do when closing fails
+        }
+      }
+    }
+  }
+  
+  /**
+  * Breaks a line of the data file up into its fields and converts the 
+  * fields holding hex numbers to Unicode strings.<br>
+  * Fields are terminated by "; ". A field made up of hex numbers separated 
+  * by spaces results in the string of the matching characters, an empty 
+  * field in an empty string. A field holding anything that is not a hex 
+  * number, e.g. a condition list like "lt AFTER_i", is returned as its 
+  * unchanged text.<br>
+  * The text behind the last "; " is added as the last element unless it is 
+  * empty or a comment starting with '#'.
+  * @param str string to break up into Unicode strings
+  * @return array of the strings for the fields, one element per field
+  */
+  private String[] getUnicodeStrings(String str)
+  {
+    Vector v = new Vector(10);
+    int start = 0;
+    int end   = str.indexOf("; ");
+    while (end != -1) {
+        String field = str.substring(start, end);
+        StringBuffer buffer = new StringBuffer(10);
+        try {
+            int length     = field.length();
+            int tokenstart = 0;
+            while (tokenstart < length) {
+                int tokenend = field.indexOf(' ', tokenstart);
+                if (tokenend == -1) {
+                    tokenend = length;
+                }
+                // tokenend == tokenstart only for consecutive spaces
+                if (tokenend > tokenstart) {
+                    // the cast keeps the lower 16 bits of the number
+                    buffer.append((char)Integer.parseInt(
+                                field.substring(tokenstart, tokenend), 16));
+                }
+                tokenstart = tokenend + 1;
+            }
+        } catch (NumberFormatException e) {
+            // not a list of hex numbers, keep the complete text of the 
+            // field. NOTE: text that happens to be valid hex, e.g. "de", 
+            // does not end up here and is converted like a number
+            buffer.setLength(0);
+            buffer.append(field);
+        }
+        v.addElement(buffer.toString());
+        start = end + 2;
+        end   = str.indexOf("; ", start);
+    }
+    String tail = str.substring(start);
+    // the tail is empty when the line ends with "; "
+    if (tail.length() != 0 && tail.charAt(0) != '#') {
+        v.addElement(tail);
+    }
+    String result[] = new String[v.size()];
+    v.copyInto(result);
+    return result;
+  }
 
  public static void main(String[] arg)
  {
--- a/icu4j/src/com/ibm/icu/lang/UCharacter.java
+++ b/icu4j/src/com/ibm/icu/lang/UCharacter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $ 
-* $Date: 2001/11/02 23:11:36 $ 
-* $Revision: 1.15 $
+* $Date: 2001/11/06 00:02:14 $ 
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@ -883,13 +883,13 @@ public final class UCharacter
    * @param ch code point whose combining is to be retrieved
    * @return the combining class of the codepoint
    */
-    public static byte getCombiningClass(int ch)
+    public static int getCombiningClass(int ch)
    {
        int props = getProps(ch);
        if(!UCharacterPropertyDB.isExceptionIndicator(props)) {
        if (UCharacterPropertyDB.getPropType(props) == 
                                        UCharacterCategory.NON_SPACING_MARK) {
-            return (byte)(PROPERTY_DB_.getUnsignedValue(props));
+            return PROPERTY_DB_.getUnsignedValue(props);
        }
        else {
            return 0;
@ -897,10 +897,10 @@ public final class UCharacter
        }
        else {
        // the combining class is in bits 23..16 of the first exception value
-        return (byte)(
-             (PROPERTY_DB_.getException(PROPERTY_DB_.getExceptionIndex(props), 
+        return (PROPERTY_DB_.getException(
+                                    PROPERTY_DB_.getExceptionIndex(props), 
                                    UCharacterPropertyDB.EXC_COMBINING_CLASS_)
-                                    >> SHIFT_16_) & LAST_BYTE_MASK_);
+                                    >> SHIFT_16_) & LAST_BYTE_MASK_;
        }
    }
      
@ -1104,7 +1104,6 @@ public final class UCharacter
        while (offset < size)
        {
            int ch = UTF16.charAt(str, offset);
-            offset += UTF16.getCharCount(ch);
            int props = PROPERTY_DB_.getProperty(ch);
            if (!UCharacterPropertyDB.isExceptionIndicator(props)) 
            {
@ -1130,6 +1129,7 @@ public final class UCharacter
                    }
                }
            }
+            offset += UTF16.getCharCount(ch);
        }
        return result.toString();
    }
@ -1149,7 +1149,6 @@ public final class UCharacter
        StringBuffer result = new StringBuffer(length);
        while (offset < length) {
            int ch = UTF16.charAt(str, offset);
-            offset += UTF16.getCharCount(ch);
            int props = PROPERTY_DB_.getProperty(ch);
            if (!UCharacterPropertyDB.isExceptionIndicator(props)) {
                int type = UCharacterPropertyDB.getPropType(props);
@ -1174,6 +1173,7 @@ public final class UCharacter
                    }
                }
            }
+            offset += UTF16.getCharCount(ch);
        }
        return result.toString();
    }
@ -1793,6 +1793,7 @@ public final class UCharacter
    private static boolean isCFINAL(String str, int offset) 
    {
        int length = str.length();
+        offset += UTF16.getCharCount(UTF16.charAt(str, offset));
        while (offset < length) {
            int ch = UTF16.charAt(str, offset);
            int cat = getType(ch);
@ -1821,7 +1822,7 @@ public final class UCharacter
    private static boolean isNotCINITIAL(String str, int offset) 
    {
        offset --;
-        while (offset > 0) {
+        while (offset >= 0) {
            int ch = UTF16.charAt(str, offset);
            int cat = getType(ch);
            if (cat == UCharacterCategory.LOWERCASE_LETTER || 
@ -1851,7 +1852,7 @@ public final class UCharacter
    private static boolean isAFTER_i(String str, int offset) 
    {
        offset --;
-        while (offset > 0) {
+        while (offset >= 0) {
            int ch = UTF16.charAt(str, offset);
            if (ch == LATIN_SMALL_LETTER_I_ || ch == LATIN_SMALL_LETTER_J_ || 
                ch == LATIN_SMALL_LETTER_I_WITH_OGONEK_ ||
@ -1884,7 +1885,7 @@ public final class UCharacter
    private static boolean isAFTER_I(String str, int offset) 
    {
        offset --;
-        while (offset > 0) {
+        while (offset >= 0) {
            int ch = UTF16.charAt(str, offset);
            if (ch == LATIN_CAPITAL_LETTER_I_) {
                return true; // preceded by I
@ -1914,6 +1915,7 @@ public final class UCharacter
    private static boolean isFollowedByMOREABOVE(String str, int offset) 
    {
        int length = str.length();
+        offset += UTF16.getCharCount(UTF16.charAt(str, 0));
        while (offset < length) {
            int ch = UTF16.charAt(str, offset);
            int cc = getCombiningClass(ch);
@ -1941,6 +1943,7 @@ public final class UCharacter
    private static boolean isFollowedByDotAbove(String str, int offset) 
    {
        int length = str.length();
+        offset += UTF16.getCharCount(UTF16.charAt(str, 0));
        while (offset < length) {
            int ch = UTF16.charAt(str, offset);
            if (ch == COMBINING_DOT_ABOVE_) {
@ -1976,13 +1979,14 @@ public final class UCharacter
        if (exception < 0) {
            String language = locale.getLanguage();
            // use hardcoded conditions and mappings
-            if (language.equals(TURKISH_) && ch == LATIN_SMALL_LETTER_I_) {
+            if ((language.equals(TURKISH_) || language.equals(AZERBAIJANI_))
+                && ch == LATIN_SMALL_LETTER_I_) {
                // turkish: i maps to dotted I
                buffer.append(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_);
            } 
            else {
                if (language.equals(LITHUANIAN_) && ch == COMBINING_DOT_ABOVE_ 
-                    && isAFTER_i(str, offset - 1)) {
+                    && isAFTER_i(str, offset)) {
                    // lithuanian: remove DOT ABOVE after U+0069 "i" with 
                    // upper or titlecase
                    return; // remove the dot (continue without output)
@ -2087,7 +2091,7 @@ public final class UCharacter
                } 
                else {
                    if (ch == COMBINING_DOT_ABOVE_ && 
-                        isAFTER_I(str, offset - 1) && 
+                        isAFTER_I(str, offset) && 
                        !isFollowedByMOREABOVE(str, offset)) {
                        // decomposed I+dot above becomes i (see handling of 
                        // U+0049 for turkish) and removes the dot above
@ -2096,7 +2100,7 @@ public final class UCharacter
                    else {
                        if (ch == GREEK_CAPITAL_LETTER_SIGMA_ &&
                            isCFINAL(str, offset) &&
-                            isNotCINITIAL(str, offset - 1)) {
+                            isNotCINITIAL(str, offset)) {
                            // greek capital sigma maps depending on 
                            // surrounding cased letters
                            buffer.append(GREEK_SMALL_LETTER_RHO_);
--- a/icu4j/src/com/ibm/icu/test/text/UCharacterTest.java
+++ b/icu4j/src/com/ibm/icu/test/text/UCharacterTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UCharacterTest.java,v $ 
-* $Date: 2001/10/23 17:08:13 $ 
-* $Revision: 1.16 $
+* $Date: 2001/11/06 00:02:13 $ 
+* $Revision: 1.17 $
 *
 *******************************************************************************
 */
@ -17,11 +17,14 @@ import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.FileNotFoundException;
 import java.util.Locale;
+import java.io.File;
+import java.util.Vector;
 import com.ibm.test.TestFmwk;
 import com.ibm.text.UCharacter;
 import com.ibm.text.UCharacterCategory;
 import com.ibm.text.UCharacterDirection;
 import com.ibm.text.UTF16;
+import com.ibm.util.Utility;

 /**
 * Testing class for UCharacter
@ -417,9 +420,10 @@ public final class UCharacterTest extends TestFmwk
  /**
  * Tests for the character types, direction.<br>
  * This method reads in UnicodeData.txt file for testing purposes. A default 
-  * path is provided relative to the class path, however if the user could 
-  * set a system property to change the path.<br>
-  * e.g. java -DUnicodeData="anyfile.dat" com.ibm.test.text.UCharacterTest
+  * path is provided relative to the src path, however the user could 
+  * set a system property to change the directory path.<br>
+  * e.g. java -DUnicodeData="data_directory_path" 
+  * com.ibm.test.text.UCharacterTest
  */
  public void TestUnicodeData()
  {
@ -438,9 +442,19 @@ public final class UCharacterTest extends TestFmwk
    // unicode data file path system name
    final String UNICODE_DATA_SYSTEM_NAME = "UnicodeData";
    String s = System.getProperty(UNICODE_DATA_SYSTEM_NAME);
-    if (s == null)
+    if (s == null) {
    // assuming runtime directory is on the same level as the source
      s = System.getProperty("user.dir") + "//..//" + UNICODE_DATA_FILE;
+    }
+    else {
+      StringBuffer tempfilename = new StringBuffer(s);
+      if (tempfilename.charAt(tempfilename.length() - 1) != 
+          File.pathSeparatorChar) {
+        tempfilename.append(File.separatorChar);
+      }
+      tempfilename.append("UnicodeData.txt");
+      s = tempfilename.toString();
+    }
    
    final int LASTUNICODECHAR = 0xFFFD;
    int ch = 0,
@ -463,8 +477,7 @@ public final class UCharacterTest extends TestFmwk
        index = s.indexOf(';', 5);
        String t = s.substring(index + 1, index + 3);
        index += 4;
-        byte cc = (byte)(Integer.parseInt(s.substring(index, 
-                                                      s.indexOf(';', index))));
+        int cc = Integer.parseInt(s.substring(index, s.indexOf(';', index)));
        index = s.indexOf(';', index);
        String d = s.substring(index + 1, s.indexOf(';', index + 1));
        
@ -521,7 +534,7 @@ public final class UCharacterTest extends TestFmwk
    {
      errln("FAIL UnicodeData.txt not found\n" +
            "Configure the system setting UnicodeData to the right path\n" +
-            "e.g. java -DUnicodeData=\"anyfile.dat\" " +
+            "e.g. java -DUnicodeData=\"data_dir_path\" " +
            "com.ibm.icu.test.text.UCharacterTest");
    }
    catch (Exception e)
@ -538,6 +551,7 @@ public final class UCharacterTest extends TestFmwk
  /**
  * Test for the character names
  */
+  /*
  public void TestNames()
  {
    int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
@ -735,6 +749,200 @@ public final class UCharacterTest extends TestFmwk
    */
  }
  
+  /**
+  * Tests the case mappings listed in the file SpecialCasing.txt.<br>
+  * Every data line is turned into a source string together with its 
+  * expected lower and upper case mappings. For lines carrying a condition 
+  * list the context characters named by the conditions are added to the 
+  * source and to the expected strings before the mappings are compared.<br>
+  * A default path is provided relative to the src path, however the user 
+  * could set a system property to change the directory path.<br>
+  * e.g. java -DUnicodeData="data_dir_path" com.ibm.test.text.UCharacterTest
+  */
+  public void TestSpecialCasing()
+  {
+    // default unicode data file name
+    final String SPECIALCASING_FILE = "src//data//unicode//SpecialCasing.txt";
+    
+    // unicode data file path system name
+    final String UNICODE_DATA_SYSTEM_NAME = "UnicodeData";
+    String s = System.getProperty(UNICODE_DATA_SYSTEM_NAME);
+    if (s == null || s.length() == 0) {
+      // assuming runtime directory is on the same level as the source
+      s = System.getProperty("user.dir") + "//..//" + SPECIALCASING_FILE;
+    }
+    else {
+      // the property names the data directory, File joins it with the file 
+      // name whether or not the directory ends with a separator
+      s = new File(s, "SpecialCasing.txt").getPath();
+    }
+    
+    BufferedReader input = null;
+    try
+    {
+      // reading in the SpecialCasing file
+      input = new BufferedReader(new FileReader(s));
+      
+      while (true)
+      {
+        s = input.readLine();
+        if (s == null) {
+            break;
+        }
+        if (s.length() == 0 || s.charAt(0) == '#') {
+            continue;
+        }
+        // fields used: 0 source, 1 lower case, 3 upper case, 4 conditions
+        String chstr[] = getUnicodeStrings(s);
+        StringBuffer strbuffer   = new StringBuffer(chstr[0]);
+        StringBuffer lowerbuffer = new StringBuffer(chstr[1]); 
+        StringBuffer upperbuffer = new StringBuffer(chstr[3]); 
+        // stays null for lines that are not restricted to a language
+        Locale locale = null;
+        
+        if (chstr.length == 5) {
+            String condition = chstr[4];
+            // build the context the conditions ask for
+            if (condition.indexOf("AFTER_i NOT_MORE_ABOVE") != -1) {
+                strbuffer.insert(0, 'i');
+                lowerbuffer.insert(0, strbuffer.toString());
+                upperbuffer.insert(0, (char)(0x130));
+            } 
+            else {
+                if (condition.indexOf("MORE_ABOVE") != -1) {
+                    strbuffer.append((char)0x300);
+                    lowerbuffer.append((char)0x300);
+                    upperbuffer.append((char)0x300);
+                }
+                if (condition.indexOf("AFTER_i") != -1) {
+                    strbuffer.insert(0, 'i');
+                    lowerbuffer.insert(0, 'i');
+                    upperbuffer.insert(0, 'I');
+                }
+                if (condition.indexOf("FINAL_SIGMA") != -1) {
+                    strbuffer.insert(0, 'c');
+                    lowerbuffer.insert(0, 'c');
+                    upperbuffer.insert(0, 'C');
+                }
+            }
+            // a condition list starting with a lower case letter starts 
+            // with a two letter language code
+            if (UCharacter.isLowerCase(condition.charAt(0))) {
+                locale = new Locale(condition.substring(0, 2), "");
+            }
+        }
+        
+        String source = strbuffer.toString();
+        // text placed in the failure messages behind "for "
+        String description = "character ";
+        if (locale != null) {
+            description = "locale " + locale + ", character ";
+        }
+        
+        String expected = lowerbuffer.toString();
+        String result;
+        if (locale != null) {
+            result = UCharacter.toLowerCase(locale, source);
+        }
+        else {
+            result = UCharacter.toLowerCase(source);
+        }
+        if (!result.equals(expected)) {
+            errln(s);
+            errln("Fail: toLowerCase for " + description + 
+                  Utility.escape(source) + ", expected " + 
+                  Utility.escape(expected) + " but resulted in " + 
+                  Utility.escape(result));
+        }
+        
+        expected = upperbuffer.toString();
+        if (locale != null) {
+            result = UCharacter.toUpperCase(locale, source);
+        }
+        else {
+            result = UCharacter.toUpperCase(source);
+        }
+        if (!result.equals(expected)) {
+            errln(s);
+            errln("Fail: toUpperCase for " + description + 
+                  Utility.escape(source) + ", expected " + 
+                  Utility.escape(expected) + " but resulted in " + 
+                  Utility.escape(result));
+        }
+      }
+    }
+    catch (FileNotFoundException e)
+    {
+      errln("FAIL SpecialCasing.txt not found\n" +
+            "Configure the system setting UnicodeData to the right path\n" +
+            "e.g. java -DUnicodeData=\"data_dir_path\" " +
+            "com.ibm.icu.test.text.UCharacterTest");
+    }
+    catch (Exception e)
+    {
+      // the rest of the file has not been tested, so this has to be 
+      // reported as a failure instead of only being printed
+      e.printStackTrace();
+      errln("FAIL SpecialCasing.txt could not be completely tested: " + e);
+    }
+    finally
+    {
+      // release the file no matter how the loop has been left
+      if (input != null) {
+        try {
+          input.close();
+        }
+        catch (Exception ignored) {
+          // nothing sensible left to do when closing fails
+        }
+      }
+    }
+  }
+  
+  /**
+  * Breaks a line of the data file up into its fields and converts the 
+  * fields holding hex numbers to Unicode strings.<br>
+  * Fields are terminated by "; ". A field made up of hex numbers separated 
+  * by spaces results in the string of the matching characters, an empty 
+  * field in an empty string. A field holding anything that is not a hex 
+  * number, e.g. a condition list like "lt AFTER_i", is returned as its 
+  * unchanged text.<br>
+  * The text behind the last "; " is added as the last element unless it is 
+  * empty or a comment starting with '#'.
+  * @param str string to break up into Unicode strings
+  * @return array of the strings for the fields, one element per field
+  */
+  private String[] getUnicodeStrings(String str)
+  {
+    Vector v = new Vector(10);
+    int start = 0;
+    int end   = str.indexOf("; ");
+    while (end != -1) {
+        String field = str.substring(start, end);
+        StringBuffer buffer = new StringBuffer(10);
+        try {
+            int length     = field.length();
+            int tokenstart = 0;
+            while (tokenstart < length) {
+                int tokenend = field.indexOf(' ', tokenstart);
+                if (tokenend == -1) {
+                    tokenend = length;
+                }
+                // tokenend == tokenstart only for consecutive spaces
+                if (tokenend > tokenstart) {
+                    // the cast keeps the lower 16 bits of the number
+                    buffer.append((char)Integer.parseInt(
+                                field.substring(tokenstart, tokenend), 16));
+                }
+                tokenstart = tokenend + 1;
+            }
+        } catch (NumberFormatException e) {
+            // not a list of hex numbers, keep the complete text of the 
+            // field. NOTE: text that happens to be valid hex, e.g. "de", 
+            // does not end up here and is converted like a number
+            buffer.setLength(0);
+            buffer.append(field);
+        }
+        v.addElement(buffer.toString());
+        start = end + 2;
+        end   = str.indexOf("; ", start);
+    }
+    String tail = str.substring(start);
+    // the tail is empty when the line ends with "; "
+    if (tail.length() != 0 && tail.charAt(0) != '#') {
+        v.addElement(tail);
+    }
+    String result[] = new String[v.size()];
+    v.copyInto(result);
+    return result;
+  }
 
  public static void main(String[] arg)
  {
--- a/icu4j/src/com/ibm/text/UCharacter.java
+++ b/icu4j/src/com/ibm/text/UCharacter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UCharacter.java,v $ 
-* $Date: 2001/11/02 23:11:36 $ 
-* $Revision: 1.15 $
+* $Date: 2001/11/06 00:02:14 $ 
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@ -883,13 +883,13 @@ public final class UCharacter
    * @param ch code point whose combining is to be retrieved
    * @return the combining class of the codepoint
    */
-    public static byte getCombiningClass(int ch)
+    public static int getCombiningClass(int ch)
    {
        int props = getProps(ch);
        if(!UCharacterPropertyDB.isExceptionIndicator(props)) {
        if (UCharacterPropertyDB.getPropType(props) == 
                                        UCharacterCategory.NON_SPACING_MARK) {
-            return (byte)(PROPERTY_DB_.getUnsignedValue(props));
+            return PROPERTY_DB_.getUnsignedValue(props);
        }
        else {
            return 0;
@ -897,10 +897,10 @@ public final class UCharacter
        }
        else {
        // the combining class is in bits 23..16 of the first exception value
-        return (byte)(
-             (PROPERTY_DB_.getException(PROPERTY_DB_.getExceptionIndex(props), 
+        return (PROPERTY_DB_.getException(
+                                    PROPERTY_DB_.getExceptionIndex(props), 
                                    UCharacterPropertyDB.EXC_COMBINING_CLASS_)
-                                    >> SHIFT_16_) & LAST_BYTE_MASK_);
+                                    >> SHIFT_16_) & LAST_BYTE_MASK_;
        }
    }
      
@ -1104,7 +1104,6 @@ public final class UCharacter
        while (offset < size)
        {
            int ch = UTF16.charAt(str, offset);
-            offset += UTF16.getCharCount(ch);
            int props = PROPERTY_DB_.getProperty(ch);
            if (!UCharacterPropertyDB.isExceptionIndicator(props)) 
            {
@ -1130,6 +1129,7 @@ public final class UCharacter
                    }
                }
            }
+            offset += UTF16.getCharCount(ch);
        }
        return result.toString();
    }
@ -1149,7 +1149,6 @@ public final class UCharacter
        StringBuffer result = new StringBuffer(length);
        while (offset < length) {
            int ch = UTF16.charAt(str, offset);
-            offset += UTF16.getCharCount(ch);
            int props = PROPERTY_DB_.getProperty(ch);
            if (!UCharacterPropertyDB.isExceptionIndicator(props)) {
                int type = UCharacterPropertyDB.getPropType(props);
@ -1174,6 +1173,7 @@ public final class UCharacter
                    }
                }
            }
+            offset += UTF16.getCharCount(ch);
        }
        return result.toString();
    }
@ -1793,6 +1793,7 @@ public final class UCharacter
    private static boolean isCFINAL(String str, int offset) 
    {
        int length = str.length();
+        offset += UTF16.getCharCount(UTF16.charAt(str, offset));
        while (offset < length) {
            int ch = UTF16.charAt(str, offset);
            int cat = getType(ch);
@ -1821,7 +1822,7 @@ public final class UCharacter
    private static boolean isNotCINITIAL(String str, int offset) 
    {
        offset --;
-        while (offset > 0) {
+        while (offset >= 0) {
            int ch = UTF16.charAt(str, offset);
            int cat = getType(ch);
            if (cat == UCharacterCategory.LOWERCASE_LETTER || 
@ -1851,7 +1852,7 @@ public final class UCharacter
    private static boolean isAFTER_i(String str, int offset) 
    {
        offset --;
-        while (offset > 0) {
+        while (offset >= 0) {
            int ch = UTF16.charAt(str, offset);
            if (ch == LATIN_SMALL_LETTER_I_ || ch == LATIN_SMALL_LETTER_J_ || 
                ch == LATIN_SMALL_LETTER_I_WITH_OGONEK_ ||
@ -1884,7 +1885,7 @@ public final class UCharacter
    private static boolean isAFTER_I(String str, int offset) 
    {
        offset --;
-        while (offset > 0) {
+        while (offset >= 0) {
            int ch = UTF16.charAt(str, offset);
            if (ch == LATIN_CAPITAL_LETTER_I_) {
                return true; // preceded by I
@ -1914,6 +1915,7 @@ public final class UCharacter
    private static boolean isFollowedByMOREABOVE(String str, int offset) 
    {
        int length = str.length();
+        offset += UTF16.getCharCount(UTF16.charAt(str, 0));
        while (offset < length) {
            int ch = UTF16.charAt(str, offset);
            int cc = getCombiningClass(ch);
@ -1941,6 +1943,7 @@ public final class UCharacter
    private static boolean isFollowedByDotAbove(String str, int offset) 
    {
        int length = str.length();
+        offset += UTF16.getCharCount(UTF16.charAt(str, 0));
        while (offset < length) {
            int ch = UTF16.charAt(str, offset);
            if (ch == COMBINING_DOT_ABOVE_) {
@ -1976,13 +1979,14 @@ public final class UCharacter
        if (exception < 0) {
            String language = locale.getLanguage();
            // use hardcoded conditions and mappings
-            if (language.equals(TURKISH_) && ch == LATIN_SMALL_LETTER_I_) {
+            if ((language.equals(TURKISH_) || language.equals(AZERBAIJANI_))
+                && ch == LATIN_SMALL_LETTER_I_) {
                // turkish: i maps to dotted I
                buffer.append(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_);
            } 
            else {
                if (language.equals(LITHUANIAN_) && ch == COMBINING_DOT_ABOVE_ 
-                    && isAFTER_i(str, offset - 1)) {
+                    && isAFTER_i(str, offset)) {
                    // lithuanian: remove DOT ABOVE after U+0069 "i" with 
                    // upper or titlecase
                    return; // remove the dot (continue without output)
@ -2087,7 +2091,7 @@ public final class UCharacter
                } 
                else {
                    if (ch == COMBINING_DOT_ABOVE_ && 
-                        isAFTER_I(str, offset - 1) && 
+                        isAFTER_I(str, offset) && 
                        !isFollowedByMOREABOVE(str, offset)) {
                        // decomposed I+dot above becomes i (see handling of 
                        // U+0049 for turkish) and removes the dot above
@ -2096,7 +2100,7 @@ public final class UCharacter
                    else {
                        if (ch == GREEK_CAPITAL_LETTER_SIGMA_ &&
                            isCFINAL(str, offset) &&
-                            isNotCINITIAL(str, offset - 1)) {
+                            isNotCINITIAL(str, offset)) {
                            // greek capital sigma maps depending on 
                            // surrounding cased letters
                            buffer.append(GREEK_SMALL_LETTER_RHO_);