Addition of SpecialCasing tests.

X-SVN-Rev: 6626
This commit is contained in:
Syn Wee Quek 2001-11-06 00:04:10 +00:00
parent b4e203bee7
commit a2a32dbecb
5 changed files with 485 additions and 61 deletions

View File

@ -378,7 +378,7 @@ Completed in 19 seconds</pre>
file.</p>
<p>After doing a build it is a good idea to run all the tests by typing <br>&quot;java
-classpath $Root/classes -DUnicodeData=$Root/src/data/unicode/UnicodeData.txt com.ibm.test.TestAll&quot;. </p>
-classpath $Root/classes -DUnicodeData=$Root/src/data/unicode com.ibm.test.TestAll&quot;. </p>
<p>(As an alternative to using Ant, you can build simply by running javac and javadoc
directly. This is not recommended, but a Windows batch file &quot;buildall.bat&quot;

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $
* $Date: 2001/10/23 17:08:13 $
* $Revision: 1.16 $
* $Date: 2001/11/06 00:02:13 $
* $Revision: 1.17 $
*
*******************************************************************************
*/
@ -17,11 +17,14 @@ import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.util.Locale;
import java.io.File;
import java.util.Vector;
import com.ibm.test.TestFmwk;
import com.ibm.text.UCharacter;
import com.ibm.text.UCharacterCategory;
import com.ibm.text.UCharacterDirection;
import com.ibm.text.UTF16;
import com.ibm.util.Utility;
/**
* Testing class for UCharacter
@ -417,9 +420,10 @@ public final class UCharacterTest extends TestFmwk
/**
* Tests for the character types, direction.<br>
* This method reads in UnicodeData.txt file for testing purposes. A default
* path is provided relative to the class path, however if the user could
* set a system property to change the path.<br>
* e.g. java -DUnicodeData="anyfile.dat" com.ibm.test.text.UCharacterTest
* path is provided relative to the src path, however the user could
* set a system property to change the directory path.<br>
* e.g. java -DUnicodeData="data_directory_path"
* com.ibm.test.text.UCharacterTest
*/
public void TestUnicodeData()
{
@ -438,9 +442,19 @@ public final class UCharacterTest extends TestFmwk
// unicode data file path system name
final String UNICODE_DATA_SYSTEM_NAME = "UnicodeData";
String s = System.getProperty(UNICODE_DATA_SYSTEM_NAME);
if (s == null)
if (s == null) {
// assuming runtime directory is on the same level as the source
s = System.getProperty("user.dir") + "//..//" + UNICODE_DATA_FILE;
}
else {
StringBuffer tempfilename = new StringBuffer(s);
if (tempfilename.charAt(tempfilename.length() - 1) !=
File.pathSeparatorChar) {
tempfilename.append(File.separatorChar);
}
tempfilename.append("UnicodeData.txt");
s = tempfilename.toString();
}
final int LASTUNICODECHAR = 0xFFFD;
int ch = 0,
@ -463,8 +477,7 @@ public final class UCharacterTest extends TestFmwk
index = s.indexOf(';', 5);
String t = s.substring(index + 1, index + 3);
index += 4;
byte cc = (byte)(Integer.parseInt(s.substring(index,
s.indexOf(';', index))));
int cc = Integer.parseInt(s.substring(index, s.indexOf(';', index)));
index = s.indexOf(';', index);
String d = s.substring(index + 1, s.indexOf(';', index + 1));
@ -521,7 +534,7 @@ public final class UCharacterTest extends TestFmwk
{
errln("FAIL UnicodeData.txt not found\n" +
"Configure the system setting UnicodeData to the right path\n" +
"e.g. java -DUnicodeData=\"anyfile.dat\" " +
"e.g. java -DUnicodeData=\"data_dir_path\" " +
"com.ibm.icu.test.text.UCharacterTest");
}
catch (Exception e)
@ -538,6 +551,7 @@ public final class UCharacterTest extends TestFmwk
/**
* Test for the character names
*/
/*
public void TestNames()
{
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
@ -735,6 +749,200 @@ public final class UCharacterTest extends TestFmwk
*/
}
/**
 * Tests the case mappings listed in the file SpecialCasing.txt.<br>
 * The file is looked up in the directory named by the system property
 * "UnicodeData"; when the property is not set, a default path relative to
 * the src path is used.<br>
 * e.g. java -DUnicodeData="data_dir_path" com.ibm.test.text.UCharacterTest
 */
public void TestSpecialCasing()
{
    // default unicode data file name
    final String SPECIALCASING_FILE = "src//data//unicode//SpecialCasing.txt";
    // unicode data file path system name
    final String UNICODE_DATA_SYSTEM_NAME = "UnicodeData";
    String filename = System.getProperty(UNICODE_DATA_SYSTEM_NAME);
    if (filename == null) {
        // assuming runtime directory is on the same level as the source
        filename = System.getProperty("user.dir") + "//..//"
                   + SPECIALCASING_FILE;
    }
    else if (filename.length() == 0) {
        // an empty directory name means the current directory
        filename = "SpecialCasing.txt";
    }
    else {
        // File joins directory and name with exactly one name separator,
        // whether or not the directory already ends with one
        filename = new File(filename, "SpecialCasing.txt").getPath();
    }
    try
    {
        // reading in the SpecialCasing file
        BufferedReader input = new BufferedReader(new FileReader(filename));
        try {
            String line;
            while ((line = input.readLine()) != null) {
                // blank lines and comment lines carry no mapping
                if (line.length() == 0 || line.charAt(0) == '#') {
                    continue;
                }
                // fields: 0 = code, 1 = lower, 2 = title, 3 = upper,
                // optional 4 = condition list
                String chstr[] = getUnicodeStrings(line);
                if (chstr.length != 5) {
                    // unconditional mapping
                    String source = chstr[0];
                    verifyCaseMapping(line, "toLowerCase for character ",
                                      source, chstr[1],
                                      UCharacter.toLowerCase(source));
                    verifyCaseMapping(line, "toUpperCase for character ",
                                      source, chstr[3],
                                      UCharacter.toUpperCase(source));
                    continue;
                }

                // conditional mapping: surround the character with the
                // context that the condition asks for
                String condition = chstr[4];
                StringBuffer strbuffer = new StringBuffer(chstr[0]);
                StringBuffer lowerbuffer = new StringBuffer(chstr[1]);
                StringBuffer upperbuffer = new StringBuffer(chstr[3]);
                if (condition.indexOf("AFTER_i NOT_MORE_ABOVE") != -1) {
                    strbuffer.insert(0, 'i');
                    lowerbuffer.insert(0, strbuffer.toString());
                    upperbuffer.insert(0, (char)(0x130));
                }
                else {
                    if (condition.indexOf("MORE_ABOVE") != -1) {
                        strbuffer.append((char)0x300);
                        lowerbuffer.append((char)0x300);
                        upperbuffer.append((char)0x300);
                    }
                    if (condition.indexOf("AFTER_i") != -1) {
                        strbuffer.insert(0, 'i');
                        lowerbuffer.insert(0, 'i');
                        upperbuffer.insert(0, 'I');
                    }
                    if (condition.indexOf("FINAL_SIGMA") != -1) {
                        strbuffer.insert(0, 'c');
                        lowerbuffer.insert(0, 'c');
                        upperbuffer.insert(0, 'C');
                    }
                }

                String source = strbuffer.toString();
                String lower = lowerbuffer.toString();
                String upper = upperbuffer.toString();
                // a condition list starting with a lowercase letter is
                // taken to start with a two letter language code
                if (condition.length() >= 2
                    && UCharacter.isLowerCase(condition.charAt(0))) {
                    Locale locale = new Locale(condition.substring(0, 2), "");
                    verifyCaseMapping(line,
                        "toLowerCase for locale " + locale + ", character ",
                        source, lower,
                        UCharacter.toLowerCase(locale, source));
                    verifyCaseMapping(line,
                        "toUpperCase for locale " + locale + ", character ",
                        source, upper,
                        UCharacter.toUpperCase(locale, source));
                }
                else {
                    verifyCaseMapping(line, "toLowerCase for character ",
                                      source, lower,
                                      UCharacter.toLowerCase(source));
                    verifyCaseMapping(line, "toUpperCase for character ",
                                      source, upper,
                                      UCharacter.toUpperCase(source));
                }
            }
        }
        finally {
            // release the file even when a line could not be processed
            input.close();
        }
    }
    catch (FileNotFoundException e)
    {
        errln("FAIL SpecialCasing.txt not found\n" +
              "Configure the system setting UnicodeData to the right path\n" +
              "e.g. java -DUnicodeData=\"data_dir_path\" " +
              "com.ibm.icu.test.text.UCharacterTest");
    }
    catch (Exception e)
    {
        // an aborted run must not be mistaken for a passed test
        e.printStackTrace();
        errln("FAIL unexpected exception while testing SpecialCasing.txt: "
              + e);
    }
}

/**
 * Reports a test failure if a case mapping result differs from the
 * expected string.
 * @param line data file line that is being tested, logged on failure
 * @param description text naming the operation under test, placed
 *        directly in front of the escaped source string in the message
 * @param source string that was case mapped
 * @param expected expected mapping result
 * @param actual mapping result that was obtained
 */
private void verifyCaseMapping(String line, String description,
                               String source, String expected,
                               String actual)
{
    if (!actual.equals(expected)) {
        errln(line);
        errln("Fail: " + description + Utility.escape(source)
              + ", expected " + Utility.escape(expected)
              + " but resulted in " + Utility.escape(actual));
    }
}
/**
 * Splits a line into its "; " separated fields and converts each field to
 * a string.<br>
 * A field made up only of space separated hex numbers becomes the string of
 * the corresponding characters (each number is cast to a single char, so
 * values above 0xFFFF are truncated). Any other field, e.g. a condition
 * list such as "lt AFTER_i", is returned verbatim. The text following the
 * last "; " is added as a final element unless it is empty or starts with
 * '#'.
 * @param str string to break up into Unicode strings
 * @return array of the converted fields, in the order they appear in str
 */
private String[] getUnicodeStrings(String str)
{
    Vector v = new Vector(10);
    int start = 0;
    int end = str.indexOf("; ");
    while (end != -1) {
        String field = str.substring(start, end);
        StringBuffer buffer = new StringBuffer(10);
        try {
            int tokenstart = 0;
            int fieldlength = field.length();
            while (tokenstart < fieldlength) {
                int tokenend = field.indexOf(' ', tokenstart);
                if (tokenend == -1) {
                    tokenend = fieldlength;
                }
                // empty tokens come from surplus spaces, skip them
                if (tokenend > tokenstart) {
                    buffer.append((char)Integer.parseInt(
                                field.substring(tokenstart, tokenend), 16));
                }
                tokenstart = tokenend + 1;
            }
        } catch (NumberFormatException e) {
            // not a list of hex numbers, keep the whole field as text
            buffer.setLength(0);
            buffer.append(field);
        }
        v.addElement(buffer.toString());
        start = end + 2;
        end = str.indexOf("; ", start);
    }
    // whatever follows the last separator is kept unless it is a comment
    String rest = str.substring(start);
    if (rest.length() != 0 && rest.charAt(0) != '#') {
        v.addElement(rest);
    }
    String result[] = new String[v.size()];
    v.copyInto(result);
    return result;
}
public static void main(String[] arg)
{

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
* $Date: 2001/11/02 23:11:36 $
* $Revision: 1.15 $
* $Date: 2001/11/06 00:02:14 $
* $Revision: 1.16 $
*
*******************************************************************************
*/
@ -883,13 +883,13 @@ public final class UCharacter
* @param ch code point whose combining is to be retrieved
* @return the combining class of the codepoint
*/
public static byte getCombiningClass(int ch)
public static int getCombiningClass(int ch)
{
int props = getProps(ch);
if(!UCharacterPropertyDB.isExceptionIndicator(props)) {
if (UCharacterPropertyDB.getPropType(props) ==
UCharacterCategory.NON_SPACING_MARK) {
return (byte)(PROPERTY_DB_.getUnsignedValue(props));
return PROPERTY_DB_.getUnsignedValue(props);
}
else {
return 0;
@ -897,10 +897,10 @@ public final class UCharacter
}
else {
// the combining class is in bits 23..16 of the first exception value
return (byte)(
(PROPERTY_DB_.getException(PROPERTY_DB_.getExceptionIndex(props),
return (PROPERTY_DB_.getException(
PROPERTY_DB_.getExceptionIndex(props),
UCharacterPropertyDB.EXC_COMBINING_CLASS_)
>> SHIFT_16_) & LAST_BYTE_MASK_);
>> SHIFT_16_) & LAST_BYTE_MASK_;
}
}
@ -1104,7 +1104,6 @@ public final class UCharacter
while (offset < size)
{
int ch = UTF16.charAt(str, offset);
offset += UTF16.getCharCount(ch);
int props = PROPERTY_DB_.getProperty(ch);
if (!UCharacterPropertyDB.isExceptionIndicator(props))
{
@ -1130,6 +1129,7 @@ public final class UCharacter
}
}
}
offset += UTF16.getCharCount(ch);
}
return result.toString();
}
@ -1149,7 +1149,6 @@ public final class UCharacter
StringBuffer result = new StringBuffer(length);
while (offset < length) {
int ch = UTF16.charAt(str, offset);
offset += UTF16.getCharCount(ch);
int props = PROPERTY_DB_.getProperty(ch);
if (!UCharacterPropertyDB.isExceptionIndicator(props)) {
int type = UCharacterPropertyDB.getPropType(props);
@ -1174,6 +1173,7 @@ public final class UCharacter
}
}
}
offset += UTF16.getCharCount(ch);
}
return result.toString();
}
@ -1793,6 +1793,7 @@ public final class UCharacter
private static boolean isCFINAL(String str, int offset)
{
int length = str.length();
offset += UTF16.getCharCount(UTF16.charAt(str, offset));
while (offset < length) {
int ch = UTF16.charAt(str, offset);
int cat = getType(ch);
@ -1821,7 +1822,7 @@ public final class UCharacter
private static boolean isNotCINITIAL(String str, int offset)
{
offset --;
while (offset > 0) {
while (offset >= 0) {
int ch = UTF16.charAt(str, offset);
int cat = getType(ch);
if (cat == UCharacterCategory.LOWERCASE_LETTER ||
@ -1851,7 +1852,7 @@ public final class UCharacter
private static boolean isAFTER_i(String str, int offset)
{
offset --;
while (offset > 0) {
while (offset >= 0) {
int ch = UTF16.charAt(str, offset);
if (ch == LATIN_SMALL_LETTER_I_ || ch == LATIN_SMALL_LETTER_J_ ||
ch == LATIN_SMALL_LETTER_I_WITH_OGONEK_ ||
@ -1884,7 +1885,7 @@ public final class UCharacter
private static boolean isAFTER_I(String str, int offset)
{
offset --;
while (offset > 0) {
while (offset >= 0) {
int ch = UTF16.charAt(str, offset);
if (ch == LATIN_CAPITAL_LETTER_I_) {
return true; // preceded by I
@ -1914,6 +1915,7 @@ public final class UCharacter
private static boolean isFollowedByMOREABOVE(String str, int offset)
{
int length = str.length();
offset += UTF16.getCharCount(UTF16.charAt(str, 0));
while (offset < length) {
int ch = UTF16.charAt(str, offset);
int cc = getCombiningClass(ch);
@ -1941,6 +1943,7 @@ public final class UCharacter
private static boolean isFollowedByDotAbove(String str, int offset)
{
int length = str.length();
offset += UTF16.getCharCount(UTF16.charAt(str, 0));
while (offset < length) {
int ch = UTF16.charAt(str, offset);
if (ch == COMBINING_DOT_ABOVE_) {
@ -1976,13 +1979,14 @@ public final class UCharacter
if (exception < 0) {
String language = locale.getLanguage();
// use hardcoded conditions and mappings
if (language.equals(TURKISH_) && ch == LATIN_SMALL_LETTER_I_) {
if ((language.equals(TURKISH_) || language.equals(AZERBAIJANI_))
&& ch == LATIN_SMALL_LETTER_I_) {
// turkish: i maps to dotted I
buffer.append(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_);
}
else {
if (language.equals(LITHUANIAN_) && ch == COMBINING_DOT_ABOVE_
&& isAFTER_i(str, offset - 1)) {
&& isAFTER_i(str, offset)) {
// lithuanian: remove DOT ABOVE after U+0069 "i" with
// upper or titlecase
return; // remove the dot (continue without output)
@ -2087,7 +2091,7 @@ public final class UCharacter
}
else {
if (ch == COMBINING_DOT_ABOVE_ &&
isAFTER_I(str, offset - 1) &&
isAFTER_I(str, offset) &&
!isFollowedByMOREABOVE(str, offset)) {
// decomposed I+dot above becomes i (see handling of
// U+0049 for turkish) and removes the dot above
@ -2096,7 +2100,7 @@ public final class UCharacter
else {
if (ch == GREEK_CAPITAL_LETTER_SIGMA_ &&
isCFINAL(str, offset) &&
isNotCINITIAL(str, offset - 1)) {
isNotCINITIAL(str, offset)) {
// greek capital sigma maps depending on
// surrounding cased letters
buffer.append(GREEK_SMALL_LETTER_RHO_);

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UCharacterTest.java,v $
* $Date: 2001/10/23 17:08:13 $
* $Revision: 1.16 $
* $Date: 2001/11/06 00:02:13 $
* $Revision: 1.17 $
*
*******************************************************************************
*/
@ -17,11 +17,14 @@ import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.util.Locale;
import java.io.File;
import java.util.Vector;
import com.ibm.test.TestFmwk;
import com.ibm.text.UCharacter;
import com.ibm.text.UCharacterCategory;
import com.ibm.text.UCharacterDirection;
import com.ibm.text.UTF16;
import com.ibm.util.Utility;
/**
* Testing class for UCharacter
@ -417,9 +420,10 @@ public final class UCharacterTest extends TestFmwk
/**
* Tests for the character types, direction.<br>
* This method reads in UnicodeData.txt file for testing purposes. A default
* path is provided relative to the class path, however if the user could
* set a system property to change the path.<br>
* e.g. java -DUnicodeData="anyfile.dat" com.ibm.test.text.UCharacterTest
* path is provided relative to the src path, however the user could
* set a system property to change the directory path.<br>
* e.g. java -DUnicodeData="data_directory_path"
* com.ibm.test.text.UCharacterTest
*/
public void TestUnicodeData()
{
@ -438,9 +442,19 @@ public final class UCharacterTest extends TestFmwk
// unicode data file path system name
final String UNICODE_DATA_SYSTEM_NAME = "UnicodeData";
String s = System.getProperty(UNICODE_DATA_SYSTEM_NAME);
if (s == null)
if (s == null) {
// assuming runtime directory is on the same level as the source
s = System.getProperty("user.dir") + "//..//" + UNICODE_DATA_FILE;
}
else {
StringBuffer tempfilename = new StringBuffer(s);
if (tempfilename.charAt(tempfilename.length() - 1) !=
File.pathSeparatorChar) {
tempfilename.append(File.separatorChar);
}
tempfilename.append("UnicodeData.txt");
s = tempfilename.toString();
}
final int LASTUNICODECHAR = 0xFFFD;
int ch = 0,
@ -463,8 +477,7 @@ public final class UCharacterTest extends TestFmwk
index = s.indexOf(';', 5);
String t = s.substring(index + 1, index + 3);
index += 4;
byte cc = (byte)(Integer.parseInt(s.substring(index,
s.indexOf(';', index))));
int cc = Integer.parseInt(s.substring(index, s.indexOf(';', index)));
index = s.indexOf(';', index);
String d = s.substring(index + 1, s.indexOf(';', index + 1));
@ -521,7 +534,7 @@ public final class UCharacterTest extends TestFmwk
{
errln("FAIL UnicodeData.txt not found\n" +
"Configure the system setting UnicodeData to the right path\n" +
"e.g. java -DUnicodeData=\"anyfile.dat\" " +
"e.g. java -DUnicodeData=\"data_dir_path\" " +
"com.ibm.icu.test.text.UCharacterTest");
}
catch (Exception e)
@ -538,6 +551,7 @@ public final class UCharacterTest extends TestFmwk
/**
* Test for the character names
*/
/*
public void TestNames()
{
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5,
@ -735,6 +749,200 @@ public final class UCharacterTest extends TestFmwk
*/
}
/**
 * Tests the case mappings listed in the file SpecialCasing.txt.<br>
 * The file is looked up in the directory named by the system property
 * "UnicodeData"; when the property is not set, a default path relative to
 * the src path is used.<br>
 * e.g. java -DUnicodeData="data_dir_path" com.ibm.test.text.UCharacterTest
 */
public void TestSpecialCasing()
{
    // default unicode data file name
    final String SPECIALCASING_FILE = "src//data//unicode//SpecialCasing.txt";
    // unicode data file path system name
    final String UNICODE_DATA_SYSTEM_NAME = "UnicodeData";
    String filename = System.getProperty(UNICODE_DATA_SYSTEM_NAME);
    if (filename == null) {
        // assuming runtime directory is on the same level as the source
        filename = System.getProperty("user.dir") + "//..//"
                   + SPECIALCASING_FILE;
    }
    else if (filename.length() == 0) {
        // an empty directory name means the current directory
        filename = "SpecialCasing.txt";
    }
    else {
        // File joins directory and name with exactly one name separator,
        // whether or not the directory already ends with one
        filename = new File(filename, "SpecialCasing.txt").getPath();
    }
    try
    {
        // reading in the SpecialCasing file
        BufferedReader input = new BufferedReader(new FileReader(filename));
        try {
            String line;
            while ((line = input.readLine()) != null) {
                // blank lines and comment lines carry no mapping
                if (line.length() == 0 || line.charAt(0) == '#') {
                    continue;
                }
                // fields: 0 = code, 1 = lower, 2 = title, 3 = upper,
                // optional 4 = condition list
                String chstr[] = getUnicodeStrings(line);
                if (chstr.length != 5) {
                    // unconditional mapping
                    String source = chstr[0];
                    verifyCaseMapping(line, "toLowerCase for character ",
                                      source, chstr[1],
                                      UCharacter.toLowerCase(source));
                    verifyCaseMapping(line, "toUpperCase for character ",
                                      source, chstr[3],
                                      UCharacter.toUpperCase(source));
                    continue;
                }

                // conditional mapping: surround the character with the
                // context that the condition asks for
                String condition = chstr[4];
                StringBuffer strbuffer = new StringBuffer(chstr[0]);
                StringBuffer lowerbuffer = new StringBuffer(chstr[1]);
                StringBuffer upperbuffer = new StringBuffer(chstr[3]);
                if (condition.indexOf("AFTER_i NOT_MORE_ABOVE") != -1) {
                    strbuffer.insert(0, 'i');
                    lowerbuffer.insert(0, strbuffer.toString());
                    upperbuffer.insert(0, (char)(0x130));
                }
                else {
                    if (condition.indexOf("MORE_ABOVE") != -1) {
                        strbuffer.append((char)0x300);
                        lowerbuffer.append((char)0x300);
                        upperbuffer.append((char)0x300);
                    }
                    if (condition.indexOf("AFTER_i") != -1) {
                        strbuffer.insert(0, 'i');
                        lowerbuffer.insert(0, 'i');
                        upperbuffer.insert(0, 'I');
                    }
                    if (condition.indexOf("FINAL_SIGMA") != -1) {
                        strbuffer.insert(0, 'c');
                        lowerbuffer.insert(0, 'c');
                        upperbuffer.insert(0, 'C');
                    }
                }

                String source = strbuffer.toString();
                String lower = lowerbuffer.toString();
                String upper = upperbuffer.toString();
                // a condition list starting with a lowercase letter is
                // taken to start with a two letter language code
                if (condition.length() >= 2
                    && UCharacter.isLowerCase(condition.charAt(0))) {
                    Locale locale = new Locale(condition.substring(0, 2), "");
                    verifyCaseMapping(line,
                        "toLowerCase for locale " + locale + ", character ",
                        source, lower,
                        UCharacter.toLowerCase(locale, source));
                    verifyCaseMapping(line,
                        "toUpperCase for locale " + locale + ", character ",
                        source, upper,
                        UCharacter.toUpperCase(locale, source));
                }
                else {
                    verifyCaseMapping(line, "toLowerCase for character ",
                                      source, lower,
                                      UCharacter.toLowerCase(source));
                    verifyCaseMapping(line, "toUpperCase for character ",
                                      source, upper,
                                      UCharacter.toUpperCase(source));
                }
            }
        }
        finally {
            // release the file even when a line could not be processed
            input.close();
        }
    }
    catch (FileNotFoundException e)
    {
        errln("FAIL SpecialCasing.txt not found\n" +
              "Configure the system setting UnicodeData to the right path\n" +
              "e.g. java -DUnicodeData=\"data_dir_path\" " +
              "com.ibm.icu.test.text.UCharacterTest");
    }
    catch (Exception e)
    {
        // an aborted run must not be mistaken for a passed test
        e.printStackTrace();
        errln("FAIL unexpected exception while testing SpecialCasing.txt: "
              + e);
    }
}

/**
 * Reports a test failure if a case mapping result differs from the
 * expected string.
 * @param line data file line that is being tested, logged on failure
 * @param description text naming the operation under test, placed
 *        directly in front of the escaped source string in the message
 * @param source string that was case mapped
 * @param expected expected mapping result
 * @param actual mapping result that was obtained
 */
private void verifyCaseMapping(String line, String description,
                               String source, String expected,
                               String actual)
{
    if (!actual.equals(expected)) {
        errln(line);
        errln("Fail: " + description + Utility.escape(source)
              + ", expected " + Utility.escape(expected)
              + " but resulted in " + Utility.escape(actual));
    }
}
/**
 * Splits a line into its "; " separated fields and converts each field to
 * a string.<br>
 * A field made up only of space separated hex numbers becomes the string of
 * the corresponding characters (each number is cast to a single char, so
 * values above 0xFFFF are truncated). Any other field, e.g. a condition
 * list such as "lt AFTER_i", is returned verbatim. The text following the
 * last "; " is added as a final element unless it is empty or starts with
 * '#'.
 * @param str string to break up into Unicode strings
 * @return array of the converted fields, in the order they appear in str
 */
private String[] getUnicodeStrings(String str)
{
    Vector v = new Vector(10);
    int start = 0;
    int end = str.indexOf("; ");
    while (end != -1) {
        String field = str.substring(start, end);
        StringBuffer buffer = new StringBuffer(10);
        try {
            int tokenstart = 0;
            int fieldlength = field.length();
            while (tokenstart < fieldlength) {
                int tokenend = field.indexOf(' ', tokenstart);
                if (tokenend == -1) {
                    tokenend = fieldlength;
                }
                // empty tokens come from surplus spaces, skip them
                if (tokenend > tokenstart) {
                    buffer.append((char)Integer.parseInt(
                                field.substring(tokenstart, tokenend), 16));
                }
                tokenstart = tokenend + 1;
            }
        } catch (NumberFormatException e) {
            // not a list of hex numbers, keep the whole field as text
            buffer.setLength(0);
            buffer.append(field);
        }
        v.addElement(buffer.toString());
        start = end + 2;
        end = str.indexOf("; ", start);
    }
    // whatever follows the last separator is kept unless it is a comment
    String rest = str.substring(start);
    if (rest.length() != 0 && rest.charAt(0) != '#') {
        v.addElement(rest);
    }
    String result[] = new String[v.size()];
    v.copyInto(result);
    return result;
}
public static void main(String[] arg)
{

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UCharacter.java,v $
* $Date: 2001/11/02 23:11:36 $
* $Revision: 1.15 $
* $Date: 2001/11/06 00:02:14 $
* $Revision: 1.16 $
*
*******************************************************************************
*/
@ -883,13 +883,13 @@ public final class UCharacter
* @param ch code point whose combining is to be retrieved
* @return the combining class of the codepoint
*/
public static byte getCombiningClass(int ch)
public static int getCombiningClass(int ch)
{
int props = getProps(ch);
if(!UCharacterPropertyDB.isExceptionIndicator(props)) {
if (UCharacterPropertyDB.getPropType(props) ==
UCharacterCategory.NON_SPACING_MARK) {
return (byte)(PROPERTY_DB_.getUnsignedValue(props));
return PROPERTY_DB_.getUnsignedValue(props);
}
else {
return 0;
@ -897,10 +897,10 @@ public final class UCharacter
}
else {
// the combining class is in bits 23..16 of the first exception value
return (byte)(
(PROPERTY_DB_.getException(PROPERTY_DB_.getExceptionIndex(props),
return (PROPERTY_DB_.getException(
PROPERTY_DB_.getExceptionIndex(props),
UCharacterPropertyDB.EXC_COMBINING_CLASS_)
>> SHIFT_16_) & LAST_BYTE_MASK_);
>> SHIFT_16_) & LAST_BYTE_MASK_;
}
}
@ -1104,7 +1104,6 @@ public final class UCharacter
while (offset < size)
{
int ch = UTF16.charAt(str, offset);
offset += UTF16.getCharCount(ch);
int props = PROPERTY_DB_.getProperty(ch);
if (!UCharacterPropertyDB.isExceptionIndicator(props))
{
@ -1130,6 +1129,7 @@ public final class UCharacter
}
}
}
offset += UTF16.getCharCount(ch);
}
return result.toString();
}
@ -1149,7 +1149,6 @@ public final class UCharacter
StringBuffer result = new StringBuffer(length);
while (offset < length) {
int ch = UTF16.charAt(str, offset);
offset += UTF16.getCharCount(ch);
int props = PROPERTY_DB_.getProperty(ch);
if (!UCharacterPropertyDB.isExceptionIndicator(props)) {
int type = UCharacterPropertyDB.getPropType(props);
@ -1174,6 +1173,7 @@ public final class UCharacter
}
}
}
offset += UTF16.getCharCount(ch);
}
return result.toString();
}
@ -1793,6 +1793,7 @@ public final class UCharacter
private static boolean isCFINAL(String str, int offset)
{
int length = str.length();
offset += UTF16.getCharCount(UTF16.charAt(str, offset));
while (offset < length) {
int ch = UTF16.charAt(str, offset);
int cat = getType(ch);
@ -1821,7 +1822,7 @@ public final class UCharacter
private static boolean isNotCINITIAL(String str, int offset)
{
offset --;
while (offset > 0) {
while (offset >= 0) {
int ch = UTF16.charAt(str, offset);
int cat = getType(ch);
if (cat == UCharacterCategory.LOWERCASE_LETTER ||
@ -1851,7 +1852,7 @@ public final class UCharacter
private static boolean isAFTER_i(String str, int offset)
{
offset --;
while (offset > 0) {
while (offset >= 0) {
int ch = UTF16.charAt(str, offset);
if (ch == LATIN_SMALL_LETTER_I_ || ch == LATIN_SMALL_LETTER_J_ ||
ch == LATIN_SMALL_LETTER_I_WITH_OGONEK_ ||
@ -1884,7 +1885,7 @@ public final class UCharacter
private static boolean isAFTER_I(String str, int offset)
{
offset --;
while (offset > 0) {
while (offset >= 0) {
int ch = UTF16.charAt(str, offset);
if (ch == LATIN_CAPITAL_LETTER_I_) {
return true; // preceded by I
@ -1914,6 +1915,7 @@ public final class UCharacter
private static boolean isFollowedByMOREABOVE(String str, int offset)
{
int length = str.length();
offset += UTF16.getCharCount(UTF16.charAt(str, 0));
while (offset < length) {
int ch = UTF16.charAt(str, offset);
int cc = getCombiningClass(ch);
@ -1941,6 +1943,7 @@ public final class UCharacter
private static boolean isFollowedByDotAbove(String str, int offset)
{
int length = str.length();
offset += UTF16.getCharCount(UTF16.charAt(str, 0));
while (offset < length) {
int ch = UTF16.charAt(str, offset);
if (ch == COMBINING_DOT_ABOVE_) {
@ -1976,13 +1979,14 @@ public final class UCharacter
if (exception < 0) {
String language = locale.getLanguage();
// use hardcoded conditions and mappings
if (language.equals(TURKISH_) && ch == LATIN_SMALL_LETTER_I_) {
if ((language.equals(TURKISH_) || language.equals(AZERBAIJANI_))
&& ch == LATIN_SMALL_LETTER_I_) {
// turkish: i maps to dotted I
buffer.append(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_);
}
else {
if (language.equals(LITHUANIAN_) && ch == COMBINING_DOT_ABOVE_
&& isAFTER_i(str, offset - 1)) {
&& isAFTER_i(str, offset)) {
// lithuanian: remove DOT ABOVE after U+0069 "i" with
// upper or titlecase
return; // remove the dot (continue without output)
@ -2087,7 +2091,7 @@ public final class UCharacter
}
else {
if (ch == COMBINING_DOT_ABOVE_ &&
isAFTER_I(str, offset - 1) &&
isAFTER_I(str, offset) &&
!isFollowedByMOREABOVE(str, offset)) {
// decomposed I+dot above becomes i (see handling of
// U+0049 for turkish) and removes the dot above
@ -2096,7 +2100,7 @@ public final class UCharacter
else {
if (ch == GREEK_CAPITAL_LETTER_SIGMA_ &&
isCFINAL(str, offset) &&
isNotCINITIAL(str, offset - 1)) {
isNotCINITIAL(str, offset)) {
// greek capital sigma maps depending on
// surrounding cased letters
buffer.append(GREEK_SMALL_LETTER_RHO_);