ICU-0 fixes to work with Eclipse

X-SVN-Rev: 11702
This commit is contained in:
Mark Davis 2003-04-25 01:39:15 +00:00
parent 5967077f2c
commit c5d385e09e
11 changed files with 342 additions and 85 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/docs/CheckTags.java,v $
* $Date: 2002/12/17 07:31:26 $
* $Revision: 1.13 $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.14 $
*
*******************************************************************************
*/
@ -252,7 +252,7 @@ public class CheckTags {
void tagErr(Tag tag) {
// Tag.position() requires JDK 1.4, build.xml tests for this
errln(tag.toString() + " [" + tag.position() + "]");
errln(tag.toString() + " [" + /* tag.position() + */ "]");
}
void doDocs(ProgramElementDoc[] docs, String header, boolean reportError) {
@ -362,7 +362,7 @@ public class CheckTags {
}
}
if (!foundRequiredTag) {
errln("missing required tag [" + doc.position() + "]");
errln("missing required tag [" + /*doc.position() +*/ "]");
}
}
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
* $Date: 2002/10/03 22:58:17 $
* $Revision: 1.11 $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -43,13 +43,15 @@ public class Main {
for (int i = 0; i < args.length; ++i) {
String arg = args[i];
System.out.println("OPTION: " + arg);
if (arg.charAt(0) == '#') return; // skip rest of line
if (arg.equalsIgnoreCase("ICU")) args = Utility.append(args, ICU_FILES);
else if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(WriteCollationData.collator);
else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(WriteCollationData.collator);
//else if (arg.equalsIgnoreCase("writeNonspacingDifference")) WriteCollationData.writeNonspacingDifference();
else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator);
else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator);
else if (arg.equalsIgnoreCase("scriptChart")) WriteCharts.scriptChart();
else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
* $Date: 2002/10/05 02:16:17 $
* $Revision: 1.15 $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.16 $
*
*******************************************************************************
*/
@ -434,6 +434,87 @@ public class WriteCharts implements UCD_Types {
closeFile(output);
closeIndexFile(indexFile, "", CASE);
}
/**
 * Generates the script charts under the <code>charts\script\</code> folder.
 *
 * <p>Every represented code point whose general category is not Cs, Co or Cn
 * is grouped by the best script of its NFKD decomposition; code points whose
 * best script is COMMON_SCRIPT are grouped by general category (offset by
 * CAT_OFFSET) instead. One chart file is opened per group via
 * <code>openFile</code>, and each code point is written as a table cell with
 * at most 11 cells per row.
 *
 * @throws IOException if a template cannot be copied or an output file cannot
 *         be written
 */
static public void scriptChart() throws IOException {
    Default.setUCD();
    HACK_KANA = false;

    // Sorted set of (group key, (NFKD decomposition, code point)) pairs.
    // NOTE(review): relies on Pair being ordered by first, then second -- confirm.
    Set set = new TreeSet();

    for (int i = 0; i <= 0x10FFFF; ++i) {
        if (!Default.ucd.isRepresented(i)) continue;
        byte cat = Default.ucd.getCategory(i);
        if (cat == Cs || cat == Co || cat == Cn) continue;

        String decomp = Default.nfkd.normalize(i);
        int script = getBestScript(decomp);

        set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
            new Pair(decomp,
                new Integer(i))));
    }

    PrintWriter output = null;

    Iterator it = set.iterator();
    int oldScript = -127; // sentinel, so the first element always starts a new chart
    int counter = 0;

    String[] replacement = new String[] {"%%%", "Script Charts"};
    String folder = "charts\\script\\";

    // Copy the static template files, then start the index list.
    Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
    Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
    Utility.copyTextFile("script_help.html", Utility.UTF8, folder + "help.html");

    indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
    Utility.appendFile("script_index_header.html", Utility.UTF8, indexFile, replacement);

    int columnCount = 0;

    while (it.hasNext()) {
        // Advance the progress counter; it was previously never incremented,
        // so the progress indicator always saw 0.
        Utility.dot(counter++);

        Pair p = (Pair) it.next();
        int script = ((Integer) p.first).intValue();
        int cp = ((Integer)((Pair)p.second).second).intValue();

        if (script != oldScript
                // && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
                ) {
            // New group: finish the current chart file and start a fresh row count.
            // NOTE(review): on the first pass output is still null here -- confirm
            // that closeFile tolerates null.
            closeFile(output);
            output = null;
            oldScript = script;
            columnCount = 0;
        }

        if (output == null) {
            output = openFile(0, folder, script);
        }

        // Break the table row once it already holds 11 cells.
        if (columnCount > 10) {
            output.println("</tr><tr>");
            columnCount = 0;
        }
        showCell(output, UTF16.valueOf(cp), "<td ", "", false);
        ++columnCount;
    }

    closeFile(output);
    // NOTE(review): CASE is passed here; confirm this is the intended value for the script charts.
    closeIndexFile(indexFile, "", CASE);
}
static public void addMapChar(Map m, Set stoplist, String key, String ch) {
if (stoplist.contains(key)) return;
@ -466,7 +547,8 @@ public class WriteCharts implements UCD_Types {
System.out.println("Stop-list: " + stoplist);
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isRepresented(i)) continue;
if (!Default.ucd.isRepresented(i)) continue;
if (!Default.ucd.isAssigned(i)) continue;
if (0xAC00 <= i && i <= 0xD7A3) continue;
if (Default.ucd.hasComputableName(i)) continue;
@ -474,7 +556,7 @@ public class WriteCharts implements UCD_Types {
if (s == null) continue;
if (s.startsWith("<")) {
System.out.println("Wierd character at " + Default.ucd.getCodeAndName(i));
System.out.println("Weird character at " + Default.ucd.getCodeAndName(i));
}
String ch = UTF16.valueOf(i);
int last = -1;
@ -816,7 +898,7 @@ public class WriteCharts implements UCD_Types {
out.println("<table border='1' cellspacing='0'>");
out.println("<caption>" + scriptName + "<br>(" + letters.size() + " × " + marks.size() + ")</caption>");
out.println("<caption>" + scriptName + "<br>(" + letters.size() + " ? " + marks.size() + ")</caption>");
Iterator it2 = letters.iterator();
while (it2.hasNext()) {

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
* $Date: 2003/04/01 02:51:57 $
* $Revision: 1.31 $
* $Date: 2003/04/25 01:39:13 $
* $Revision: 1.32 $
*
*******************************************************************************
*/
@ -465,7 +465,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
// add characters with different strengths, to verify the order
addStringY(s + 'a', option);
addStringY(s + 'b', option);
addStringY(s + 'á', option);
addStringY(s + '?', option);
addStringY(s + 'A', option);
addStringY(s + '!', option);
if (option == SHIFTED && collator.isVariable(firstChar)) addStringY(s + LOW_ACCENT, option);
@ -2503,7 +2503,7 @@ F900..FAFF; CJK Compatibility Ideographs
|| (c >= 0xA0 && !UCharacterProperty.isRuleWhiteSpace(c))
*/
needsQuoting = new UnicodeSet("[a-zA-Z0-9\\u00A0-\\U00010FFF]");
needsQuoting.remove();
// needsQuoting.remove();
}
s = NFC.normalize(s);
quoteOperandBuffer.setLength(0);

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Compare14652.java,v $
* $Date: 2003/04/23 19:01:21 $
* $Revision: 1.1 $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -24,6 +24,8 @@ import com.ibm.icu.text.UnicodeSet;
public class Compare14652 implements UCD_Types {
static final boolean oldVersion = false;
public static UnicodeSet getSet(int prop, byte propValue) {
return UnifiedBinaryProperty.make(prop | propValue).getSet();
}
@ -33,28 +35,40 @@ public class Compare14652 implements UCD_Types {
combiningSet = getSet(CATEGORY, Mc)
.addAll(getSet(CATEGORY, Me))
.addAll(getSet(CATEGORY, Mn)),
alphaSet = getSet(DERIVED, PropAlphabetic).addAll(combiningSet),
lowerSet = getSet(DERIVED, PropLowercase).addAll(titleSet),
upperSet = getSet(DERIVED, PropUppercase).addAll(titleSet),
digitSet = getSet(CATEGORY, Nd),
xdigitSet = new UnicodeSet("[a-fA-F\uFF21-\uFF26\uFF41-\uFF46]").addAll(digitSet),
spaceSet = getSet(BINARY_PROPERTIES, White_space),
controlSet = getSet(CATEGORY, Cc),
punctSet = getSet(CATEGORY, Pd)
zSet = getSet(CATEGORY, Zs)
.addAll(getSet(CATEGORY, Zl))
.addAll(getSet(CATEGORY, Zp)),
pSet = getSet(CATEGORY, Pd)
.addAll(getSet(CATEGORY, Ps))
.addAll(getSet(CATEGORY, Pe))
.addAll(getSet(CATEGORY, Pc))
.addAll(getSet(CATEGORY, Po))
.addAll(getSet(CATEGORY, Pi))
.addAll(getSet(CATEGORY, Pf)),
sSet = getSet(CATEGORY, Sm)
.addAll(getSet(CATEGORY, Sc))
.addAll(getSet(CATEGORY, Sk))
.addAll(getSet(CATEGORY, So)),
noSet = getSet(CATEGORY, No),
csSet = getSet(CATEGORY, Cs),
cfSet = getSet(CATEGORY, Cf),
cnSet = getSet(CATEGORY, Cn),
circled = getSet(DECOMPOSITION_TYPE, COMPAT_CIRCLE),
whitespaceSet = getSet(BINARY_PROPERTIES, White_space),
alphaSet = getSet(DERIVED, PropAlphabetic).addAll(combiningSet),
lowerSet = getSet(DERIVED, PropLowercase).addAll(titleSet).removeAll(circled),
upperSet = getSet(DERIVED, PropUppercase).addAll(titleSet).removeAll(circled),
digitSet = getSet(CATEGORY, Nd),
xdigitSet = new UnicodeSet("[a-fA-F\uFF21-\uFF26\uFF41-\uFF46]").addAll(digitSet),
spaceSet = whitespaceSet.size() == 0 ? zSet : whitespaceSet,
controlSet = getSet(CATEGORY, Cc),
punctSet = new UnicodeSet(pSet).addAll(sSet),
graphSet = new UnicodeSet(0,0x10ffff)
.removeAll(controlSet)
//.removeAll(getSet(CATEGORY, Cf))
.removeAll(getSet(CATEGORY, Cs))
.removeAll(getSet(CATEGORY, Cn))
.removeAll(getSet(CATEGORY, Zs))
.removeAll(getSet(CATEGORY, Zl))
.removeAll(getSet(CATEGORY, Zp)),
.removeAll(csSet)
.removeAll(cnSet)
.removeAll(zSet),
// Cc, Cf, Cs, Cn, Z
blankSet = new UnicodeSet(spaceSet).removeAll(new UnicodeSet("[\\u000A-\\u000D\\u0085]"))
.removeAll(getSet(CATEGORY, Zl))
@ -67,16 +81,18 @@ public class Compare14652 implements UCD_Types {
String guess = "???";
UnicodeSet guessContents = new UnicodeSet();
String wsname = whitespaceSet.size() == 0 ? "gc=Z" : "Whitespace";
Prop(String name) {
this.name = name;
if (name.equals("alpha")) {
guess = "Alphabetic + gc=M";
guessContents = alphaSet;
} else if (name.equals("lower")) {
guess = "Lowercase + gc=Lt";
guess = "Lowercase + gc=Lt - dt=circle";
guessContents = lowerSet;
} else if (name.equals("upper")) {
guess = "Uppercase + gc=Lt";
guess = "Uppercase + gc=Lt - dt=circle";
guessContents = upperSet;
} else if (name.equals("digit")) {
guess = "gc=Nd";
@ -85,20 +101,20 @@ public class Compare14652 implements UCD_Types {
guess = "gc=Nd+a..f (upper/lower,normal/fullwidth)";
guessContents = xdigitSet;
} else if (name.equals("space")) {
guess = "Whitespace";
guess = wsname;
guessContents = spaceSet;
Utility.showSetNames("Whitespace", spaceSet, true, Default.ucd);
//Utility.showSetNames("Whitespace", spaceSet, true, Default.ucd);
} else if (name.equals("cntrl")) {
guess = "gc=Cc";
guessContents = controlSet;
} else if (name.equals("punct")) {
guess = "gc=P";
guess = "gc=P,S";
guessContents = punctSet;
} else if (name.equals("graph")) {
guess = "All - gc=Cc, Cs, Cn, or Z";
guessContents = graphSet;
} else if (name.equals("blank")) {
guess = "Whitespace - (LF,VT,FF,CR,NEL + gc=Zl,Zp)";
guess = wsname + " - (LF,VT,FF,CR,NEL + gc=Zl,Zp)";
guessContents = blankSet;
} else if (name.equals("ISO_14652_class \"combining\"")) {
guess = "gc=M";
@ -143,43 +159,183 @@ tolower
static int propCount = 0;
/**
 * Reads the ISO 14652 character-class file, builds one Prop per class listed
 * in it, writes each Prop to a Diff14652 log file, and then checks the POSIX
 * requirements for disjointness and inclusion between the classes.
 *
 * @param args not used
 * @throws IOException if the input file cannot be read or the log cannot be written
 */
public static void main(String[] args) throws IOException {
BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1);
while (true) {
String line = br.readLine();
if (line == null) break;
line = line.trim();
if (line.length() == 0) continue;
if (line.charAt(line.length() - 1) == '/') {
line = line.substring(0, line.length() - 1);
}
line = line.trim();
if (line.length() == 0) continue;
char ch = line.charAt(0);
if (ch == '%') continue;
if (ch == '(') continue;
if (ch == '<') {
addItems(line, props[propCount-1].contents);
} else {
// new property
System.out.println(line);
if (line.equals("width")) break;
props[propCount] = new Prop(line);
props[propCount].name = "ISO_14652_" + line;
props[propCount].contents = new UnicodeSet();
propCount++;
}
}
PrintWriter log = Utility.openPrintWriter("Diff14652.txt", Utility.UTF8_WINDOWS);
log.write('\uFEFF');
String version = Default.ucd.getVersion();
PrintWriter log = Utility.openPrintWriter("Diff14652_" + version + ".txt", Utility.UTF8_WINDOWS);
try {
log.write('\uFEFF');
log.print("Version: " + version);
if (false) {
UnicodeSet ID = getSet(DERIVED, ID_Start).addAll(getSet(DERIVED, ID_Continue_NO_Cf));
UnicodeSet XID = getSet(DERIVED, Mod_ID_Start).addAll(getSet(DERIVED, Mod_ID_Continue_NO_Cf));
UnicodeSet alphanumSet = new UnicodeSet(alphaSet).addAll(digitSet).addAll(getSet(CATEGORY, Pc));
Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd);
Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd);
}
BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1);
while (true) {
String line = br.readLine();
if (line == null) break;
line = line.trim();
if (line.length() == 0) continue;
if (line.charAt(line.length() - 1) == '/') {
line = line.substring(0, line.length() - 1);
}
line = line.trim();
if (line.length() == 0) continue;
char ch = line.charAt(0);
if (ch == '%') continue;
if (ch == '(') continue;
if (ch == '<') {
addItems(line, props[propCount-1].contents);
} else {
// new property
System.out.println(line);
if (line.equals("width")) break;
props[propCount] = new Prop(line);
props[propCount].name = "ISO_14652_" + line;
props[propCount].contents = new UnicodeSet();
propCount++;
}
}
for (int i = 0; i < propCount; ++i) props[i].show(log);
log.println();
log.println("**************************************************");
log.println("Checking POSIX requirements for inclusion and disjointness.");
log.println("**************************************************");
log.println();
/*
alpha, digit, punct, cntrl are all disjoint
space, cntrl, blank are pairwise disjoint with any of alpha, digit, xdigit
alpha includes upper, lower
graph includes alpha, digit, punct
print includes graph
xdigit includes digit
*/
// NOTE(review): getProp returns null when a class is missing from the input
// file; the checks below dereference these values without a null test.
Prop
alpha = getProp("ISO_14652_alpha"),
upper = getProp("ISO_14652_upper"),
lower = getProp("ISO_14652_lower"),
graph = getProp("ISO_14652_graph"),
//print = getProp("ISO_14652_print"),
punct = getProp("ISO_14652_punct"),
digit = getProp("ISO_14652_digit"),
xdigit = getProp("ISO_14652_xdigit"),
space = getProp("ISO_14652_space"),
blank = getProp("ISO_14652_blank"),
cntrl = getProp("ISO_14652_cntrl");
checkDisjoint(log, new Prop[] {alpha, digit, punct, cntrl});
Prop [] l1 = new Prop[] {space, cntrl, blank};
Prop [] l2 = new Prop[] {alpha, digit, xdigit};
// l1 and l2 are two different lists, so every element of l1 has to be
// paired with every element of l2. The inner index therefore starts at 0;
// starting at i + 1 (correct only when pairing a list with itself) skipped
// (space, alpha), (cntrl, alpha), (cntrl, digit) and every pair with blank.
for (int i = 0; i < l1.length; ++i) {
for (int j = 0; j < l2.length; ++j) {
checkDisjoint(log, l1[i], l2[j]);
}
}
checkIncludes(log, alpha, upper);
checkIncludes(log, alpha, lower);
checkIncludes(log, graph, alpha);
checkIncludes(log, graph, digit);
checkIncludes(log, graph, punct);
//checkIncludes(log, print, graph);
checkIncludes(log, xdigit, digit);
// possibly alpha, digit, punct, cntrl, space cover the !(Cn,Cs)
UnicodeSet trRemainder = new UnicodeSet(cnSet)
.complement()
.removeAll(csSet)
.removeAll(digit.contents)
.removeAll(punct.contents)
.removeAll(alpha.contents)
.removeAll(cntrl.contents)
.removeAll(space.contents);
Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd);
UnicodeSet propRemainder = new UnicodeSet(cnSet)
.complement()
.removeAll(csSet)
//.removeAll(noSet)
//.removeAll(cfSet)
.removeAll(digit.guessContents)
.removeAll(punct.guessContents)
.removeAll(alpha.guessContents)
.removeAll(cntrl.guessContents)
.removeAll(space.guessContents);
Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd);
/*
checkDisjoint(new Prop[] {alpha, digit, punct, cntrl});
UnicodeSet remainder = cnSet.complement();
UnicodeSet guessRemainder = new UnicodeSet(remainder);
for (int i = 0; i < list.length; ++i) {
for (int j = i + 1; j < list.length; ++j) {
compare(log, list[i].name, list[i].contents, list[j].name, list[j].contents);
compare(log, list[i].guess, list[i].guessContents, list[j].guess, list[j].guessContents);
}
remainder.removeAll(list[i].contents);
guessRemainder.removeAll(list[i].guessContents);
}
if (remainder.size() != 0) {
log.println();
log.println("Incomplete (TR): " + remainder);
}
if (guessRemainder.size() != 0) {
log.println();
log.println("Incomplete (Prop): " + guessRemainder);
}
*/
} finally {
// Always release the log file, even if reading or checking fails.
log.close();
}
}
/**
 * Runs the disjointness check on every unordered pair of distinct entries
 * of <code>list</code>.
 *
 * @param log  writer that receives a report for each failing pair
 * @param list properties that are all expected to be mutually disjoint
 */
static void checkDisjoint(PrintWriter log, Prop[] list) {
    final int count = list.length;
    for (int first = 0; first < count; ++first) {
        final Prop left = list[first];
        // Only later entries are paired with 'left', so each pair is checked once.
        for (int second = first + 1; second < count; ++second) {
            checkDisjoint(log, left, list[second]);
        }
    }
}
/**
 * Checks that two properties are disjoint, once for the sets stored in
 * their <code>contents</code> fields and once for their
 * <code>guessContents</code> fields.
 *
 * @param log   writer that receives a report for each failing comparison
 * @param prop1 first property
 * @param prop2 second property
 */
static void checkDisjoint(PrintWriter log, Prop prop1, Prop prop2) {
    // Sets stored under the properties' own names.
    final String firstName = prop1.name;
    final UnicodeSet firstContents = prop1.contents;
    final String secondName = prop2.name;
    final UnicodeSet secondContents = prop2.contents;
    checkDisjoint(log, firstName, firstContents, secondName, secondContents);

    // Sets stored under the guessed property descriptions.
    final String firstGuess = prop1.guess;
    final UnicodeSet firstGuessContents = prop1.guessContents;
    final String secondGuess = prop2.guess;
    final UnicodeSet secondGuessContents = prop2.guessContents;
    checkDisjoint(log, firstGuess, firstGuessContents, secondGuess, secondGuessContents);
}
/**
 * Reports to <code>log</code> when the two sets share at least one element,
 * listing the shared elements. Writes nothing when the sets are disjoint.
 *
 * @param log   writer that receives the failure report
 * @param name  label for <code>set</code> in the report
 * @param set   first set
 * @param name2 label for <code>set2</code> in the report
 * @param set2  second set
 */
static void checkDisjoint(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {
    if (!set.containsSome(set2)) {
        return; // nothing in common, the test passes silently
    }
    log.println();
    log.println("Fails test: " + name + " disjoint-with " + name2);
    // Work on a copy so that neither argument is modified.
    UnicodeSet overlap = new UnicodeSet(set).retainAll(set2);
    Utility.showSetNames(log, "", overlap, false, false, Default.ucd);
}
/**
 * Checks that <code>prop1</code> includes <code>prop2</code>, once for the
 * sets stored in their <code>contents</code> fields and once for their
 * <code>guessContents</code> fields.
 *
 * @param log   writer that receives a report for each failing comparison
 * @param prop1 property expected to be the superset
 * @param prop2 property expected to be the subset
 */
static void checkIncludes(PrintWriter log, Prop prop1, Prop prop2) {
    // Sets stored under the properties' own names.
    final String outerName = prop1.name;
    final UnicodeSet outerContents = prop1.contents;
    final String innerName = prop2.name;
    final UnicodeSet innerContents = prop2.contents;
    checkIncludes(log, outerName, outerContents, innerName, innerContents);

    // Sets stored under the guessed property descriptions.
    final String outerGuess = prop1.guess;
    final UnicodeSet outerGuessContents = prop1.guessContents;
    final String innerGuess = prop2.guess;
    final UnicodeSet innerGuessContents = prop2.guessContents;
    checkIncludes(log, outerGuess, outerGuessContents, innerGuess, innerGuessContents);
}
/**
 * Reports to <code>log</code> when <code>set</code> does not contain every
 * element of <code>set2</code>, listing the elements of <code>set2</code>
 * that are missing from <code>set</code>. Writes nothing when the inclusion
 * holds.
 *
 * @param log   writer that receives the failure report
 * @param name  label for <code>set</code> in the report
 * @param set   set expected to be the superset
 * @param name2 label for <code>set2</code> in the report
 * @param set2  set expected to be the subset
 */
static void checkIncludes(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {
    if (!set.containsAll(set2)) {
        log.println();
        // Space after the colon added so the message matches the
        // "Fails test: " prefix used by checkDisjoint.
        log.println("Fails test: " + name + " includes " + name2);
        // Work on a copy of set2 so that neither argument is modified.
        UnicodeSet diff = new UnicodeSet(set2).removeAll(set);
        Utility.showSetNames(log, "", diff, false, false, Default.ucd);
    }
}
static String[] pieces = new String[100];
// example: <U1F48>..<U1F4D>;<U1F59>;<U1F5B>;<U1F5D>;<U1F5F>;<U1F68>..<U1F6F>;/
@ -208,6 +364,18 @@ tolower
return Integer.parseInt(piece.substring(2,piece.length()-1), 16);
}
/**
 * Looks up a property by name among the first <code>propCount</code>
 * entries of <code>props</code>.
 *
 * @param name exact name to match against each entry's <code>name</code> field
 * @return the first matching entry, or <code>null</code> when none matches
 */
static Prop getProp(String name) {
    Prop found = null;
    int index = 0;
    // Stop at the first match so that earlier entries take precedence.
    while (found == null && index < propCount) {
        Prop candidate = props[index++];
        if (candidate.name.equals(name)) {
            found = candidate;
        }
    }
    return found;
}
// oddities:
// extra space after ';' <U0300>..<U036F>; <U20D0>..<U20FF>; <UFE20>..<UFE2F>;/
// <0>?? <0>;<U0BE7>..<U0BEF>;/

View File

@ -1,3 +1,4 @@
package com.ibm.text.UCD;
public class ListNFComplete {
// find all the characters that are
@ -11,7 +12,7 @@ public class ListNFComplete {
// add an ogonek it changes to a-ogonek + breve
public static void main (String[] args) {
Normalizer nfd = new Normalizer(Normalizer.NFD);
//Normalizer nfd = new Normalizer(Normalizer.NFD);
}
}

View File

@ -5,13 +5,13 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MLStreamWriter.java,v $
* $Date: 2001/12/13 23:35:57 $
* $Revision: 1.3 $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
package com.ibm.text.utility;
package com.ibm.text.UCD;
import java.io.*;
import java.util.*;

View File

@ -1,4 +1,5 @@
public class NFSkippable {
package com.ibm.text.UCD;
public class NFCSkippable {
// find all the characters that are
// a) not decomposed by this normalization form
@ -10,9 +11,8 @@ public class NFSkippable {
// Example: a-breve might satisfy a-d, but if you
// add an ogonek it changes to a-ogonek + breve
public NF
public boolean is(int cp) {
return false;
}
public static void main (String[] args) {

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ProcessUnihan.java,v $
* $Date: 2002/07/14 22:07:00 $
* $Revision: 1.1 $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -17,6 +17,7 @@ import com.ibm.text.utility.*;
import com.ibm.icu.text.UTF16;
import java.util.*;
// stub file, ignore
public final class ProcessUnihan {
@ -49,7 +50,7 @@ public final class ProcessUnihan {
//out = Utility.openPrintWriter("Transliterate_Han_English.txt");
//err = Utility.openPrintWriter("Transliterate_Han_English.log.txt");
BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", true);
BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", Utility.UTF8);
while (true) {
Utility.dot(++lineCounter);
@ -62,10 +63,12 @@ public final class ProcessUnihan {
int count = Utility.split(line, '#', parts);
int code = Integer.parseInt(parts[0].substring(2), 16);
Byte itag = tags.get(tag);
if (itag == null)
Byte itag = tags.get("a");
if (itag == null) {}
String tag = parts[1];
String value = parts[2];
if (tags.containsKey(tag))
if (tags.containsKey(tag)) {}
}
}
}

View File

@ -1,4 +1,5 @@
package com.ibm.text.utility;
import com.ibm.icu.text.UnicodeSet;
final class UnicodeMapInt {
private int [] index = new int[1];
@ -16,7 +17,7 @@ final class UnicodeMapInt {
int i = findIndex(cp);
// A1. if cp already has the value, return
if (data[i - 1] == value) return;
if (data[i - 1] == value) return cp;
int rangeStart = index[i-1];
int rangeLimit = index[i];
@ -74,7 +75,7 @@ final class UnicodeMapInt {
}
/** Finds the least index with a value greater than cp */
private int findIndex(cp) {
private int findIndex( int cp) {
if (cp > 0x10FFFF) throw new ArrayIndexOutOfBoundsException("Code point too large: " + cp); // out of bounds!
int i = -1;
while (true) {

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/testParser.java,v $
* $Date: 2001/08/31 00:19:16 $
* $Revision: 1.2 $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -66,7 +66,7 @@ public class testParser implements XMLParseTypes {
static void test2() throws Exception {
PrintWriter log = Utility.openPrintWriter("UCD-Extract.html");
PrintWriter log = Utility.openPrintWriter("UCD-Extract.html", Utility.UTF8_WINDOWS);
//int fieldCount = 4;
//int width = 100/fieldCount;