ICU-0 fixes to work with Eclipse
X-SVN-Rev: 11702
This commit is contained in:
parent
5967077f2c
commit
c5d385e09e
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/docs/CheckTags.java,v $
|
||||
* $Date: 2002/12/17 07:31:26 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2003/04/25 01:39:15 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -252,7 +252,7 @@ public class CheckTags {
|
||||
|
||||
void tagErr(Tag tag) {
|
||||
// Tag.position() requires JDK 1.4, build.xml tests for this
|
||||
errln(tag.toString() + " [" + tag.position() + "]");
|
||||
errln(tag.toString() + " [" + /* tag.position() + */ "]");
|
||||
}
|
||||
|
||||
void doDocs(ProgramElementDoc[] docs, String header, boolean reportError) {
|
||||
@ -362,7 +362,7 @@ public class CheckTags {
|
||||
}
|
||||
}
|
||||
if (!foundRequiredTag) {
|
||||
errln("missing required tag [" + doc.position() + "]");
|
||||
errln("missing required tag [" + /*doc.position() +*/ "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
|
||||
* $Date: 2002/10/03 22:58:17 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2003/04/25 01:39:15 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -43,13 +43,15 @@ public class Main {
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
String arg = args[i];
|
||||
System.out.println("OPTION: " + arg);
|
||||
if (arg.charAt(0) == '#') return; // skip rest of line
|
||||
|
||||
if (arg.equalsIgnoreCase("ICU")) args = Utility.append(args, ICU_FILES);
|
||||
else if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(WriteCollationData.collator);
|
||||
else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(WriteCollationData.collator);
|
||||
//else if (arg.equalsIgnoreCase("writeNonspacingDifference")) WriteCollationData.writeNonspacingDifference();
|
||||
|
||||
else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator);
|
||||
else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator);
|
||||
else if (arg.equalsIgnoreCase("scriptChart")) WriteCharts.scriptChart();
|
||||
else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
|
||||
else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
|
||||
else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2002/10/05 02:16:17 $
|
||||
* $Revision: 1.15 $
|
||||
* $Date: 2003/04/25 01:39:15 $
|
||||
* $Revision: 1.16 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -434,6 +434,87 @@ public class WriteCharts implements UCD_Types {
|
||||
closeFile(output);
|
||||
closeIndexFile(indexFile, "", CASE);
|
||||
}
|
||||
|
||||
static public void scriptChart() throws IOException {
|
||||
Default.setUCD();
|
||||
HACK_KANA = false;
|
||||
|
||||
Set set = new TreeSet();
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
byte cat = Default.ucd.getCategory(i);
|
||||
if (cat == Cs || cat == Co || cat == Cn) continue;
|
||||
|
||||
String code = UTF16.valueOf(i);
|
||||
|
||||
String decomp = Default.nfkd.normalize(i);
|
||||
int script = getBestScript(decomp);
|
||||
|
||||
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
|
||||
new Pair(decomp,
|
||||
new Integer(i))));
|
||||
}
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
Iterator it = set.iterator();
|
||||
|
||||
int oldScript = -127;
|
||||
|
||||
int counter = 0;
|
||||
String[] replacement = new String[] {"%%%", "Script Charts"};
|
||||
String folder = "charts\\script\\";
|
||||
|
||||
Utility.copyTextFile("index.html", Utility.UTF8, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", Utility.LATIN1, folder + "charts.css");
|
||||
Utility.copyTextFile("script_help.html", Utility.UTF8, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile("script_index_header.html", Utility.UTF8, indexFile, replacement);
|
||||
|
||||
/*
|
||||
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
indexFile.println("<title>UCA Default Collation Table</title>");
|
||||
indexFile.println("<base target='main'>");
|
||||
indexFile.println("<style><!-- p { font-size: 90% } --></style>");
|
||||
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
||||
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
||||
*/
|
||||
|
||||
int columnCount = 0;
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
Pair p = (Pair) it.next();
|
||||
int script = ((Integer) p.first).intValue();
|
||||
int cp = ((Integer)((Pair)p.second).second).intValue();
|
||||
|
||||
if (script != oldScript
|
||||
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
||||
) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
oldScript = script;
|
||||
columnCount = 0;
|
||||
}
|
||||
|
||||
if (output == null) {
|
||||
output = openFile(0, folder, script);
|
||||
}
|
||||
|
||||
if (columnCount > 10) {
|
||||
output.println("</tr><tr>");
|
||||
columnCount = 0;
|
||||
}
|
||||
showCell(output, UTF16.valueOf(cp), "<td ", "", false);
|
||||
++columnCount;
|
||||
}
|
||||
|
||||
closeFile(output);
|
||||
closeIndexFile(indexFile, "", CASE);
|
||||
}
|
||||
|
||||
static public void addMapChar(Map m, Set stoplist, String key, String ch) {
|
||||
if (stoplist.contains(key)) return;
|
||||
@ -466,7 +547,8 @@ public class WriteCharts implements UCD_Types {
|
||||
System.out.println("Stop-list: " + stoplist);
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
if (!Default.ucd.isAssigned(i)) continue;
|
||||
if (0xAC00 <= i && i <= 0xD7A3) continue;
|
||||
if (Default.ucd.hasComputableName(i)) continue;
|
||||
|
||||
@ -474,7 +556,7 @@ public class WriteCharts implements UCD_Types {
|
||||
if (s == null) continue;
|
||||
|
||||
if (s.startsWith("<")) {
|
||||
System.out.println("Wierd character at " + Default.ucd.getCodeAndName(i));
|
||||
System.out.println("Weird character at " + Default.ucd.getCodeAndName(i));
|
||||
}
|
||||
String ch = UTF16.valueOf(i);
|
||||
int last = -1;
|
||||
@ -816,7 +898,7 @@ public class WriteCharts implements UCD_Types {
|
||||
|
||||
|
||||
out.println("<table border='1' cellspacing='0'>");
|
||||
out.println("<caption>" + scriptName + "<br>(" + letters.size() + " × " + marks.size() + ")</caption>");
|
||||
out.println("<caption>" + scriptName + "<br>(" + letters.size() + " \u00D7 " + marks.size() + ")</caption>");
|
||||
|
||||
Iterator it2 = letters.iterator();
|
||||
while (it2.hasNext()) {
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2003/04/01 02:51:57 $
|
||||
* $Revision: 1.31 $
|
||||
* $Date: 2003/04/25 01:39:13 $
|
||||
* $Revision: 1.32 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -465,7 +465,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
// add characters with different strengths, to verify the order
|
||||
addStringY(s + 'a', option);
|
||||
addStringY(s + 'b', option);
|
||||
addStringY(s + 'á', option);
|
||||
addStringY(s + '\u00E1', option);
|
||||
addStringY(s + 'A', option);
|
||||
addStringY(s + '!', option);
|
||||
if (option == SHIFTED && collator.isVariable(firstChar)) addStringY(s + LOW_ACCENT, option);
|
||||
@ -2503,7 +2503,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
||||
|| (c >= 0xA0 && !UCharacterProperty.isRuleWhiteSpace(c))
|
||||
*/
|
||||
needsQuoting = new UnicodeSet("[a-zA-Z0-9\\u00A0-\\U00010FFF]");
|
||||
needsQuoting.remove();
|
||||
// needsQuoting.remove();
|
||||
}
|
||||
s = NFC.normalize(s);
|
||||
quoteOperandBuffer.setLength(0);
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Compare14652.java,v $
|
||||
* $Date: 2003/04/23 19:01:21 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/04/25 01:39:15 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -24,6 +24,8 @@ import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class Compare14652 implements UCD_Types {
|
||||
|
||||
static final boolean oldVersion = false;
|
||||
|
||||
public static UnicodeSet getSet(int prop, byte propValue) {
|
||||
return UnifiedBinaryProperty.make(prop | propValue).getSet();
|
||||
}
|
||||
@ -33,28 +35,40 @@ public class Compare14652 implements UCD_Types {
|
||||
combiningSet = getSet(CATEGORY, Mc)
|
||||
.addAll(getSet(CATEGORY, Me))
|
||||
.addAll(getSet(CATEGORY, Mn)),
|
||||
alphaSet = getSet(DERIVED, PropAlphabetic).addAll(combiningSet),
|
||||
lowerSet = getSet(DERIVED, PropLowercase).addAll(titleSet),
|
||||
upperSet = getSet(DERIVED, PropUppercase).addAll(titleSet),
|
||||
digitSet = getSet(CATEGORY, Nd),
|
||||
xdigitSet = new UnicodeSet("[a-fA-F\uFF21-\uFF26\uFF41-\uFF46]").addAll(digitSet),
|
||||
spaceSet = getSet(BINARY_PROPERTIES, White_space),
|
||||
controlSet = getSet(CATEGORY, Cc),
|
||||
punctSet = getSet(CATEGORY, Pd)
|
||||
zSet = getSet(CATEGORY, Zs)
|
||||
.addAll(getSet(CATEGORY, Zl))
|
||||
.addAll(getSet(CATEGORY, Zp)),
|
||||
pSet = getSet(CATEGORY, Pd)
|
||||
.addAll(getSet(CATEGORY, Ps))
|
||||
.addAll(getSet(CATEGORY, Pe))
|
||||
.addAll(getSet(CATEGORY, Pc))
|
||||
.addAll(getSet(CATEGORY, Po))
|
||||
.addAll(getSet(CATEGORY, Pi))
|
||||
.addAll(getSet(CATEGORY, Pf)),
|
||||
sSet = getSet(CATEGORY, Sm)
|
||||
.addAll(getSet(CATEGORY, Sc))
|
||||
.addAll(getSet(CATEGORY, Sk))
|
||||
.addAll(getSet(CATEGORY, So)),
|
||||
noSet = getSet(CATEGORY, No),
|
||||
csSet = getSet(CATEGORY, Cs),
|
||||
cfSet = getSet(CATEGORY, Cf),
|
||||
cnSet = getSet(CATEGORY, Cn),
|
||||
circled = getSet(DECOMPOSITION_TYPE, COMPAT_CIRCLE),
|
||||
whitespaceSet = getSet(BINARY_PROPERTIES, White_space),
|
||||
alphaSet = getSet(DERIVED, PropAlphabetic).addAll(combiningSet),
|
||||
lowerSet = getSet(DERIVED, PropLowercase).addAll(titleSet).removeAll(circled),
|
||||
upperSet = getSet(DERIVED, PropUppercase).addAll(titleSet).removeAll(circled),
|
||||
digitSet = getSet(CATEGORY, Nd),
|
||||
xdigitSet = new UnicodeSet("[a-fA-F\uFF21-\uFF26\uFF41-\uFF46]").addAll(digitSet),
|
||||
spaceSet = whitespaceSet.size() == 0 ? zSet : whitespaceSet,
|
||||
controlSet = getSet(CATEGORY, Cc),
|
||||
punctSet = new UnicodeSet(pSet).addAll(sSet),
|
||||
graphSet = new UnicodeSet(0,0x10ffff)
|
||||
.removeAll(controlSet)
|
||||
//.removeAll(getSet(CATEGORY, Cf))
|
||||
.removeAll(getSet(CATEGORY, Cs))
|
||||
.removeAll(getSet(CATEGORY, Cn))
|
||||
.removeAll(getSet(CATEGORY, Zs))
|
||||
.removeAll(getSet(CATEGORY, Zl))
|
||||
.removeAll(getSet(CATEGORY, Zp)),
|
||||
.removeAll(csSet)
|
||||
.removeAll(cnSet)
|
||||
.removeAll(zSet),
|
||||
// Cc, Cf, Cs, Cn, Z
|
||||
blankSet = new UnicodeSet(spaceSet).removeAll(new UnicodeSet("[\\u000A-\\u000D\\u0085]"))
|
||||
.removeAll(getSet(CATEGORY, Zl))
|
||||
@ -67,16 +81,18 @@ public class Compare14652 implements UCD_Types {
|
||||
String guess = "???";
|
||||
UnicodeSet guessContents = new UnicodeSet();
|
||||
|
||||
String wsname = whitespaceSet.size() == 0 ? "gc=Z" : "Whitespace";
|
||||
|
||||
Prop(String name) {
|
||||
this.name = name;
|
||||
if (name.equals("alpha")) {
|
||||
guess = "Alphabetic + gc=M";
|
||||
guessContents = alphaSet;
|
||||
} else if (name.equals("lower")) {
|
||||
guess = "Lowercase + gc=Lt";
|
||||
guess = "Lowercase + gc=Lt - dt=circle";
|
||||
guessContents = lowerSet;
|
||||
} else if (name.equals("upper")) {
|
||||
guess = "Uppercase + gc=Lt";
|
||||
guess = "Uppercase + gc=Lt - dt=circle";
|
||||
guessContents = upperSet;
|
||||
} else if (name.equals("digit")) {
|
||||
guess = "gc=Nd";
|
||||
@ -85,20 +101,20 @@ public class Compare14652 implements UCD_Types {
|
||||
guess = "gc=Nd+a..f (upper/lower,normal/fullwidth)";
|
||||
guessContents = xdigitSet;
|
||||
} else if (name.equals("space")) {
|
||||
guess = "Whitespace";
|
||||
guess = wsname;
|
||||
guessContents = spaceSet;
|
||||
Utility.showSetNames("Whitespace", spaceSet, true, Default.ucd);
|
||||
//Utility.showSetNames("Whitespace", spaceSet, true, Default.ucd);
|
||||
} else if (name.equals("cntrl")) {
|
||||
guess = "gc=Cc";
|
||||
guessContents = controlSet;
|
||||
} else if (name.equals("punct")) {
|
||||
guess = "gc=P";
|
||||
guess = "gc=P,S";
|
||||
guessContents = punctSet;
|
||||
} else if (name.equals("graph")) {
|
||||
guess = "All - gc=Cc, Cs, Cn, or Z";
|
||||
guessContents = graphSet;
|
||||
} else if (name.equals("blank")) {
|
||||
guess = "Whitespace - (LF,VT,FF,CR,NEL + gc=Zl,Zp)";
|
||||
guess = wsname + " - (LF,VT,FF,CR,NEL + gc=Zl,Zp)";
|
||||
guessContents = blankSet;
|
||||
} else if (name.equals("ISO_14652_class \"combining\"")) {
|
||||
guess = "gc=M";
|
||||
@ -143,43 +159,183 @@ tolower
|
||||
static int propCount = 0;
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1);
|
||||
while (true) {
|
||||
String line = br.readLine();
|
||||
if (line == null) break;
|
||||
line = line.trim();
|
||||
if (line.length() == 0) continue;
|
||||
if (line.charAt(line.length() - 1) == '/') {
|
||||
line = line.substring(0, line.length() - 1);
|
||||
}
|
||||
line = line.trim();
|
||||
if (line.length() == 0) continue;
|
||||
|
||||
char ch = line.charAt(0);
|
||||
if (ch == '%') continue;
|
||||
if (ch == '(') continue;
|
||||
if (ch == '<') {
|
||||
addItems(line, props[propCount-1].contents);
|
||||
} else {
|
||||
// new property
|
||||
System.out.println(line);
|
||||
if (line.equals("width")) break;
|
||||
props[propCount] = new Prop(line);
|
||||
props[propCount].name = "ISO_14652_" + line;
|
||||
props[propCount].contents = new UnicodeSet();
|
||||
propCount++;
|
||||
}
|
||||
}
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("Diff14652.txt", Utility.UTF8_WINDOWS);
|
||||
log.write('\uFEFF');
|
||||
String version = Default.ucd.getVersion();
|
||||
PrintWriter log = Utility.openPrintWriter("Diff14652_" + version + ".txt", Utility.UTF8_WINDOWS);
|
||||
try {
|
||||
log.write('\uFEFF');
|
||||
log.print("Version: " + version);
|
||||
|
||||
if (false) {
|
||||
UnicodeSet ID = getSet(DERIVED, ID_Start).addAll(getSet(DERIVED, ID_Continue_NO_Cf));
|
||||
UnicodeSet XID = getSet(DERIVED, Mod_ID_Start).addAll(getSet(DERIVED, Mod_ID_Continue_NO_Cf));
|
||||
UnicodeSet alphanumSet = new UnicodeSet(alphaSet).addAll(digitSet).addAll(getSet(CATEGORY, Pc));
|
||||
|
||||
Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd);
|
||||
Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd);
|
||||
}
|
||||
|
||||
BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1);
|
||||
while (true) {
|
||||
String line = br.readLine();
|
||||
if (line == null) break;
|
||||
line = line.trim();
|
||||
if (line.length() == 0) continue;
|
||||
if (line.charAt(line.length() - 1) == '/') {
|
||||
line = line.substring(0, line.length() - 1);
|
||||
}
|
||||
line = line.trim();
|
||||
if (line.length() == 0) continue;
|
||||
|
||||
char ch = line.charAt(0);
|
||||
if (ch == '%') continue;
|
||||
if (ch == '(') continue;
|
||||
if (ch == '<') {
|
||||
addItems(line, props[propCount-1].contents);
|
||||
} else {
|
||||
// new property
|
||||
System.out.println(line);
|
||||
if (line.equals("width")) break;
|
||||
props[propCount] = new Prop(line);
|
||||
props[propCount].name = "ISO_14652_" + line;
|
||||
props[propCount].contents = new UnicodeSet();
|
||||
propCount++;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < propCount; ++i) props[i].show(log);
|
||||
|
||||
log.println();
|
||||
log.println("**************************************************");
|
||||
log.println("Checking POSIX requirements for inclusion and disjointness.");
|
||||
log.println("**************************************************");
|
||||
log.println();
|
||||
/*
|
||||
alpha, digit, punct, cntrl are all disjoint
|
||||
space, cntrl, blank are pairwise disjoint with any of alpha, digit, xdigit
|
||||
alpha includes upper, lower
|
||||
graph includes alpha, digit, punct
|
||||
print includes graph
|
||||
xdigit includes digit
|
||||
*/
|
||||
Prop
|
||||
alpha = getProp("ISO_14652_alpha"),
|
||||
upper = getProp("ISO_14652_upper"),
|
||||
lower = getProp("ISO_14652_lower"),
|
||||
graph = getProp("ISO_14652_graph"),
|
||||
//print = getProp("ISO_14652_print"),
|
||||
punct = getProp("ISO_14652_punct"),
|
||||
digit = getProp("ISO_14652_digit"),
|
||||
xdigit = getProp("ISO_14652_xdigit"),
|
||||
space = getProp("ISO_14652_space"),
|
||||
blank = getProp("ISO_14652_blank"),
|
||||
cntrl = getProp("ISO_14652_cntrl");
|
||||
|
||||
checkDisjoint(log, new Prop[] {alpha, digit, punct, cntrl});
|
||||
|
||||
Prop [] l1 = new Prop[] {space, cntrl, blank};
|
||||
Prop [] l2 = new Prop[] {alpha, digit, xdigit};
|
||||
for (int i = 0; i < l1.length; ++i) {
|
||||
for (int j = i + 1; j < l2.length; ++j) {
|
||||
checkDisjoint(log, l1[i], l2[j]);
|
||||
}
|
||||
}
|
||||
checkIncludes(log, alpha, upper);
|
||||
checkIncludes(log, alpha, lower);
|
||||
checkIncludes(log, graph, alpha);
|
||||
checkIncludes(log, graph, digit);
|
||||
checkIncludes(log, graph, punct);
|
||||
//checkIncludes(log, print, graph);
|
||||
checkIncludes(log, xdigit, digit);
|
||||
|
||||
|
||||
// possibly alpha, digit, punct, cntrl, space cover the !(Cn,Cs)
|
||||
|
||||
UnicodeSet trRemainder = new UnicodeSet(cnSet)
|
||||
.complement()
|
||||
.removeAll(csSet)
|
||||
.removeAll(digit.contents)
|
||||
.removeAll(punct.contents)
|
||||
.removeAll(alpha.contents)
|
||||
.removeAll(cntrl.contents)
|
||||
.removeAll(space.contents);
|
||||
Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd);
|
||||
|
||||
UnicodeSet propRemainder = new UnicodeSet(cnSet)
|
||||
.complement()
|
||||
.removeAll(csSet)
|
||||
//.removeAll(noSet)
|
||||
//.removeAll(cfSet)
|
||||
.removeAll(digit.guessContents)
|
||||
.removeAll(punct.guessContents)
|
||||
.removeAll(alpha.guessContents)
|
||||
.removeAll(cntrl.guessContents)
|
||||
.removeAll(space.guessContents);
|
||||
Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd);
|
||||
|
||||
/*
|
||||
checkDisjoint(new Prop[] {alpha, digit, punct, cntrl});
|
||||
UnicodeSet remainder = cnSet.complement();
|
||||
UnicodeSet guessRemainder = new UnicodeSet(remainder);
|
||||
for (int i = 0; i < list.length; ++i) {
|
||||
for (int j = i + 1; j < list.length; ++j) {
|
||||
compare(log, list[i].name, list[i].contents, list[j].name, list[j].contents);
|
||||
compare(log, list[i].guess, list[i].guessContents, list[j].guess, list[j].guessContents);
|
||||
}
|
||||
remainder.removeAll(list[i].contents);
|
||||
guessRemainder.removeAll(list[i].guessContents);
|
||||
}
|
||||
if (remainder.size() != 0) {
|
||||
log.println();
|
||||
log.println("Incomplete (TR): " + remainder);
|
||||
}
|
||||
if (guessRemainder.size() != 0) {
|
||||
log.println();
|
||||
log.println("Incomplete (Prop): " + guessRemainder);
|
||||
}
|
||||
*/
|
||||
|
||||
} finally {
|
||||
log.close();
|
||||
}
|
||||
}
|
||||
|
||||
static void checkDisjoint(PrintWriter log, Prop[] list) {
|
||||
for (int i = 0; i < list.length; ++i) {
|
||||
for (int j = i + 1; j < list.length; ++j) {
|
||||
checkDisjoint(log, list[i], list[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void checkDisjoint(PrintWriter log, Prop prop1, Prop prop2) {
|
||||
checkDisjoint(log, prop1.name, prop1.contents, prop2.name, prop2.contents);
|
||||
checkDisjoint(log, prop1.guess, prop1.guessContents, prop2.guess, prop2.guessContents);
|
||||
}
|
||||
|
||||
static void checkDisjoint(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {
|
||||
if (set.containsSome(set2)) {
|
||||
log.println();
|
||||
log.println("Fails test: " + name + " disjoint-with " + name2);
|
||||
UnicodeSet diff = new UnicodeSet(set).retainAll(set2);
|
||||
Utility.showSetNames(log, "", diff, false, false, Default.ucd);
|
||||
}
|
||||
}
|
||||
|
||||
static void checkIncludes(PrintWriter log, Prop prop1, Prop prop2) {
|
||||
checkIncludes(log, prop1.name, prop1.contents, prop2.name, prop2.contents);
|
||||
checkIncludes(log, prop1.guess, prop1.guessContents, prop2.guess, prop2.guessContents);
|
||||
}
|
||||
|
||||
static void checkIncludes(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {
|
||||
if (!set.containsAll(set2)) {
|
||||
log.println();
|
||||
log.println("Fails test:" + name + " includes " + name2);
|
||||
UnicodeSet diff = new UnicodeSet(set2).removeAll(set);
|
||||
Utility.showSetNames(log, "", diff, false, false, Default.ucd);
|
||||
}
|
||||
}
|
||||
|
||||
static String[] pieces = new String[100];
|
||||
|
||||
// example: <U1F48>..<U1F4D>;<U1F59>;<U1F5B>;<U1F5D>;<U1F5F>;<U1F68>..<U1F6F>;/
|
||||
@ -208,6 +364,18 @@ tolower
|
||||
return Integer.parseInt(piece.substring(2,piece.length()-1), 16);
|
||||
}
|
||||
|
||||
static Prop getProp(String name) {
|
||||
//System.out.println("Searching for: " + name);
|
||||
for (int i = 0; i < propCount; ++i) {
|
||||
//System.out.println("Checking: " + props[i].name);
|
||||
if (props[i].name.equals(name)) {
|
||||
return props[i];
|
||||
}
|
||||
}
|
||||
//System.out.println("Missed");
|
||||
return null;
|
||||
}
|
||||
|
||||
// oddities:
|
||||
// extra space after ';' <U0300>..<U036F>; <U20D0>..<U20FF>; <UFE20>..<UFE2F>;/
|
||||
// <0>?? <0>;<U0BE7>..<U0BEF>;/
|
||||
|
@ -1,3 +1,4 @@
|
||||
package com.ibm.text.UCD;
|
||||
public class ListNFComplete {
|
||||
|
||||
// find all the characters that are
|
||||
@ -11,7 +12,7 @@ public class ListNFComplete {
|
||||
// add an ogonek it changes to a-ogonek + breve
|
||||
|
||||
public static void main (String[] args) {
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD);
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD);
|
||||
|
||||
}
|
||||
}
|
@ -5,13 +5,13 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MLStreamWriter.java,v $
|
||||
* $Date: 2001/12/13 23:35:57 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2003/04/25 01:39:15 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.utility;
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
@ -1,4 +1,5 @@
|
||||
public class NFSkippable {
|
||||
package com.ibm.text.UCD;
|
||||
public class NFCSkippable {
|
||||
|
||||
// find all the characters that are
|
||||
// a) not decomposed by this normalization form
|
||||
@ -10,9 +11,8 @@ public class NFSkippable {
|
||||
// Example: a-breve might satisfy a-d, but if you
|
||||
// add an ogonek it changes to a-ogonek + breve
|
||||
|
||||
public NF
|
||||
|
||||
public boolean is(int cp) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public static void main (String[] args) {
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ProcessUnihan.java,v $
|
||||
* $Date: 2002/07/14 22:07:00 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/04/25 01:39:15 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -17,6 +17,7 @@ import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import java.util.*;
|
||||
|
||||
// stub file, ignore
|
||||
|
||||
public final class ProcessUnihan {
|
||||
|
||||
@ -49,7 +50,7 @@ public final class ProcessUnihan {
|
||||
//out = Utility.openPrintWriter("Transliterate_Han_English.txt");
|
||||
//err = Utility.openPrintWriter("Transliterate_Han_English.log.txt");
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", true);
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", Utility.UTF8);
|
||||
while (true) {
|
||||
Utility.dot(++lineCounter);
|
||||
|
||||
@ -62,10 +63,12 @@ public final class ProcessUnihan {
|
||||
int count = Utility.split(line, '#', parts);
|
||||
|
||||
int code = Integer.parseInt(parts[0].substring(2), 16);
|
||||
Byte itag = tags.get(tag);
|
||||
if (itag == null)
|
||||
Byte itag = tags.get("a");
|
||||
if (itag == null) {}
|
||||
String tag = parts[1];
|
||||
String value = parts[2];
|
||||
if (tags.containsKey(tag))
|
||||
|
||||
if (tags.containsKey(tag)) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
package com.ibm.text.utility;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
final class UnicodeMapInt {
|
||||
private int [] index = new int[1];
|
||||
@ -16,7 +17,7 @@ final class UnicodeMapInt {
|
||||
int i = findIndex(cp);
|
||||
|
||||
// A1. if cp already has the value, return
|
||||
if (data[i - 1] == value) return;
|
||||
if (data[i - 1] == value) return cp;
|
||||
|
||||
int rangeStart = index[i-1];
|
||||
int rangeLimit = index[i];
|
||||
@ -74,7 +75,7 @@ final class UnicodeMapInt {
|
||||
}
|
||||
|
||||
/** Finds the least index with a value greater than cp */
|
||||
private int findIndex(cp) {
|
||||
private int findIndex( int cp) {
|
||||
if (cp > 0x10FFFF) throw new ArrayIndexOutOfBoundsException("Code point too large: " + cp); // out of bounds!
|
||||
int i = -1;
|
||||
while (true) {
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/testParser.java,v $
|
||||
* $Date: 2001/08/31 00:19:16 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2003/04/25 01:39:15 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -66,7 +66,7 @@ public class testParser implements XMLParseTypes {
|
||||
|
||||
static void test2() throws Exception {
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("UCD-Extract.html");
|
||||
PrintWriter log = Utility.openPrintWriter("UCD-Extract.html", Utility.UTF8_WINDOWS);
|
||||
|
||||
//int fieldCount = 4;
|
||||
//int width = 100/fieldCount;
|
||||
|
Loading…
Reference in New Issue
Block a user