Added chart program, minor edits.
X-SVN-Rev: 9918
This commit is contained in:
parent
ca34222583
commit
5395623062
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $
|
||||
* $Date: 2002/05/31 01:41:03 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2002/09/25 06:40:13 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -164,8 +164,8 @@ public class GenOverlap implements UCD_Types {
|
||||
static boolean PROGRESS = false;
|
||||
|
||||
static void fullCheck() throws IOException {
|
||||
PrintWriter log = Utility.openPrintWriter("Overlap.html");
|
||||
PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt");
|
||||
PrintWriter log = Utility.openPrintWriter("Overlap.html", Utility.UTF8_WINDOWS);
|
||||
PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt", Utility.UTF8_WINDOWS);
|
||||
|
||||
Iterator it = completes.keySet().iterator();
|
||||
int counter = 0;
|
||||
@ -448,7 +448,7 @@ public class GenOverlap implements UCD_Types {
|
||||
newKeys.removeAll(joint);
|
||||
oldKeys.removeAll(joint);
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), false, false);
|
||||
PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), Utility.UTF8_WINDOWS);
|
||||
Iterator it = list.iterator();
|
||||
int last = -1;
|
||||
while (it.hasNext()) {
|
||||
@ -631,7 +631,7 @@ public class GenOverlap implements UCD_Types {
|
||||
|
||||
System.out.println("Data Gathered");
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("checkstringsearchhash.html");
|
||||
PrintWriter log = Utility.openPrintWriter("checkstringsearchhash.html", Utility.UTF8_WINDOWS);
|
||||
Utility.writeHtmlHeader(log, "Check Hash");
|
||||
log.println("<h1>Collisions</h1>");
|
||||
log.println("<p>Shows collisions among primary values when hashed to table size = " + tableLength + ".");
|
||||
@ -694,7 +694,7 @@ public class GenOverlap implements UCD_Types {
|
||||
}
|
||||
|
||||
public static void listCyrillic(UCA collatorIn) throws IOException {
|
||||
PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", false, false);
|
||||
PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", Utility.UTF8_WINDOWS);
|
||||
Set set = new TreeSet(collatorIn);
|
||||
Set set2 = new TreeSet(collatorIn);
|
||||
ucd = UCD.make();
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
|
||||
* $Date: 2002/07/03 02:15:47 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2002/09/25 06:40:13 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -55,6 +55,7 @@ public class Main {
|
||||
else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
|
||||
else if (arg.equalsIgnoreCase("special")) WriteCharts.special();
|
||||
|
||||
else if (arg.equalsIgnoreCase("writeCompositionChart")) WriteCharts.writeCompositionChart();
|
||||
|
||||
else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(WriteCollationData.collator);
|
||||
else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(WriteCollationData.collator);
|
||||
|
@ -4,9 +4,9 @@
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2002/07/03 02:15:47 $
|
||||
* $Revision: 1.11 $
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2002/09/25 06:40:13 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -19,12 +19,17 @@ import java.io.*;
|
||||
import com.ibm.text.UCD.*;
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
|
||||
import java.text.SimpleDateFormat;
|
||||
|
||||
public class WriteCharts implements UCD_Types {
|
||||
|
||||
|
||||
static boolean HACK_KANA = false;
|
||||
|
||||
|
||||
static public void special() {
|
||||
Default.setUCD();
|
||||
for (int i = 0xE000; i < 0x10000; ++i) {
|
||||
@ -33,58 +38,58 @@ public class WriteCharts implements UCD_Types {
|
||||
System.out.println(Default.ucd.getCodeAndName(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static public void collationChart(UCA uca) throws IOException {
|
||||
Default.setUCD(uca.getUCDVersion());
|
||||
HACK_KANA = true;
|
||||
|
||||
|
||||
uca.setAlternate(UCA.NON_IGNORABLE);
|
||||
|
||||
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD);
|
||||
//Normalizer nfc = new Normalizer(Normalizer.NFC);
|
||||
|
||||
|
||||
UCA.UCAContents cc = uca.getContents(UCA.FIXED_CE, null); // nfd instead of null if skipping decomps
|
||||
cc.enableSamples();
|
||||
|
||||
|
||||
Set set = new TreeSet();
|
||||
|
||||
|
||||
while (true) {
|
||||
String x = cc.next();
|
||||
if (x == null) break;
|
||||
if (x.equals("\u2F00")) {
|
||||
System.out.println("debug");
|
||||
}
|
||||
|
||||
|
||||
set.add(new Pair(uca.getSortKey(x), x));
|
||||
}
|
||||
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
|
||||
Iterator it = set.iterator();
|
||||
|
||||
|
||||
byte oldScript = -127;
|
||||
|
||||
|
||||
int[] scriptCount = new int[128];
|
||||
|
||||
|
||||
int counter = 0;
|
||||
|
||||
|
||||
String lastSortKey = "\u0000";
|
||||
|
||||
|
||||
int high = uca.getSortKey("a").charAt(0);
|
||||
int variable = UCA.getPrimary(uca.getVariableHigh());
|
||||
|
||||
|
||||
int columnCount = 0;
|
||||
|
||||
|
||||
String[] replacement = new String[] {"%%%", "Collation Charts"};
|
||||
String folder = "charts\\uca\\";
|
||||
|
||||
|
||||
Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", false, folder + "charts.css");
|
||||
Utility.copyTextFile("help.html", true, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile("index_header.html", true, indexFile, replacement);
|
||||
|
||||
|
||||
/*
|
||||
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
indexFile.println("<title>UCA Default Collation Table</title>");
|
||||
@ -93,22 +98,22 @@ public class WriteCharts implements UCD_Types {
|
||||
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
||||
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
||||
*/
|
||||
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
|
||||
Pair p = (Pair) it.next();
|
||||
String sortKey = (String) p.first;
|
||||
String s = (String) p.second;
|
||||
|
||||
|
||||
int cp = UTF16.charAt(s,0);
|
||||
|
||||
|
||||
byte script = Default.ucd.getScript(cp);
|
||||
|
||||
|
||||
// get first non-zero primary
|
||||
int currentPrimary = getFirstPrimary(sortKey);
|
||||
int primary = currentPrimary >>> 16;
|
||||
|
||||
|
||||
if (sortKey.length() < 4) script = NULL_ORDER;
|
||||
else if (primary == 0) script = IGNORABLE_ORDER;
|
||||
else if (primary < variable) script = VARIABLE_ORDER;
|
||||
@ -118,35 +123,35 @@ public class WriteCharts implements UCD_Types {
|
||||
else if (primary < UCA_Types.UNSUPPORTED_OTHER_BASE) script = CJK_AB;
|
||||
else script = UNSUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
if (script == KATAKANA_SCRIPT) script = HIRAGANA_SCRIPT;
|
||||
else if ((script == INHERITED_SCRIPT || script == COMMON_SCRIPT) && oldScript >= 0) script = oldScript;
|
||||
|
||||
if (script != oldScript
|
||||
if (script != oldScript
|
||||
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
||||
) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
oldScript = script;
|
||||
}
|
||||
|
||||
|
||||
if (output == null) {
|
||||
++scriptCount[script+3];
|
||||
if (scriptCount[script+3] > 1) {
|
||||
System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
|
||||
System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
|
||||
getChunkName(script, LONG) + ", " + Default.ucd.getCodeAndName(s));
|
||||
}
|
||||
output = openFile(scriptCount[script+3], folder, script);
|
||||
}
|
||||
|
||||
|
||||
boolean firstPrimaryEquals = currentPrimary == getFirstPrimary(lastSortKey);
|
||||
|
||||
|
||||
int strength = uca.strengthDifference(sortKey, lastSortKey);
|
||||
if (strength < 0) strength = -strength;
|
||||
lastSortKey = sortKey;
|
||||
|
||||
|
||||
// find out if this is an expansion: more than one primary weight
|
||||
|
||||
|
||||
int primaryCount = 0;
|
||||
for (int i = 0; i < sortKey.length(); ++i) {
|
||||
char w = sortKey.charAt(i);
|
||||
@ -156,7 +161,7 @@ public class WriteCharts implements UCD_Types {
|
||||
}
|
||||
++ primaryCount;
|
||||
}
|
||||
|
||||
|
||||
String breaker = "";
|
||||
if (columnCount > 10 || !firstPrimaryEquals) {
|
||||
columnCount = 0;
|
||||
@ -166,20 +171,20 @@ public class WriteCharts implements UCD_Types {
|
||||
++columnCount;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
String classname = primaryCount > 1 ? XCLASSNAME[strength] : CLASSNAME[strength];
|
||||
|
||||
|
||||
String name = Default.ucd.getName(s);
|
||||
|
||||
|
||||
|
||||
|
||||
if (s.equals("\u1eaf")) {
|
||||
System.out.println("debug");
|
||||
}
|
||||
|
||||
|
||||
String comp = Default.nfc.normalize(s);
|
||||
|
||||
|
||||
String outline = breaker + classname
|
||||
+ " title='"
|
||||
+ " title='"
|
||||
+ (script != UNSUPPORTED
|
||||
? Utility.quoteXML(name, true) + ": "
|
||||
: "")
|
||||
@ -193,21 +198,21 @@ public class WriteCharts implements UCD_Types {
|
||||
? "<td class='name'><tt>" + Utility.quoteXML(name, true) + "</td>"
|
||||
: "")
|
||||
;
|
||||
|
||||
|
||||
output.println(outline);
|
||||
++columnCount;
|
||||
}
|
||||
|
||||
|
||||
closeFile(output);
|
||||
closeIndexFile(indexFile, "<br>UCA: " + uca.getDataVersion(), COLLATION);
|
||||
}
|
||||
|
||||
|
||||
static public void normalizationChart() throws IOException {
|
||||
Default.setUCD();
|
||||
HACK_KANA = false;
|
||||
|
||||
|
||||
Set set = new TreeSet();
|
||||
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) {
|
||||
if (i < 0xAC00) continue;
|
||||
@ -216,35 +221,35 @@ public class WriteCharts implements UCD_Types {
|
||||
}
|
||||
byte cat = Default.ucd.getCategory(i);
|
||||
if (cat == Cs || cat == Co) continue;
|
||||
|
||||
|
||||
if (Default.nfkd.isNormalized(i)) continue;
|
||||
String decomp = Default.nfkd.normalize(i);
|
||||
|
||||
|
||||
byte script = getBestScript(decomp);
|
||||
|
||||
|
||||
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
|
||||
new Pair(Default.ucd.getCase(decomp, FULL, FOLD),
|
||||
new Integer(i))));
|
||||
}
|
||||
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
|
||||
Iterator it = set.iterator();
|
||||
|
||||
|
||||
int oldScript = -127;
|
||||
|
||||
|
||||
int counter = 0;
|
||||
|
||||
|
||||
String[] replacement = new String[] {"%%%", "Normalization Charts"};
|
||||
String folder = "charts\\normalization\\";
|
||||
|
||||
Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", false, folder + "charts.css");
|
||||
Utility.copyTextFile("norm_help.html", true, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile("index_header.html", true, indexFile, replacement);
|
||||
|
||||
|
||||
/*
|
||||
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
indexFile.println("<title>UCA Default Collation Table</title>");
|
||||
@ -253,107 +258,107 @@ public class WriteCharts implements UCD_Types {
|
||||
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
||||
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
||||
*/
|
||||
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
|
||||
Pair p = (Pair) it.next();
|
||||
int script = ((Integer) p.first).intValue();
|
||||
int cp = ((Integer)((Pair) p.second).second).intValue();
|
||||
|
||||
if (script != oldScript
|
||||
|
||||
if (script != oldScript
|
||||
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
||||
) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
oldScript = script;
|
||||
}
|
||||
|
||||
|
||||
if (output == null) {
|
||||
output = openFile(0, folder, script);
|
||||
output.println("<tr><td class='z'>Code</td><td class='z'>C</td><td class='z'>D</td><td class='z'>KC</td><td class='z'>KD</td></tr>");
|
||||
|
||||
}
|
||||
|
||||
|
||||
output.println("<tr>");
|
||||
|
||||
|
||||
String prefix;
|
||||
String code = UTF16.valueOf(cp);
|
||||
String c = Default.nfc.normalize(cp);
|
||||
String d = Default.nfd.normalize(cp);
|
||||
String kc = Default.nfkc.normalize(cp);
|
||||
String kd = Default.nfkd.normalize(cp);
|
||||
|
||||
|
||||
showCell(output, code, "<td class='z' ", "", false);
|
||||
|
||||
|
||||
prefix = c.equals(code) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, c, prefix, "", c.equals(code));
|
||||
|
||||
|
||||
prefix = d.equals(c) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, d, prefix, "", d.equals(c));
|
||||
|
||||
|
||||
prefix = kc.equals(c) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, kc, prefix, "", kc.equals(c));
|
||||
|
||||
|
||||
prefix = (kd.equals(d) || kd.equals(kc)) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, kd, prefix, "", (kd.equals(d) || kd.equals(kc)));
|
||||
|
||||
|
||||
output.println("</tr>");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
closeFile(output);
|
||||
closeIndexFile(indexFile, "", NORMALIZATION);
|
||||
}
|
||||
|
||||
|
||||
static public void caseChart() throws IOException {
|
||||
Default.setUCD();
|
||||
HACK_KANA = false;
|
||||
|
||||
|
||||
Set set = new TreeSet();
|
||||
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
byte cat = Default.ucd.getCategory(i);
|
||||
if (cat == Cs || cat == Co) continue;
|
||||
|
||||
|
||||
String code = UTF16.valueOf(i);
|
||||
String lower = Default.ucd.getCase(i, FULL, LOWER);
|
||||
String title = Default.ucd.getCase(i, FULL, TITLE);
|
||||
String upper = Default.ucd.getCase(i, FULL, UPPER);
|
||||
String fold = Default.ucd.getCase(i, FULL, FOLD);
|
||||
|
||||
|
||||
String decomp = Default.nfkd.normalize(i);
|
||||
int script = 0;
|
||||
if (lower.equals(code) && upper.equals(code) && fold.equals(code) && title.equals(code)) {
|
||||
if (!containsCase(decomp)) continue;
|
||||
script = NO_CASE_MAPPING;
|
||||
}
|
||||
|
||||
|
||||
if (script == 0) script = getBestScript(decomp);
|
||||
|
||||
|
||||
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
|
||||
new Pair(Default.ucd.getCase(decomp, FULL, FOLD),
|
||||
new Integer(i))));
|
||||
}
|
||||
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
|
||||
Iterator it = set.iterator();
|
||||
|
||||
|
||||
int oldScript = -127;
|
||||
|
||||
|
||||
int counter = 0;
|
||||
String[] replacement = new String[] {"%%%", "Case Charts"};
|
||||
String folder = "charts\\case\\";
|
||||
|
||||
|
||||
Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", false, folder + "charts.css");
|
||||
Utility.copyTextFile("case_help.html", true, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile("index_header.html", true, indexFile, replacement);
|
||||
|
||||
|
||||
/*
|
||||
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
indexFile.println("<title>UCA Default Collation Table</title>");
|
||||
@ -362,24 +367,24 @@ public class WriteCharts implements UCD_Types {
|
||||
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
||||
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
||||
*/
|
||||
|
||||
|
||||
int columnCount = 0;
|
||||
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
|
||||
Pair p = (Pair) it.next();
|
||||
int script = ((Integer) p.first).intValue();
|
||||
int cp = ((Integer)((Pair) p.second).second).intValue();
|
||||
|
||||
if (script != oldScript
|
||||
|
||||
if (script != oldScript
|
||||
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
||||
) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
oldScript = script;
|
||||
}
|
||||
|
||||
|
||||
if (output == null) {
|
||||
output = openFile(0, folder, script);
|
||||
if (script == NO_CASE_MAPPING) output.println("<tr>");
|
||||
@ -387,7 +392,7 @@ public class WriteCharts implements UCD_Types {
|
||||
+"<td class='z'>Upper</td><td class='z'>Fold</td></tr>");
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (script == NO_CASE_MAPPING) {
|
||||
if (columnCount > 10) {
|
||||
output.println("</tr><tr>");
|
||||
@ -397,38 +402,38 @@ public class WriteCharts implements UCD_Types {
|
||||
++columnCount;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
output.println("<tr>");
|
||||
|
||||
|
||||
String prefix;
|
||||
String code = UTF16.valueOf(cp);
|
||||
String lower = Default.ucd.getCase(cp, FULL, LOWER);
|
||||
String title = Default.ucd.getCase(cp, FULL, TITLE);
|
||||
String upper = Default.ucd.getCase(cp, FULL, UPPER);
|
||||
String fold = Default.ucd.getCase(cp, FULL, FOLD);
|
||||
|
||||
|
||||
showCell(output, code, "<td class='z' ", "", false);
|
||||
|
||||
|
||||
prefix = lower.equals(code) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, lower, prefix, "", lower.equals(code));
|
||||
|
||||
|
||||
prefix = title.equals(upper) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, title, prefix, "", title.equals(upper));
|
||||
|
||||
|
||||
prefix = upper.equals(code) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, upper, prefix, "", upper.equals(code));
|
||||
|
||||
|
||||
prefix = fold.equals(lower) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, fold, prefix, "", fold.equals(lower));
|
||||
|
||||
|
||||
output.println("</tr>");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
closeFile(output);
|
||||
closeIndexFile(indexFile, "", CASE);
|
||||
}
|
||||
|
||||
|
||||
static public void addMapChar(Map m, Set stoplist, String key, String ch) {
|
||||
if (stoplist.contains(key)) return;
|
||||
for (int i = 0; i < key.length(); ++i) {
|
||||
@ -442,23 +447,23 @@ public class WriteCharts implements UCD_Types {
|
||||
}
|
||||
result.add(ch);
|
||||
}
|
||||
|
||||
|
||||
static public void indexChart() throws IOException {
|
||||
Default.setUCD();
|
||||
HACK_KANA = false;
|
||||
|
||||
|
||||
Map map = new TreeMap();
|
||||
Set stoplist = new TreeSet();
|
||||
|
||||
|
||||
String[] stops = {"LETTER", "CHARACTER", "AND", "CAPITAL", "SMALL", "COMPATIBILITY", "WITH"};
|
||||
stoplist.addAll(Arrays.asList(stops));
|
||||
System.out.println("Stop-list: " + stoplist);
|
||||
|
||||
|
||||
for (int i = 0; i < LIMIT_SCRIPT; ++i) {
|
||||
stoplist.add(Default.ucd.getScriptID_fromIndex((byte)i));
|
||||
}
|
||||
System.out.println("Stop-list: " + stoplist);
|
||||
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
if (0xAC00 <= i && i <= 0xD7A3) continue;
|
||||
@ -466,7 +471,7 @@ public class WriteCharts implements UCD_Types {
|
||||
|
||||
String s = Default.ucd.getName(i);
|
||||
if (s == null) continue;
|
||||
|
||||
|
||||
if (s.startsWith("<")) {
|
||||
System.out.println("Wierd character at " + Default.ucd.getCodeAndName(i));
|
||||
}
|
||||
@ -490,52 +495,52 @@ public class WriteCharts implements UCD_Types {
|
||||
addMapChar(map, stoplist, word, ch);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
|
||||
Iterator it = map.keySet().iterator();
|
||||
|
||||
|
||||
int oldScript = -127;
|
||||
|
||||
|
||||
int counter = 0;
|
||||
String[] replacement = new String[] {"%%%", "Name Charts"};
|
||||
String folder = "charts\\name\\";
|
||||
|
||||
|
||||
Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", false, folder + "charts.css");
|
||||
Utility.copyTextFile("name_help.html", true, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
|
||||
Utility.appendFile("index_header.html", true, indexFile, replacement);
|
||||
|
||||
|
||||
int columnCount = 0;
|
||||
char lastInitial = 0;
|
||||
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
|
||||
String key = (String) it.next();
|
||||
|
||||
|
||||
Set chars = (Set) map.get(key);
|
||||
|
||||
|
||||
char initial = key.charAt(0);
|
||||
|
||||
|
||||
if (initial != lastInitial) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
lastInitial = initial;
|
||||
}
|
||||
|
||||
|
||||
if (output == null) {
|
||||
output = openFile2(0, folder, String.valueOf(initial));
|
||||
}
|
||||
|
||||
|
||||
output.println("<tr><td class='h'>" + key + "</td>");
|
||||
columnCount = 1;
|
||||
|
||||
|
||||
Iterator sublist = chars.iterator();
|
||||
while (sublist.hasNext()) {
|
||||
|
||||
|
||||
String ch = (String) sublist.next();
|
||||
if (columnCount > 10) {
|
||||
output.println("</tr><tr><td></td>");
|
||||
@ -545,20 +550,20 @@ public class WriteCharts implements UCD_Types {
|
||||
++columnCount;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
output.println("</tr>");
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
closeFile(output);
|
||||
closeIndexFile(indexFile, "", CASE);
|
||||
}
|
||||
|
||||
|
||||
static void showCell(PrintWriter output, String s, String prefix, String extra, boolean skipName) {
|
||||
String name = Default.ucd.getName(s);
|
||||
String comp = Default.nfc.normalize(s);
|
||||
|
||||
String outline = prefix
|
||||
|
||||
String outline = prefix
|
||||
+ (skipName ? "" : " title='" + Utility.quoteXML(name, true) + "'")
|
||||
+ extra + ">"
|
||||
+ Utility.quoteXML(comp, true)
|
||||
@ -566,10 +571,10 @@ public class WriteCharts implements UCD_Types {
|
||||
+ Utility.hex(s)
|
||||
//+ "<br>" + script
|
||||
+ "</tt></td>";
|
||||
|
||||
|
||||
output.println(outline);
|
||||
}
|
||||
|
||||
|
||||
static byte getBestScript(String s) {
|
||||
int cp;
|
||||
byte result = COMMON_SCRIPT;
|
||||
@ -588,33 +593,33 @@ public class WriteCharts implements UCD_Types {
|
||||
}
|
||||
return (result << 16);
|
||||
}
|
||||
|
||||
|
||||
static final String[] CLASSNAME = {
|
||||
"<td class='q'",
|
||||
"<td class='q'",
|
||||
"<td class='q'",
|
||||
"<td class='t'",
|
||||
"<td class='s'",
|
||||
"<td class='q'",
|
||||
"<td class='q'",
|
||||
"<td class='q'",
|
||||
"<td class='t'",
|
||||
"<td class='s'",
|
||||
"<td class='p'"};
|
||||
|
||||
|
||||
static final String[] XCLASSNAME = {
|
||||
"<td class='eq'",
|
||||
"<td class='eq'",
|
||||
"<td class='eq'",
|
||||
"<td class='et'",
|
||||
"<td class='es'",
|
||||
"<td class='eq'",
|
||||
"<td class='eq'",
|
||||
"<td class='eq'",
|
||||
"<td class='et'",
|
||||
"<td class='es'",
|
||||
"<td class='ep'"};
|
||||
|
||||
|
||||
|
||||
static PrintWriter indexFile;
|
||||
|
||||
|
||||
static PrintWriter openFile(int count, String directory, int script) throws IOException {
|
||||
String scriptName = getChunkName(script, LONG);
|
||||
String shortScriptName = getChunkName(script, SHORT);
|
||||
String hover = scriptName.equals(shortScriptName) ? "" : "' title='" + shortScriptName;
|
||||
|
||||
|
||||
String fileName = "chart_" + scriptName + (count > 1 ? count + "" : "") + ".html";
|
||||
PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
|
||||
PrintWriter output = Utility.openPrintWriter(directory + fileName, Utility.UTF8_WINDOWS);
|
||||
Utility.fixDot();
|
||||
System.out.println("Writing: " + scriptName);
|
||||
indexFile.println(" <a href = '" + fileName + hover + "'>" + scriptName + "</a>");
|
||||
@ -626,10 +631,10 @@ public class WriteCharts implements UCD_Types {
|
||||
output.println("<table>");
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
static PrintWriter openFile2(int count, String directory, String name) throws IOException {
|
||||
String fileName = "chart_" + name + (count > 1 ? count + "" : "") + ".html";
|
||||
PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
|
||||
PrintWriter output = Utility.openPrintWriter(directory + fileName, Utility.UTF8_WINDOWS);
|
||||
Utility.fixDot();
|
||||
System.out.println("Writing: " + name);
|
||||
indexFile.println(" <a href = '" + fileName + "'>" + name + "</a>");
|
||||
@ -641,8 +646,8 @@ public class WriteCharts implements UCD_Types {
|
||||
output.println("<table>");
|
||||
return output;
|
||||
}
|
||||
|
||||
static final int
|
||||
|
||||
static final int
|
||||
NULL_ORDER = -3,
|
||||
IGNORABLE_ORDER = -2,
|
||||
VARIABLE_ORDER = -1,
|
||||
@ -653,7 +658,7 @@ public class WriteCharts implements UCD_Types {
|
||||
CAT_OFFSET = 128,
|
||||
// categories in here
|
||||
NO_CASE_MAPPING = 200;
|
||||
|
||||
|
||||
static String getChunkName(int script, byte length) {
|
||||
switch(script) {
|
||||
case NO_CASE_MAPPING: return "NoCaseMapping";
|
||||
@ -663,7 +668,7 @@ public class WriteCharts implements UCD_Types {
|
||||
case CJK: return "CJK";
|
||||
case CJK_AB: return "CJK-Extensions";
|
||||
case UNSUPPORTED: return "Unsupported";
|
||||
default:
|
||||
default:
|
||||
if (script >= CAT_OFFSET) return Default.ucd.getCategoryID_fromIndex((byte)(script - CAT_OFFSET), length);
|
||||
else if (script == HIRAGANA_SCRIPT && HACK_KANA) return length == SHORT ? "Kata-Hira" : "Katakana-Hiragana";
|
||||
else return Default.ucd.getCase(Default.ucd.getScriptID_fromIndex((byte)script, length), FULL, TITLE);
|
||||
@ -678,11 +683,11 @@ public class WriteCharts implements UCD_Types {
|
||||
|
||||
|
||||
static final byte COLLATION = 0, NORMALIZATION = 1, CASE = 2;
|
||||
|
||||
|
||||
static void closeIndexFile(PrintWriter indexFile, String extra, byte choice) {
|
||||
SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
|
||||
df.setTimeZone(TimeZone.getTimeZone("GMT"));
|
||||
|
||||
|
||||
indexFile.println("</p><hr width='50%'><p>");
|
||||
boolean gotOne = false;
|
||||
if (choice != COLLATION) {
|
||||
@ -705,12 +710,12 @@ public class WriteCharts implements UCD_Types {
|
||||
indexFile.println("</p></body></html>");
|
||||
indexFile.close();
|
||||
}
|
||||
|
||||
|
||||
static boolean containsCase(String s) {
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
// contains Lu, Lo, Lt, or Lowercase or Uppercase
|
||||
// contains Lu, Ll, Lt, or Lowercase or Uppercase
|
||||
byte cat = Default.ucd.getCategory(cp);
|
||||
if (cat == Lu || cat == Ll || cat == Lt) return true;
|
||||
if (Default.ucd.getBinaryProperty(cp, Other_Lowercase)) return true;
|
||||
@ -718,7 +723,204 @@ public class WriteCharts implements UCD_Types {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static final Transliterator addCircle = Transliterator.createFromRules(
|
||||
"any-addCircle", "([[:Mn:][:Me:]]) > \u25CC $1", Transliterator.FORWARD);
|
||||
|
||||
public static void writeCompositionChart() throws IOException {
|
||||
Default.setUCD();
|
||||
UCA uca = new UCA(null,"");
|
||||
|
||||
Set letters = new TreeSet();
|
||||
Set marks = new TreeSet(uca);
|
||||
Set totalMarks = new TreeSet(uca);
|
||||
Map decomposes = new HashMap();
|
||||
Set notPrinted = new TreeSet(new UTF16.StringComparator());
|
||||
Set printed = new HashSet();
|
||||
|
||||
// UnicodeSet latin = new UnicodeSet("[:latin:]");
|
||||
|
||||
PrintWriter out = Utility.openPrintWriter("composition_chart.html", Utility.UTF8_WINDOWS);
|
||||
try {
|
||||
out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
out.println("<style>");
|
||||
|
||||
out.println("body { font-family: Arial Unicode MS }");
|
||||
out.println("td { text-align: Center ; vertical-align: top; width: 1%; background-color: #EEEEEE }");
|
||||
out.println("tt { font-size: 50% }");
|
||||
out.println("table { width='1%' }");
|
||||
out.println(".w { background-color: #FFFFFF }");
|
||||
out.println(".h { background-color: #EEEEFF }");
|
||||
out.println(".r { background-color: #FF0000 }");
|
||||
out.println("</style>");
|
||||
out.println("</head><body bgcolor='#FFFFFF'>");
|
||||
out.println("<h1>Composites</h1>");
|
||||
|
||||
UnicodeSetIterator it = new UnicodeSetIterator();
|
||||
|
||||
for (byte script = 0; script < UCD_Types.LIMIT_SCRIPT; ++script) {
|
||||
|
||||
String scriptName = "";
|
||||
try {
|
||||
scriptName = Default.ucd.getScriptID_fromIndex(script);
|
||||
Utility.fixDot();
|
||||
System.out.println(scriptName);
|
||||
} catch (IllegalArgumentException e) {
|
||||
System.out.println("Failed to create transliterator for: " + scriptName + "(" + script + ")");
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
letters.clear();
|
||||
letters.add(""); // header row
|
||||
marks.clear();
|
||||
notPrinted.clear();
|
||||
printed.clear();
|
||||
|
||||
for (int cp = 0; cp < 0x10FFFF; ++cp) {
|
||||
byte type = Default.ucd.getCategory(cp);
|
||||
if (type == Default.ucd.UNASSIGNED || type == Default.ucd.PRIVATE_USE) continue; // skip chaff
|
||||
Utility.dot(cp);
|
||||
|
||||
byte newScript = Default.ucd.getScript(cp);
|
||||
if (newScript != script) continue;
|
||||
|
||||
String source = UTF16.valueOf(cp);
|
||||
String decomp = Default.nfd.normalize(source);
|
||||
if (decomp.equals(source)) continue;
|
||||
|
||||
// pick up all decompositions
|
||||
int count = UTF16.getCharCount(UTF16.charAt(decomp, 0));
|
||||
|
||||
if (count == decomp.length()) {
|
||||
notPrinted.add(source);
|
||||
continue; // skip unless marks
|
||||
}
|
||||
|
||||
if (UCD.isHangulSyllable(cp)) count = 2;
|
||||
String first = decomp.substring(0, count);
|
||||
String second = decomp.substring(count);
|
||||
//if (!markSet.containsAll(second)) continue; // skip unless marks
|
||||
|
||||
letters.add(first);
|
||||
marks.add(second);
|
||||
Utility.addToSet(decomposes, decomp, source);
|
||||
notPrinted.add(source);
|
||||
if (source.equals("\u212b")) System.out.println("A-RING!");
|
||||
}
|
||||
|
||||
if (marks.size() != 0) {
|
||||
|
||||
totalMarks.addAll(marks);
|
||||
|
||||
|
||||
out.println("<table border='1' cellspacing='0'>");
|
||||
out.println("<caption>" + scriptName + "<br>(" + letters.size() + " × " + marks.size() + ")</caption>");
|
||||
|
||||
Iterator it2 = letters.iterator();
|
||||
while (it2.hasNext()) {
|
||||
String let = (String)it2.next();
|
||||
out.println("<tr>" + showCell(Default.nfc.normalize(let), "class='h'"));
|
||||
Iterator it3 = marks.iterator();
|
||||
while (it3.hasNext()) {
|
||||
String mark = (String)it3.next();
|
||||
String merge = let + mark;
|
||||
if (let.length() != 0 && decomposes.get(merge) == null) {
|
||||
out.println("<td> </td>");
|
||||
continue;
|
||||
}
|
||||
String comp;
|
||||
try {
|
||||
comp = Default.nfc.normalize(merge);
|
||||
} catch (Exception e) {
|
||||
System.out.println("Failed when trying to compose <" + Utility.hex(e) + ">");
|
||||
continue;
|
||||
}
|
||||
// skip unless single char or header
|
||||
/*if (let.length() != 0
|
||||
&& (UTF16.countCodePoint(comp) != 1 || comp.equals(merge))) {
|
||||
out.println("<td class='x'> </td>");
|
||||
continue;
|
||||
}
|
||||
*/
|
||||
Set decomps = (Set) decomposes.get(merge);
|
||||
if (let.length() == 0) {
|
||||
printed.add(comp);
|
||||
out.println(showCell(comp, "class='h'"));
|
||||
} else if (decomps.contains(comp)) {
|
||||
printed.add(comp);
|
||||
out.println(showCell(comp, "class='w'"));
|
||||
} else {
|
||||
comp = (String) new ArrayList(decomps).get(0);
|
||||
printed.add(comp);
|
||||
out.println(showCell(comp, "class='r'"));
|
||||
}
|
||||
}
|
||||
out.println("</tr>");
|
||||
}
|
||||
out.println("</table><br>");
|
||||
|
||||
//out.println("<table><tr><th>Other Letters</th><th>Other Marks</th></tr><tr><td>");
|
||||
//tabulate(out, atomics.iterator(),16);
|
||||
//out.println("</td><td>");
|
||||
//out.println("</td></tr></table>");
|
||||
|
||||
}
|
||||
notPrinted.removeAll(printed);
|
||||
if (notPrinted.size() != 0) {
|
||||
tabulate(out, scriptName + " Excluded", notPrinted.iterator(), 24, "class='r'");
|
||||
out.println("<br>");
|
||||
}
|
||||
}
|
||||
|
||||
Set otherMarks = new TreeSet(uca);
|
||||
UnicodeSet markSet = new UnicodeSet("[[:Me:][:Mn:]]");
|
||||
it.reset(markSet);
|
||||
while (it.next()) {
|
||||
int cp = it.codepoint;
|
||||
String source = UTF16.valueOf(cp);
|
||||
if (totalMarks.contains(source)) continue; // skip all that we have already
|
||||
otherMarks.add(source);
|
||||
}
|
||||
tabulate(out, "Marks that never combine", otherMarks.iterator(), 24, "class='b'");
|
||||
|
||||
out.println("</body></html>");
|
||||
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static void tabulate(PrintWriter out, String caption, Iterator it2, int limit, String classType) {
|
||||
int count = 0;
|
||||
out.println("<table border='1' cellspacing='0'><tr>");
|
||||
if (caption != null && caption.length() != 0) {
|
||||
out.println("<caption>" + caption + "</caption>");
|
||||
}
|
||||
while (it2.hasNext()) {
|
||||
if (++count > limit) {
|
||||
out.println("</tr><tr>");
|
||||
count = 1;
|
||||
}
|
||||
|
||||
out.println(showCell((String)it2.next(), classType));
|
||||
}
|
||||
out.println("</tr></table>");
|
||||
}
|
||||
|
||||
public static String showCell(String comp, String classType) {
|
||||
if (comp == null) {
|
||||
return "<td "
|
||||
+ classType + (classType.length() != 0 ? " " : "")
|
||||
+ "> </td>";
|
||||
}
|
||||
return "<td "
|
||||
+ classType + (classType.length() != 0 ? " " : "")
|
||||
+ "title='" + Utility.hex(comp) + " " + Default.ucd.getName(comp) + "'>" + addCircle.transliterate(comp)
|
||||
+ "<br><tt>" + Utility.hex(comp) + "</tt></td>";
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -730,7 +932,7 @@ public class WriteCharts implements UCD_Types {
|
||||
static final IntStack p2 = new IntStack(30);
|
||||
static final IntStack s2 = new IntStack(30);
|
||||
static final IntStack t2 = new IntStack(30);
|
||||
|
||||
|
||||
static int getStrengthDifference(CEList ceList, CEList lastCEList) {
|
||||
extractNonzeros(ceList, p1, s1, t1);
|
||||
extractNonzeros(lastCEList, p2, s2, t2);
|
||||
@ -742,12 +944,12 @@ public class WriteCharts implements UCD_Types {
|
||||
if (temp != 0) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void extractNonzeros(CEList ceList, IntStack primaries, IntStack secondaries, IntStack tertiaries) {
|
||||
primaries.clear();
|
||||
secondaries.clear();
|
||||
tertiaries.clear();
|
||||
|
||||
|
||||
for (int i = 0; i < ceList.length(); ++i) {
|
||||
int ce = ceList.at(i);
|
||||
int temp = UCA.getPrimary(ce);
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2002/07/15 15:23:01 $
|
||||
* $Revision: 1.26 $
|
||||
* $Date: 2002/09/25 06:40:14 $
|
||||
* $Revision: 1.27 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -144,7 +144,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
||||
BufferedReader in = Utility.openUnicodeFile("CaseFolding", UNICODE_VERSION, true, false);
|
||||
// new BufferedReader(new FileReader(DIR31 + "CaseFolding-3.d3.alpha.txt"), 64*1024);
|
||||
// log = new PrintWriter(new FileOutputStream("CaseFolding_data.js"));
|
||||
log = Utility.openPrintWriter("CaseFolding_data.js", false, false);
|
||||
log = Utility.openPrintWriter("CaseFolding_data.js", Utility.UTF8_WINDOWS);
|
||||
log.println("var CF = new Object();");
|
||||
int count = 0;
|
||||
while (true) {
|
||||
@ -189,7 +189,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
||||
Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//log = new PrintWriter(new FileOutputStream("Normalization_data.js"));
|
||||
log = Utility.openPrintWriter("Normalization_data.js", false, false);
|
||||
log = Utility.openPrintWriter("Normalization_data.js", Utility.LATIN1_WINDOWS);
|
||||
|
||||
|
||||
int count = 0;
|
||||
@ -318,7 +318,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
}
|
||||
}
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", true, true);
|
||||
PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS);
|
||||
//if (!shortPrint) log.write('\uFEFF');
|
||||
log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
|
||||
log.println("# Generated: " + getNormalDate());
|
||||
@ -702,7 +702,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
}*/
|
||||
|
||||
static void testCompatibilityCharacters() throws IOException {
|
||||
log = Utility.openPrintWriter("UCA_CompatComparison.txt");
|
||||
log = Utility.openPrintWriter("UCA_CompatComparison.txt", Utility.UTF8_WINDOWS);
|
||||
|
||||
int[] kenCes = new int[50];
|
||||
int[] markCes = new int[50];
|
||||
@ -1196,7 +1196,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
"UTF8"),
|
||||
32*1024));
|
||||
*/
|
||||
PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", false, false);
|
||||
PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", Utility.UTF8_WINDOWS);
|
||||
|
||||
diLog.write('\uFEFF');
|
||||
|
||||
@ -1234,7 +1234,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
"UTF8"),
|
||||
32*1024));
|
||||
*/
|
||||
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", false, false);
|
||||
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.LATIN1_WINDOWS);
|
||||
|
||||
diLog.write('\uFEFF');
|
||||
|
||||
@ -1413,7 +1413,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
"UTF8"),
|
||||
32*1024));
|
||||
*/
|
||||
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", false, false);
|
||||
PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.LATIN1_WINDOWS);
|
||||
|
||||
diLog.write('\uFEFF');
|
||||
|
||||
@ -1660,7 +1660,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
Set alreadyDone = new HashSet();
|
||||
PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", false, false);
|
||||
PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.LATIN1_WINDOWS);
|
||||
|
||||
while (true) {
|
||||
String s = cc.next(ces, lenArray);
|
||||
@ -1784,7 +1784,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
||||
if (shortPrint) filename += "_SHORT";
|
||||
if (option == IN_XML) filename += ".xml"; else filename += ".txt";
|
||||
|
||||
log = Utility.openPrintWriter(filename, false, false);
|
||||
log = Utility.openPrintWriter(filename, Utility.LATIN1_WINDOWS);
|
||||
|
||||
String[] commentText = {
|
||||
"UCA Rules",
|
||||
@ -3951,7 +3951,7 @@ static int swapCJK(int i) {
|
||||
Default.setUCD();
|
||||
|
||||
//log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html"));
|
||||
log = Utility.openPrintWriter("CheckCollationValidity.html", false, false);
|
||||
log = Utility.openPrintWriter("CheckCollationValidity.html", Utility.UTF8_WINDOWS);
|
||||
|
||||
log.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
log.println("<title>UCA Validity Log</title>");
|
||||
@ -4618,7 +4618,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
||||
|
||||
static PrintWriter writeHead(int counter, int end, String title, String other, String version, boolean show) throws IOException {
|
||||
|
||||
PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html");
|
||||
PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html", Utility.UTF8_WINDOWS);
|
||||
|
||||
copyFile(out, "HTML-Part1.txt");
|
||||
/*
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2002/08/09 23:56:24 $
|
||||
* $Revision: 1.22 $
|
||||
* $Date: 2002/09/25 06:40:13 $
|
||||
* $Revision: 1.23 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -73,11 +73,15 @@ public final class Main implements UCD_Types {
|
||||
|
||||
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
|
||||
|
||||
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
|
||||
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
|
||||
|
||||
else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
|
||||
else if (arg.equalsIgnoreCase("Buildnames")) BuildNames.main(null);
|
||||
else if (arg.equalsIgnoreCase("TestNormalization")) TestNormalization.main(null);
|
||||
|
||||
|
||||
else if (arg.equalsIgnoreCase("GenerateCaseTest")) GenerateCaseTest.main(null);
|
||||
else if (arg.equalsIgnoreCase("checkDecompFolding")) VerifyUCD.checkDecompFolding();
|
||||
|
||||
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2002/08/04 21:38:45 $
|
||||
* $Revision: 1.17 $
|
||||
* $Date: 2002/09/25 06:40:13 $
|
||||
* $Revision: 1.18 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -964,6 +964,9 @@ to guarantee identifier closure.
|
||||
|
||||
public boolean hasComputableName(int codePoint) {
|
||||
if (codePoint >= 0xF900 && codePoint <= 0xFA2D) return true;
|
||||
if (codePoint >= 0x2800 && codePoint <= 0x28FF) return true;
|
||||
if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) return true;
|
||||
|
||||
int rangeStart = mapToRepresentative(codePoint, major < 2);
|
||||
switch (rangeStart) {
|
||||
default:
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2002/08/04 21:38:44 $
|
||||
* $Revision: 1.24 $
|
||||
* $Date: 2002/09/25 06:40:14 $
|
||||
* $Revision: 1.25 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -18,11 +18,16 @@ import java.text.*;
|
||||
import java.io.*;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.Replaceable;
|
||||
import com.ibm.icu.text.ReplaceableString;
|
||||
import com.ibm.icu.text.UnicodeMatcher;
|
||||
|
||||
import com.ibm.text.UCD.*;
|
||||
|
||||
public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
|
||||
static final boolean UTF8 = true; // TODO -- make argument
|
||||
public static final char BOM = '\uFEFF';
|
||||
|
||||
public static String[] append(String[] array1, String[] array2) {
|
||||
String[] temp = new String[array1.length + array2.length];
|
||||
@ -334,6 +339,83 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
}
|
||||
return output.toString();
|
||||
}
|
||||
|
||||
|
||||
public static final class Position {
|
||||
public int start, limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the next position in the text that matches.
|
||||
* @param divider A UnicodeMatcher, such as a UnicodeSet.
|
||||
* @text obvious
|
||||
* @offset starting offset
|
||||
* @output start and limit of the piece found. If the return is false, then start,limit = length
|
||||
* @return true iff match found
|
||||
*/
|
||||
public static boolean next(UnicodeMatcher matcher, Replaceable text, int offset,
|
||||
Position output) {
|
||||
int[] io = new int[1]; // TODO replace later; extra object creation
|
||||
int limit = text.length();
|
||||
// don't worry about surrogates; matcher will handle
|
||||
for (int i = offset; i <= limit; ++i) {
|
||||
io[0] = i;
|
||||
if (matcher.matches(text, io, limit, false) == UnicodeMatcher.U_MATCH) {
|
||||
// a hit, return
|
||||
output.start = i;
|
||||
output.limit = io[0];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
output.start = output.limit = limit;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the next position in the text that matches.
|
||||
* @param divider A UnicodeMatcher, such as a UnicodeSet.
|
||||
* @text obvious
|
||||
* @offset starting offset
|
||||
* @output start and limit of the piece found. If the return is false, then start,limit = 0
|
||||
* @return true iff match found
|
||||
*/
|
||||
public static boolean previous(UnicodeMatcher matcher, Replaceable text, int offset,
|
||||
Position output) {
|
||||
int[] io = new int[1]; // TODO replace later; extra object creation
|
||||
int limit = 0;
|
||||
// don't worry about surrogates; matcher will handle
|
||||
for (int i = offset; i >= limit; --i) {
|
||||
io[0] = i;
|
||||
if (matcher.matches(text, io, offset, false) == UnicodeMatcher.U_MATCH) {
|
||||
// a hit, return
|
||||
output.start = i;
|
||||
output.limit = io[0];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
output.start = output.limit = limit;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a string containing divider into pieces, storing in output
|
||||
* and returns the number of pieces. The string does not have to be terminated:
|
||||
* the segment after the last divider is returned in the last output element.
|
||||
* Thus if the string has no dividers, then the whole string is returned in output[0]
|
||||
* with a return value of 1.
|
||||
* @param divider A UnicodeMatcher, such as a UnicodeSet.
|
||||
* @param s the text to be divided
|
||||
* @param output where the resulting pieces go
|
||||
* @return the number of items put into output
|
||||
*/
|
||||
public static int split(UnicodeMatcher divider, Replaceable text, Position[] output) {
|
||||
int index = 0;
|
||||
for (int offset = 0;; offset = output[index-1].limit) {
|
||||
if (output[index] == null) output[index] = new Position();
|
||||
boolean matches = next(divider, text, offset, output[index++]);
|
||||
if (!matches) return index;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a string containing divider into pieces, storing in output
|
||||
@ -358,14 +440,14 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
}
|
||||
|
||||
public static String[] split(String s, char divider) {
|
||||
String[] result = new String[100];
|
||||
String[] result = new String[100]; // HACK
|
||||
int count = split(s, divider, result);
|
||||
return extract(result, 0, count);
|
||||
}
|
||||
|
||||
public static String[] extract(String[] source, int start, int end) {
|
||||
String[] result = new String[end-start];
|
||||
System.arraycopy(source, start, result, 0, end - start);
|
||||
public static String[] extract(String[] source, int start, int limit) {
|
||||
String[] result = new String[limit-start];
|
||||
System.arraycopy(source, start, result, 0, limit - start);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -564,7 +646,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
// Or if they are UTF8, use true, false
|
||||
public static PrintWriter openPrintWriter(String filename, byte options) throws IOException {
|
||||
File file = new File(getOutputName(filename));
|
||||
System.out.println("Creating File: " + file);
|
||||
Utility.fixDot();
|
||||
System.out.println("Creating File: " + file.getCanonicalPath());
|
||||
File parent = new File(file.getParent());
|
||||
//System.out.println("Creating File: "+ parent);
|
||||
parent.mkdirs();
|
||||
@ -609,6 +692,28 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
}
|
||||
}
|
||||
|
||||
public static void print(PrintWriter pw, Map c, String pairSeparator, String separator, Breaker b) {
|
||||
Iterator it = c.keySet().iterator();
|
||||
boolean first = true;
|
||||
Object last = null;
|
||||
while (it.hasNext()) {
|
||||
Object obj = it.next();
|
||||
Object result = c.get(obj);
|
||||
if (b != null && !b.filter(obj)) continue;
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
pw.print(separator);
|
||||
}
|
||||
if (b != null) {
|
||||
pw.print(b.get(obj, last) + pairSeparator + result);
|
||||
} else {
|
||||
pw.print(obj + pairSeparator + result);
|
||||
}
|
||||
last = obj;
|
||||
}
|
||||
}
|
||||
|
||||
public static void appendFile(String filename, boolean utf8, PrintWriter output) throws IOException {
|
||||
appendFile(filename, utf8, output, null);
|
||||
}
|
||||
@ -870,19 +975,35 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
static PrintWriter showSetNamesPw;
|
||||
|
||||
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, UCD ucd) {
|
||||
if (showSetNamesPw == null) showSetNamesPw = new PrintWriter(System.out);
|
||||
showSetNames(showSetNamesPw, prefix, set, separateLines, false, ucd);
|
||||
showSetNamesPw.flush();
|
||||
showSetNames(prefix, set, separateLines, false, false, ucd);
|
||||
}
|
||||
|
||||
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
|
||||
showSetNames(prefix, set, separateLines, IDN, false, ucd);
|
||||
}
|
||||
|
||||
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
|
||||
showSetNames( pw, prefix, set, separateLines, IDN, false, ucd);
|
||||
}
|
||||
|
||||
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, boolean IDN, boolean withChar, UCD ucd) {
|
||||
if (showSetNamesPw == null) showSetNamesPw = new PrintWriter(System.out);
|
||||
showSetNames(showSetNamesPw, prefix, set, separateLines, IDN, withChar, ucd);
|
||||
showSetNamesPw.flush();
|
||||
}
|
||||
|
||||
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN,
|
||||
boolean withChar, UCD ucd) {
|
||||
int count = set.getRangeCount();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
int start = set.getRangeStart(i);
|
||||
int end = set.getRangeEnd(i);
|
||||
if (separateLines || (IDN && isSeparateLineIDN(start,end,ucd))) {
|
||||
for (int cp = start; cp <= end; ++cp) {
|
||||
if (!IDN) pw.println(prefix + ucd.getCodeAndName(cp));
|
||||
if (!IDN) pw.println(prefix + ucd.getCode(cp)
|
||||
+ "\t# "
|
||||
+ (withChar ? " (" + UTF16.valueOf(cp) + ") " : "")
|
||||
+ ucd.getName(cp));
|
||||
else {
|
||||
pw.println(prefix + Utility.hex(cp,4) + "; " + ucd.getName(cp));
|
||||
}
|
||||
@ -891,7 +1012,10 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
||||
if (!IDN) {
|
||||
pw.println(prefix + ucd.getCode(start)
|
||||
+ ((start != end) ? (".." + ucd.getCode(end)) : "")
|
||||
+ "\t# " + ucd.getName(start) + ((start != end) ? (".." + ucd.getName(end)) : "")
|
||||
+ "\t# "
|
||||
+ (withChar ? " (" + UTF16.valueOf(start)
|
||||
+ ((start != end) ? (".." + UTF16.valueOf(end)) : "") + ") " : "")
|
||||
+ ucd.getName(start) + ((start != end) ? (".." + ucd.getName(end)) : "")
|
||||
);
|
||||
} else {
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user