diff --git a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java index 5ce495c30a..1a489a795e 100644 --- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java +++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ -* $Date: 2002/09/25 06:40:13 $ -* $Revision: 1.10 $ +* $Date: 2003/08/20 03:48:47 $ +* $Revision: 1.11 $ * ******************************************************************************* */ @@ -19,7 +19,7 @@ import com.ibm.text.UCD.*; import com.ibm.text.utility.*; import com.ibm.icu.text.UTF16; -public class GenOverlap implements UCD_Types { +public class GenOverlap implements UCD_Types, UCA_Types { static Map completes = new TreeMap(); static Map back = new HashMap(); @@ -164,8 +164,8 @@ public class GenOverlap implements UCD_Types { static boolean PROGRESS = false; static void fullCheck() throws IOException { - PrintWriter log = Utility.openPrintWriter("Overlap.html", Utility.UTF8_WINDOWS); - PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt", Utility.UTF8_WINDOWS); + PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, "Overlap.html", Utility.UTF8_WINDOWS); + PrintWriter simpleList = Utility.openPrintWriter(UCA_GEN_DIR, "Overlap.txt", Utility.UTF8_WINDOWS); Iterator it = completes.keySet().iterator(); int counter = 0; @@ -448,7 +448,7 @@ public class GenOverlap implements UCD_Types { newKeys.removeAll(joint); oldKeys.removeAll(joint); - PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), Utility.UTF8_WINDOWS); + PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, "UCA-old-vs-new" + (doMax ? 
"-MAX.txt" : ".txt"), Utility.UTF8_WINDOWS); Iterator it = list.iterator(); int last = -1; while (it.hasNext()) { @@ -631,7 +631,7 @@ public class GenOverlap implements UCD_Types { System.out.println("Data Gathered"); - PrintWriter log = Utility.openPrintWriter("checkstringsearchhash.html", Utility.UTF8_WINDOWS); + PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, "checkstringsearchhash.html", Utility.UTF8_WINDOWS); Utility.writeHtmlHeader(log, "Check Hash"); log.println("
Shows collisions among primary values when hashed to table size = " + tableLength + ".");
@@ -694,7 +694,7 @@ public class GenOverlap implements UCD_Types {
}
public static void listCyrillic(UCA collatorIn) throws IOException {
- PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", Utility.UTF8_WINDOWS);
+ PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, "ListCyrillic.txt", Utility.UTF8_WINDOWS);
Set set = new TreeSet(collatorIn);
Set set2 = new TreeSet(collatorIn);
ucd = UCD.make();
diff --git a/tools/unicodetools/com/ibm/text/UCA/Main.java b/tools/unicodetools/com/ibm/text/UCA/Main.java
index f3b55d3c28..11c2384d16 100644
--- a/tools/unicodetools/com/ibm/text/UCA/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Main.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
-* $Date: 2003/07/07 15:58:57 $
-* $Revision: 1.13 $
+* $Date: 2003/08/20 03:48:46 $
+* $Revision: 1.14 $
*
*******************************************************************************
*/
@@ -28,90 +28,97 @@ public class Main {
// NOTE: so far, we don't need to build the UCA with anything but the latest versions.
// A few changes would need to be made to the code to do older versions.
-
- System.out.println("Building UCA");
- WriteCollationData.collator = new UCA(null, UCDVersion);
- System.out.println("Built version " + WriteCollationData.collator.getDataVersion()
- + "/ucd: " + WriteCollationData.collator.getUCDVersion());
-
- System.out.println("Building UCD data");
- WriteCollationData.ucd = UCD.make(WriteCollationData.collator.getUCDVersion());
-
- if (args.length == 0) args = new String[] {"?"}; // force the help comment
- boolean shortPrint = false;
-
- for (int i = 0; i < args.length; ++i) {
- String arg = args[i];
- System.out.println("OPTION: " + arg);
- if (arg.charAt(0) == '#') return; // skip rest of line
+ try {
+ System.out.println("Building UCA");
+ WriteCollationData.collator = new UCA(null, UCDVersion);
+ System.out.println("Built version " + WriteCollationData.collator.getDataVersion()
+ + "/ucd: " + WriteCollationData.collator.getUCDVersion());
- if (arg.equalsIgnoreCase("ICU")) args = Utility.append(args, ICU_FILES);
- else if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(WriteCollationData.collator);
- else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(WriteCollationData.collator);
- //else if (arg.equalsIgnoreCase("writeNonspacingDifference")) WriteCollationData.writeNonspacingDifference();
+ System.out.println("Building UCD data");
+ WriteCollationData.ucd = UCD.make(WriteCollationData.collator.getUCDVersion());
- else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator);
- else if (arg.equalsIgnoreCase("scriptChart")) WriteCharts.scriptChart();
- else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
- else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
- else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
- else if (arg.equalsIgnoreCase("special")) WriteCharts.special();
+ if (args.length == 0) args = new String[] {"?"}; // force the help comment
+ boolean shortPrint = false;
+ boolean noCE = false;
- else if (arg.equalsIgnoreCase("writeCompositionChart")) WriteCharts.writeCompositionChart();
+ for (int i = 0; i < args.length; ++i) { // dispatch each command-line option in order
+ String arg = args[i];
+ System.out.println("OPTION: " + arg);
+ if (arg.startsWith("#")) return; // skip rest of line; startsWith (unlike charAt(0)) is safe for an empty argument
+
+ if (arg.equalsIgnoreCase("ICU")) {
+ args = Utility.append(ICU_FILES, Utility.subarray(args, i+1)); // presumably ICU_FILES followed by the not-yet-processed args -- confirm helper semantics
+ i = -1; // the loop's ++i brings this back to 0, so scanning restarts on the new array
+ continue;
+ }
+ if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(WriteCollationData.collator);
+ else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(WriteCollationData.collator);
+ //else if (arg.equalsIgnoreCase("writeNonspacingDifference")) WriteCollationData.writeNonspacingDifference();
+
+ else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator);
+ else if (arg.equalsIgnoreCase("scriptChart")) WriteCharts.scriptChart();
+ else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
+ else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
+ else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
+ else if (arg.equalsIgnoreCase("special")) WriteCharts.special();
+
+ else if (arg.equalsIgnoreCase("writeCompositionChart")) WriteCharts.writeCompositionChart();
+
+ else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(WriteCollationData.collator);
+ else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(WriteCollationData.collator);
+ else if (arg.equalsIgnoreCase("listCyrillic")) GenOverlap.listCyrillic(WriteCollationData.collator);
+
+ else if (arg.equalsIgnoreCase("WriteRules")) WriteCollationData.writeRules(WriteCollationData.WITHOUT_NAMES, shortPrint, noCE);
+ // else if (arg.equalsIgnoreCase("WriteRulesWithNames")) WriteCollationData.writeRules(WriteCollationData.WITH_NAMES);
+ else if (arg.equalsIgnoreCase("WriteRulesXML")) WriteCollationData.writeRules(WriteCollationData.IN_XML, shortPrint, noCE);
+ else if (arg.equalsIgnoreCase("checkDisjointIgnorables")) WriteCollationData.checkDisjointIgnorables();
+ else if (arg.equalsIgnoreCase("writeContractions")) WriteCollationData.writeContractions();
+ else if (arg.equalsIgnoreCase("writeFractionalUCA")) WriteCollationData.writeFractionalUCA("FractionalUCA");
+ else if (arg.equalsIgnoreCase("writeConformance")) WriteCollationData.writeConformance("CollationTest_NON_IGNORABLE", UCA.NON_IGNORABLE, shortPrint);
+ else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) WriteCollationData.writeConformance("CollationTest_SHIFTED", UCA.SHIFTED, shortPrint);
+ else if (arg.equalsIgnoreCase("testCompatibilityCharacters")) WriteCollationData.testCompatibilityCharacters();
+ else if (arg.equalsIgnoreCase("writeCollationValidityLog")) WriteCollationData.writeCollationValidityLog();
+ else if (arg.equalsIgnoreCase("writeCaseExceptions")) WriteCollationData.writeCaseExceptions();
+ else if (arg.equalsIgnoreCase("writeJavascriptInfo")) WriteCollationData.writeJavascriptInfo();
+ else if (arg.equalsIgnoreCase("writeCaseFolding")) WriteCollationData.writeCaseFolding();
+ else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest();
+ else if (arg.equalsIgnoreCase("short")) shortPrint = true; // flag only: affects the write* options that come after it
+ else if (arg.equalsIgnoreCase("noCE")) noCE = true; // flag only: forwarded to writeRules by later WriteRules/WriteRulesXML options
+
+ else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
+ else if (arg.equalsIgnoreCase("probe")) Probe.test();
+
+
+ else { // anything unrecognized (including the forced "?") prints the help text
+ System.out.println();
+ System.out.println("UNKNOWN OPTION (" + arg + "): must be one of the following (case-insensitive)");
+ System.out.println("\tWriteRulesXML, WriteRulesWithNames, WriteRules,");
+ System.out.println("\tcheckDisjointIgnorables, writeContractions,");
+ System.out.println("\twriteFractionalUCA, writeConformance, writeConformanceSHIFTED, testCompatibilityCharacters,");
+ System.out.println("\twriteCollationValidityLog, writeCaseExceptions, writeJavascriptInfo, writeCaseFolding");
+ System.out.println("\tjavatest, short (used for rules and conformance), noCE (used for rules)");
+ }
+ }
+ } finally {
+ System.out.println("Done");
- else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(WriteCollationData.collator);
- else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(WriteCollationData.collator);
- else if (arg.equalsIgnoreCase("listCyrillic")) GenOverlap.listCyrillic(WriteCollationData.collator);
-
- else if (arg.equalsIgnoreCase("WriteRules")) WriteCollationData.writeRules(WriteCollationData.WITHOUT_NAMES, shortPrint);
- // else if (arg.equalsIgnoreCase("WriteRulesWithNames")) WriteCollationData.writeRules(WriteCollationData.WITH_NAMES);
- else if (arg.equalsIgnoreCase("WriteRulesXML")) WriteCollationData.writeRules(WriteCollationData.IN_XML, shortPrint);
- else if (arg.equalsIgnoreCase("checkDisjointIgnorables")) WriteCollationData.checkDisjointIgnorables();
- else if (arg.equalsIgnoreCase("writeContractions")) WriteCollationData.writeContractions();
- else if (arg.equalsIgnoreCase("writeFractionalUCA")) WriteCollationData.writeFractionalUCA("FractionalUCA");
- else if (arg.equalsIgnoreCase("writeConformance")) WriteCollationData.writeConformance("CollationTest_NON_IGNORABLE", UCA.NON_IGNORABLE, shortPrint);
- else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) WriteCollationData.writeConformance("CollationTest_SHIFTED", UCA.SHIFTED, shortPrint);
- else if (arg.equalsIgnoreCase("testCompatibilityCharacters")) WriteCollationData.testCompatibilityCharacters();
- else if (arg.equalsIgnoreCase("writeCollationValidityLog")) WriteCollationData.writeCollationValidityLog();
- else if (arg.equalsIgnoreCase("writeCaseExceptions")) WriteCollationData.writeCaseExceptions();
- else if (arg.equalsIgnoreCase("writeJavascriptInfo")) WriteCollationData.writeJavascriptInfo();
- else if (arg.equalsIgnoreCase("writeCaseFolding")) WriteCollationData.writeCaseFolding();
- else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest();
- else if (arg.equalsIgnoreCase("short")) shortPrint = true;
-
- else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
- else if (arg.equalsIgnoreCase("probe")) Probe.test();
+ /*
+ String s = WriteCollationData.collator.getSortKey("\u1025\u102E", UCA.NON_IGNORABLE, true);
+ System.out.println(Utility.hex("\u0595\u0325") + ", " + WriteCollationData.collator.toString(s));
+ String t = WriteCollationData.collator.getSortKey("\u0596\u0325", UCA.NON_IGNORABLE, true);
+ System.out.println(Utility.hex("\u0596\u0325") + ", " + WriteCollationData.collator.toString(t));
- else {
- System.out.println();
- System.out.println("UNKNOWN OPTION (" + arg + "): must be one of the following (case-insensitive)");
- System.out.println("\tWriteRulesXML, WriteRulesWithNames, WriteRules,");
- System.out.println("\tcheckDisjointIgnorables, writeContractions,");
- System.out.println("\twriteFractionalUCA, writeConformance, writeConformanceSHIFTED, testCompatibilityCharacters,");
- System.out.println("\twriteCollationValidityLog, writeCaseExceptions, writeJavascriptInfo, writeCaseFolding");
- System.out.println("\tjavatest, hex (used for conformance)");
+ Normalizer foo = new Normalizer(Normalizer.NFKD);
+ char x = '\u1EE2';
+ System.out.println(Utility.hex(x) + " " + ucd.getName(x));
+ String nx = foo.normalize(x);
+ for (int i = 0; i < nx.length(); ++i) {
+ char c = nx.charAt(i);
+ System.out.println(ucd.getCanonicalClass(c));
}
- }
- System.out.println("Done");
-
- /*
- String s = WriteCollationData.collator.getSortKey("\u1025\u102E", UCA.NON_IGNORABLE, true);
- System.out.println(Utility.hex("\u0595\u0325") + ", " + WriteCollationData.collator.toString(s));
- String t = WriteCollationData.collator.getSortKey("\u0596\u0325", UCA.NON_IGNORABLE, true);
- System.out.println(Utility.hex("\u0596\u0325") + ", " + WriteCollationData.collator.toString(t));
-
-
- Normalizer foo = new Normalizer(Normalizer.NFKD);
- char x = '\u1EE2';
- System.out.println(Utility.hex(x) + " " + ucd.getName(x));
- String nx = foo.normalize(x);
- for (int i = 0; i < nx.length(); ++i) {
- char c = nx.charAt(i);
- System.out.println(ucd.getCanonicalClass(c));
+ System.out.println(Utility.hex(nx, " ") + " " + ucd.getName(nx));
+ */
}
- System.out.println(Utility.hex(nx, " ") + " " + ucd.getName(nx));
- */
-
}
}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA.java b/tools/unicodetools/com/ibm/text/UCA/UCA.java
index c1856fcf23..0148c10d95 100644
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
-* $Date: 2003/03/19 17:30:56 $
-* $Revision: 1.20 $
+* $Date: 2003/08/20 03:48:45 $
+* $Revision: 1.21 $
*
*******************************************************************************
*/
@@ -79,7 +79,7 @@ final public class UCA implements Comparator, UCA_Types {
* Version of the UCA tables to use
*/
//private static final String VERSION = "-3.0.1d3"; // ""; // "-2.1.9d7";
- public static final String UCA_BASE = "3.1.1"; // ""; // "-2.1.9d7";
+ public static final String UCA_BASE = "4.0.0d1"; // "3.1.1"; // ; // ""; // "-2.1.9d7";
public static final String VERSION = "-" + UCA_BASE; // + "d6" ""; // "-2.1.9d7";
public static final String ALLFILES = "allkeys"; // null if not there
@@ -800,7 +800,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
/**
* Records the dataversion
*/
- private String dataVersion = "3.1d1";
+ private String dataVersion = "Missing @version in data!!";
/**
* Records the dataversion
diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java b/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java
index 6caa908c70..85adcd55f0 100644
--- a/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java
@@ -5,15 +5,18 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Types.java,v $
-* $Date: 2002/07/14 22:07:00 $
-* $Revision: 1.1 $
+* $Date: 2003/08/20 03:48:45 $
+* $Revision: 1.2 $
*
*******************************************************************************
*/
package com.ibm.text.UCA;
+import com.ibm.text.UCD.*;
+import com.ibm.text.utility.*;
public interface UCA_Types {
+ public static final String UCA_GEN_DIR = UCD_Types.GEN_DIR + "collation\\"; // output directory for generated collation files; NOTE(review): trailing "\\" is a hard-coded Windows separator -- confirm it matches GEN_DIR's convention
public static final char LEVEL_SEPARATOR = '\u0000';
/**
* Expanding characters are marked with a exception bit combination
diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
index 5cd2615565..cc1523b62f 100644
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
-* $Date: 2003/04/25 01:39:13 $
-* $Revision: 1.32 $
+* $Date: 2003/08/20 03:48:43 $
+* $Revision: 1.33 $
*
*******************************************************************************
*/
@@ -145,7 +145,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
BufferedReader in = Utility.openUnicodeFile("CaseFolding", UNICODE_VERSION, true, Utility.LATIN1);
// new BufferedReader(new FileReader(DIR31 + "CaseFolding-3.d3.alpha.txt"), 64*1024);
// log = new PrintWriter(new FileOutputStream("CaseFolding_data.js"));
- log = Utility.openPrintWriter("CaseFolding_data.js", Utility.UTF8_WINDOWS);
+ log = Utility.openPrintWriter(UCA_GEN_DIR, "CaseFolding_data.js", Utility.UTF8_WINDOWS);
log.println("var CF = new Object();");
int count = 0;
while (true) {
@@ -190,7 +190,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
//log = new PrintWriter(new FileOutputStream("Normalization_data.js"));
- log = Utility.openPrintWriter("Normalization_data.js", Utility.LATIN1_WINDOWS);
+ log = Utility.openPrintWriter(UCA_GEN_DIR, "Normalization_data.js", Utility.LATIN1_WINDOWS);
int count = 0;
@@ -319,7 +319,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
}
}
- PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS);
+ PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS);
//if (!shortPrint) log.write('\uFEFF');
log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
log.println("# Generated: " + getNormalDate());
@@ -518,7 +518,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
*/
static void checkBadDecomps(int strength, boolean decomposition, UnicodeSet alreadySeen) {
if (ucd_uca_base == null) {
- ucd_uca_base = UCD.make(UCA.UCA_BASE);
+ ucd_uca_base = UCD.make(collator.getUCDVersion());
}
int oldStrength = collator.getStrength();
collator.setStrength(strength);
@@ -703,7 +703,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
}*/
static void testCompatibilityCharacters() throws IOException {
- log = Utility.openPrintWriter("UCA_CompatComparison.txt", Utility.UTF8_WINDOWS);
+ log = Utility.openPrintWriter(UCA_GEN_DIR, "UCA_CompatComparison.txt", Utility.UTF8_WINDOWS);
int[] kenCes = new int[50];
int[] markCes = new int[50];
@@ -940,7 +940,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
/*PrintWriter diLog = new PrintWriter(
new BufferedWriter(
new OutputStreamWriter(
- new FileOutputStream(GEN_DIR + "UCA_Nonspacing.txt"),
+ new FileOutputStream(UCA_GEN_DIR + "UCA_Nonspacing.txt"),
"UTF8"),
32*1024));
*/
@@ -1193,11 +1193,11 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
/*PrintWriter diLog = new PrintWriter(
new BufferedWriter(
new OutputStreamWriter(
- new FileOutputStream(GEN_DIR + "UCA_Contractions.txt"),
+ new FileOutputStream(UCA_GEN_DIR + "UCA_Contractions.txt"),
"UTF8"),
32*1024));
*/
- PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", Utility.UTF8_WINDOWS);
+ PrintWriter diLog = Utility.openPrintWriter(UCA_GEN_DIR, "UCA_Contractions.txt", Utility.UTF8_WINDOWS);
diLog.write('\uFEFF');
@@ -1231,23 +1231,23 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
PrintWriter diLog = new PrintWriter(
new BufferedWriter(
new OutputStreamWriter(
- new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
+ new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"),
"UTF8"),
32*1024));
*/
- PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.UTF8_WINDOWS);
+ PrintWriter diLog = Utility.openPrintWriter(UCA_GEN_DIR, "DisjointIgnorables.js", Utility.UTF8_WINDOWS);
diLog.write('\uFEFF');
/*
PrintWriter diLog = new PrintWriter(
// try new one
- new UTF8StreamWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
+ new UTF8StreamWriter(new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"),
32*1024));
diLog.write('\uFEFF');
*/
- //diLog = new PrintWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"));
+ //diLog = new PrintWriter(new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"));
Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
@@ -1410,15 +1410,15 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
/*PrintWriter diLog = new PrintWriter(
new BufferedWriter(
new OutputStreamWriter(
- new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
+ new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"),
"UTF8"),
32*1024));
*/
- PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.UTF8_WINDOWS);
+ PrintWriter diLog = Utility.openPrintWriter(UCA_GEN_DIR, "DisjointIgnorables2.js", Utility.UTF8_WINDOWS);
diLog.write('\uFEFF');
- //diLog = new PrintWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"));
+ //diLog = new PrintWriter(new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"));
Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
@@ -1627,13 +1627,15 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
expansionStart = 2; // move up if first is double-ce
}
if (len > expansionStart && homelessSecondaries.contains(UCA.getSecondary(ces[expansionStart]))) {
+ if (log2 != null) log2.println("Homeless: " + CEList.toString(ces, len));
++expansionStart; // move up if *second* is homeless ignoreable
}
return expansionStart;
}
+ static PrintWriter log2 = null;
- static void writeRules (byte option, boolean shortPrint) throws IOException {
+ static void writeRules (byte option, boolean shortPrint, boolean noCE) throws IOException {
//testTransitivity();
//if (true) return;
@@ -1661,7 +1663,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
int[] lenArray = new int[1];
Set alreadyDone = new HashSet();
- PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.UTF8_WINDOWS);
+ log2 = Utility.openPrintWriter(UCA_GEN_DIR, "UCARules-log.txt", Utility.UTF8_WINDOWS);
while (true) {
String s = cc.next(ces, lenArray);
@@ -1785,7 +1787,7 @@ F900..FAFF; CJK Compatibility Ideographs
if (shortPrint) filename += "_SHORT";
if (option == IN_XML) filename += ".xml"; else filename += ".txt";
- log = Utility.openPrintWriter(filename, Utility.UTF8_WINDOWS);
+ log = Utility.openPrintWriter(UCA_GEN_DIR, filename, Utility.UTF8_WINDOWS);
String[] commentText = {
"UCA Rules",
@@ -1933,6 +1935,10 @@ F900..FAFF; CJK Compatibility Ideographs
}*/
+ if (chr.equals("\u0966")) {
+ System.out.println(CEList.toString(ces, len));
+ }
+
expansionStart = getFirstCELen(ces, len);
// int relation = getStrengthDifference(ces, len, lastCes, lastLen);
@@ -1979,10 +1985,6 @@ F900..FAFF; CJK Compatibility Ideographs
}
}
- if (chr.equals("\u2F00")) {
- System.out.println(CEList.toString(ces, len));
- }
-
// There are double-CEs, so we have to know what the length of the first bit is.
@@ -2039,9 +2041,9 @@ F900..FAFF; CJK Compatibility Ideographs
log.print("