diff --git a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java index 5ce495c30a..1a489a795e 100644 --- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java +++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ -* $Date: 2002/09/25 06:40:13 $ -* $Revision: 1.10 $ +* $Date: 2003/08/20 03:48:47 $ +* $Revision: 1.11 $ * ******************************************************************************* */ @@ -19,7 +19,7 @@ import com.ibm.text.UCD.*; import com.ibm.text.utility.*; import com.ibm.icu.text.UTF16; -public class GenOverlap implements UCD_Types { +public class GenOverlap implements UCD_Types, UCA_Types { static Map completes = new TreeMap(); static Map back = new HashMap(); @@ -164,8 +164,8 @@ public class GenOverlap implements UCD_Types { static boolean PROGRESS = false; static void fullCheck() throws IOException { - PrintWriter log = Utility.openPrintWriter("Overlap.html", Utility.UTF8_WINDOWS); - PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt", Utility.UTF8_WINDOWS); + PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, "Overlap.html", Utility.UTF8_WINDOWS); + PrintWriter simpleList = Utility.openPrintWriter(UCA_GEN_DIR, "Overlap.txt", Utility.UTF8_WINDOWS); Iterator it = completes.keySet().iterator(); int counter = 0; @@ -448,7 +448,7 @@ public class GenOverlap implements UCD_Types { newKeys.removeAll(joint); oldKeys.removeAll(joint); - PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), Utility.UTF8_WINDOWS); + PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, "UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), Utility.UTF8_WINDOWS); Iterator it = list.iterator(); int last = -1; while (it.hasNext()) { @@ -631,7 +631,7 @@ public class GenOverlap implements UCD_Types { System.out.println("Data Gathered"); - PrintWriter log = Utility.openPrintWriter("checkstringsearchhash.html", Utility.UTF8_WINDOWS); + PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, "checkstringsearchhash.html", Utility.UTF8_WINDOWS); Utility.writeHtmlHeader(log, "Check Hash"); log.println("

Collisions

"); log.println("

Shows collisions among primary values when hashed to table size = " + tableLength + "."); @@ -694,7 +694,7 @@ public class GenOverlap implements UCD_Types { } public static void listCyrillic(UCA collatorIn) throws IOException { - PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", Utility.UTF8_WINDOWS); + PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, "ListCyrillic.txt", Utility.UTF8_WINDOWS); Set set = new TreeSet(collatorIn); Set set2 = new TreeSet(collatorIn); ucd = UCD.make(); diff --git a/tools/unicodetools/com/ibm/text/UCA/Main.java b/tools/unicodetools/com/ibm/text/UCA/Main.java index f3b55d3c28..11c2384d16 100644 --- a/tools/unicodetools/com/ibm/text/UCA/Main.java +++ b/tools/unicodetools/com/ibm/text/UCA/Main.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ -* $Date: 2003/07/07 15:58:57 $ -* $Revision: 1.13 $ +* $Date: 2003/08/20 03:48:46 $ +* $Revision: 1.14 $ * ******************************************************************************* */ @@ -28,90 +28,97 @@ public class Main { // NOTE: so far, we don't need to build the UCA with anything but the latest versions. // A few changes would need to be made to the code to do older versions. - - System.out.println("Building UCA"); - WriteCollationData.collator = new UCA(null, UCDVersion); - System.out.println("Built version " + WriteCollationData.collator.getDataVersion() - + "/ucd: " + WriteCollationData.collator.getUCDVersion()); - - System.out.println("Building UCD data"); - WriteCollationData.ucd = UCD.make(WriteCollationData.collator.getUCDVersion()); - - if (args.length == 0) args = new String[] {"?"}; // force the help comment - boolean shortPrint = false; - - for (int i = 0; i < args.length; ++i) { - String arg = args[i]; - System.out.println("OPTION: " + arg); - if (arg.charAt(0) == '#') return; // skip rest of line + try { + System.out.println("Building UCA"); + WriteCollationData.collator = new UCA(null, UCDVersion); + System.out.println("Built version " + WriteCollationData.collator.getDataVersion() + + "/ucd: " + WriteCollationData.collator.getUCDVersion()); - if (arg.equalsIgnoreCase("ICU")) args = Utility.append(args, ICU_FILES); - else if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(WriteCollationData.collator); - else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(WriteCollationData.collator); - //else if (arg.equalsIgnoreCase("writeNonspacingDifference")) WriteCollationData.writeNonspacingDifference(); + System.out.println("Building UCD data"); + WriteCollationData.ucd = UCD.make(WriteCollationData.collator.getUCDVersion()); - else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator); - else if (arg.equalsIgnoreCase("scriptChart")) WriteCharts.scriptChart(); - else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart(); - else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart(); - else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart(); - else if (arg.equalsIgnoreCase("special")) WriteCharts.special(); + if (args.length == 0) args = new String[] {"?"}; // force the help comment + boolean shortPrint = false; + boolean noCE = false; - else if (arg.equalsIgnoreCase("writeCompositionChart")) WriteCharts.writeCompositionChart(); + for (int i = 0; i < args.length; ++i) { + String arg = args[i]; + System.out.println("OPTION: " + arg); + if (arg.charAt(0) == '#') return; // skip rest of line + + if (arg.equalsIgnoreCase("ICU")) { + args = Utility.append(ICU_FILES, Utility.subarray(args, i+1)); + i = -1; + continue; + } + if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(WriteCollationData.collator); + else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(WriteCollationData.collator); + //else if (arg.equalsIgnoreCase("writeNonspacingDifference")) WriteCollationData.writeNonspacingDifference(); + + else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator); + else if (arg.equalsIgnoreCase("scriptChart")) WriteCharts.scriptChart(); + else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart(); + else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart(); + else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart(); + else if (arg.equalsIgnoreCase("special")) WriteCharts.special(); + + else if (arg.equalsIgnoreCase("writeCompositionChart")) WriteCharts.writeCompositionChart(); + + else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(WriteCollationData.collator); + else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(WriteCollationData.collator); + else if (arg.equalsIgnoreCase("listCyrillic")) GenOverlap.listCyrillic(WriteCollationData.collator); + + else if (arg.equalsIgnoreCase("WriteRules")) WriteCollationData.writeRules(WriteCollationData.WITHOUT_NAMES, shortPrint, noCE); + // else if (arg.equalsIgnoreCase("WriteRulesWithNames")) WriteCollationData.writeRules(WriteCollationData.WITH_NAMES); + else if (arg.equalsIgnoreCase("WriteRulesXML")) WriteCollationData.writeRules(WriteCollationData.IN_XML, shortPrint, noCE); + else if (arg.equalsIgnoreCase("checkDisjointIgnorables")) WriteCollationData.checkDisjointIgnorables(); + else if (arg.equalsIgnoreCase("writeContractions")) WriteCollationData.writeContractions(); + else if (arg.equalsIgnoreCase("writeFractionalUCA")) WriteCollationData.writeFractionalUCA("FractionalUCA"); + else if (arg.equalsIgnoreCase("writeConformance")) WriteCollationData.writeConformance("CollationTest_NON_IGNORABLE", UCA.NON_IGNORABLE, shortPrint); + else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) WriteCollationData.writeConformance("CollationTest_SHIFTED", UCA.SHIFTED, shortPrint); + else if (arg.equalsIgnoreCase("testCompatibilityCharacters")) WriteCollationData.testCompatibilityCharacters(); + else if (arg.equalsIgnoreCase("writeCollationValidityLog")) WriteCollationData.writeCollationValidityLog(); + else if (arg.equalsIgnoreCase("writeCaseExceptions")) WriteCollationData.writeCaseExceptions(); + else if (arg.equalsIgnoreCase("writeJavascriptInfo")) WriteCollationData.writeJavascriptInfo(); + else if (arg.equalsIgnoreCase("writeCaseFolding")) WriteCollationData.writeCaseFolding(); + else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest(); + else if (arg.equalsIgnoreCase("short")) shortPrint = true; + else if (arg.equalsIgnoreCase("noCE")) noCE = true; + + else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation(); + else if (arg.equalsIgnoreCase("probe")) Probe.test(); + + + else { + System.out.println(); + System.out.println("UNKNOWN OPTION (" + arg + "): must be one of the following (case-insensitive)"); + System.out.println("\tWriteRulesXML, WriteRulesWithNames, WriteRules,"); + System.out.println("\tcheckDisjointIgnorables, writeContractions,"); + System.out.println("\twriteFractionalUCA, writeConformance, writeConformanceSHIFTED, testCompatibilityCharacters,"); + System.out.println("\twriteCollationValidityLog, writeCaseExceptions, writeJavascriptInfo, writeCaseFolding"); + System.out.println("\tjavatest, hex (used for conformance)"); + } + } + } finally { + System.out.println("Done"); - else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(WriteCollationData.collator); - else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(WriteCollationData.collator); - else if (arg.equalsIgnoreCase("listCyrillic")) GenOverlap.listCyrillic(WriteCollationData.collator); - - else if (arg.equalsIgnoreCase("WriteRules")) WriteCollationData.writeRules(WriteCollationData.WITHOUT_NAMES, shortPrint); - // else if (arg.equalsIgnoreCase("WriteRulesWithNames")) WriteCollationData.writeRules(WriteCollationData.WITH_NAMES); - else if (arg.equalsIgnoreCase("WriteRulesXML")) WriteCollationData.writeRules(WriteCollationData.IN_XML, shortPrint); - else if (arg.equalsIgnoreCase("checkDisjointIgnorables")) WriteCollationData.checkDisjointIgnorables(); - else if (arg.equalsIgnoreCase("writeContractions")) WriteCollationData.writeContractions(); - else if (arg.equalsIgnoreCase("writeFractionalUCA")) WriteCollationData.writeFractionalUCA("FractionalUCA"); - else if (arg.equalsIgnoreCase("writeConformance")) WriteCollationData.writeConformance("CollationTest_NON_IGNORABLE", UCA.NON_IGNORABLE, shortPrint); - else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) WriteCollationData.writeConformance("CollationTest_SHIFTED", UCA.SHIFTED, shortPrint); - else if (arg.equalsIgnoreCase("testCompatibilityCharacters")) WriteCollationData.testCompatibilityCharacters(); - else if (arg.equalsIgnoreCase("writeCollationValidityLog")) WriteCollationData.writeCollationValidityLog(); - else if (arg.equalsIgnoreCase("writeCaseExceptions")) WriteCollationData.writeCaseExceptions(); - else if (arg.equalsIgnoreCase("writeJavascriptInfo")) WriteCollationData.writeJavascriptInfo(); - else if (arg.equalsIgnoreCase("writeCaseFolding")) WriteCollationData.writeCaseFolding(); - else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest(); - else if (arg.equalsIgnoreCase("short")) shortPrint = true; - - else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation(); - else if (arg.equalsIgnoreCase("probe")) Probe.test(); + /* + String s = WriteCollationData.collator.getSortKey("\u1025\u102E", UCA.NON_IGNORABLE, true); + System.out.println(Utility.hex("\u0595\u0325") + ", " + WriteCollationData.collator.toString(s)); + String t = WriteCollationData.collator.getSortKey("\u0596\u0325", UCA.NON_IGNORABLE, true); + System.out.println(Utility.hex("\u0596\u0325") + ", " + WriteCollationData.collator.toString(t)); - else { - System.out.println(); - System.out.println("UNKNOWN OPTION (" + arg + "): must be one of the following (case-insensitive)"); - System.out.println("\tWriteRulesXML, WriteRulesWithNames, WriteRules,"); - System.out.println("\tcheckDisjointIgnorables, writeContractions,"); - System.out.println("\twriteFractionalUCA, writeConformance, writeConformanceSHIFTED, testCompatibilityCharacters,"); - System.out.println("\twriteCollationValidityLog, writeCaseExceptions, writeJavascriptInfo, writeCaseFolding"); - System.out.println("\tjavatest, hex (used for conformance)"); + Normalizer foo = new Normalizer(Normalizer.NFKD); + char x = '\u1EE2'; + System.out.println(Utility.hex(x) + " " + ucd.getName(x)); + String nx = foo.normalize(x); + for (int i = 0; i < nx.length(); ++i) { + char c = nx.charAt(i); + System.out.println(ucd.getCanonicalClass(c)); } - } - System.out.println("Done"); - - /* - String s = WriteCollationData.collator.getSortKey("\u1025\u102E", UCA.NON_IGNORABLE, true); - System.out.println(Utility.hex("\u0595\u0325") + ", " + WriteCollationData.collator.toString(s)); - String t = WriteCollationData.collator.getSortKey("\u0596\u0325", UCA.NON_IGNORABLE, true); - System.out.println(Utility.hex("\u0596\u0325") + ", " + WriteCollationData.collator.toString(t)); - - - Normalizer foo = new Normalizer(Normalizer.NFKD); - char x = '\u1EE2'; - System.out.println(Utility.hex(x) + " " + ucd.getName(x)); - String nx = foo.normalize(x); - for (int i = 0; i < nx.length(); ++i) { - char c = nx.charAt(i); - System.out.println(ucd.getCanonicalClass(c)); + System.out.println(Utility.hex(nx, " ") + " " + ucd.getName(nx)); + */ } - System.out.println(Utility.hex(nx, " ") + " " + ucd.getName(nx)); - */ - } } \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA.java b/tools/unicodetools/com/ibm/text/UCA/UCA.java index c1856fcf23..0148c10d95 100644 --- a/tools/unicodetools/com/ibm/text/UCA/UCA.java +++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ -* $Date: 2003/03/19 17:30:56 $ -* $Revision: 1.20 $ +* $Date: 2003/08/20 03:48:45 $ +* $Revision: 1.21 $ * ******************************************************************************* */ @@ -79,7 +79,7 @@ final public class UCA implements Comparator, UCA_Types { * Version of the UCA tables to use */ //private static final String VERSION = "-3.0.1d3"; // ""; // "-2.1.9d7"; - public static final String UCA_BASE = "3.1.1"; // ""; // "-2.1.9d7"; + public static final String UCA_BASE = "4.0.0d1"; // "3.1.1"; // ; // ""; // "-2.1.9d7"; public static final String VERSION = "-" + UCA_BASE; // + "d6" ""; // "-2.1.9d7"; public static final String ALLFILES = "allkeys"; // null if not there @@ -800,7 +800,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.] /** * Records the dataversion */ - private String dataVersion = "3.1d1"; + private String dataVersion = "Missing @version in data!!"; /** * Records the dataversion diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java b/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java index 6caa908c70..85adcd55f0 100644 --- a/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java +++ b/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java @@ -5,15 +5,18 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Types.java,v $ -* $Date: 2002/07/14 22:07:00 $ -* $Revision: 1.1 $ +* $Date: 2003/08/20 03:48:45 $ +* $Revision: 1.2 $ * ******************************************************************************* */ package com.ibm.text.UCA; +import com.ibm.text.UCD.*; +import com.ibm.text.utility.*; public interface UCA_Types { + public static final String UCA_GEN_DIR = UCD_Types.GEN_DIR + "collation\\"; public static final char LEVEL_SEPARATOR = '\u0000'; /** * Expanding characters are marked with a exception bit combination diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java index 5cd2615565..cc1523b62f 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ -* $Date: 2003/04/25 01:39:13 $ -* $Revision: 1.32 $ +* $Date: 2003/08/20 03:48:43 $ +* $Revision: 1.33 $ * ******************************************************************************* */ @@ -145,7 +145,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types { BufferedReader in = Utility.openUnicodeFile("CaseFolding", UNICODE_VERSION, true, Utility.LATIN1); // new BufferedReader(new FileReader(DIR31 + "CaseFolding-3.d3.alpha.txt"), 64*1024); // log = new PrintWriter(new FileOutputStream("CaseFolding_data.js")); - log = Utility.openPrintWriter("CaseFolding_data.js", Utility.UTF8_WINDOWS); + log = Utility.openPrintWriter(UCA_GEN_DIR, "CaseFolding_data.js", Utility.UTF8_WINDOWS); log.println("var CF = new Object();"); int count = 0; while (true) { @@ -190,7 +190,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types { Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION); Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION); //log = new PrintWriter(new FileOutputStream("Normalization_data.js")); - log = Utility.openPrintWriter("Normalization_data.js", Utility.LATIN1_WINDOWS); + log = Utility.openPrintWriter(UCA_GEN_DIR, "Normalization_data.js", Utility.LATIN1_WINDOWS); int count = 0; @@ -319,7 +319,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON } } - PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS); + PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS); //if (!shortPrint) log.write('\uFEFF'); log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion()); log.println("# Generated: " + getNormalDate()); @@ -518,7 +518,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON */ static void checkBadDecomps(int strength, boolean decomposition, UnicodeSet alreadySeen) { if (ucd_uca_base == null) { - ucd_uca_base = UCD.make(UCA.UCA_BASE); + ucd_uca_base = UCD.make(collator.getUCDVersion()); } int oldStrength = collator.getStrength(); collator.setStrength(strength); @@ -703,7 +703,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON }*/ static void testCompatibilityCharacters() throws IOException { - log = Utility.openPrintWriter("UCA_CompatComparison.txt", Utility.UTF8_WINDOWS); + log = Utility.openPrintWriter(UCA_GEN_DIR, "UCA_CompatComparison.txt", Utility.UTF8_WINDOWS); int[] kenCes = new int[50]; int[] markCes = new int[50]; @@ -940,7 +940,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON /*PrintWriter diLog = new PrintWriter( new BufferedWriter( new OutputStreamWriter( - new FileOutputStream(GEN_DIR + "UCA_Nonspacing.txt"), + new FileOutputStream(UCA_GEN_DIR + "UCA_Nonspacing.txt"), "UTF8"), 32*1024)); */ @@ -1193,11 +1193,11 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON /*PrintWriter diLog = new PrintWriter( new BufferedWriter( new OutputStreamWriter( - new FileOutputStream(GEN_DIR + "UCA_Contractions.txt"), + new FileOutputStream(UCA_GEN_DIR + "UCA_Contractions.txt"), "UTF8"), 32*1024)); */ - PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", Utility.UTF8_WINDOWS); + PrintWriter diLog = Utility.openPrintWriter(UCA_GEN_DIR, "UCA_Contractions.txt", Utility.UTF8_WINDOWS); diLog.write('\uFEFF'); @@ -1231,23 +1231,23 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON PrintWriter diLog = new PrintWriter( new BufferedWriter( new OutputStreamWriter( - new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"), + new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"), "UTF8"), 32*1024)); */ - PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.UTF8_WINDOWS); + PrintWriter diLog = Utility.openPrintWriter(UCA_GEN_DIR, "DisjointIgnorables.js", Utility.UTF8_WINDOWS); diLog.write('\uFEFF'); /* PrintWriter diLog = new PrintWriter( // try new one - new UTF8StreamWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"), + new UTF8StreamWriter(new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"), 32*1024)); diLog.write('\uFEFF'); */ - //diLog = new PrintWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt")); + //diLog = new PrintWriter(new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt")); Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION); @@ -1410,15 +1410,15 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON /*PrintWriter diLog = new PrintWriter( new BufferedWriter( new OutputStreamWriter( - new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"), + new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"), "UTF8"), 32*1024)); */ - PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.UTF8_WINDOWS); + PrintWriter diLog = Utility.openPrintWriter(UCA_GEN_DIR, "DisjointIgnorables2.js", Utility.UTF8_WINDOWS); diLog.write('\uFEFF'); - //diLog = new PrintWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt")); + //diLog = new PrintWriter(new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt")); Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION); @@ -1627,13 +1627,15 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON expansionStart = 2; // move up if first is double-ce } if (len > expansionStart && homelessSecondaries.contains(UCA.getSecondary(ces[expansionStart]))) { + if (log2 != null) log2.println("Homeless: " + CEList.toString(ces, len)); ++expansionStart; // move up if *second* is homeless ignoreable } return expansionStart; } + static PrintWriter log2 = null; - static void writeRules (byte option, boolean shortPrint) throws IOException { + static void writeRules (byte option, boolean shortPrint, boolean noCE) throws IOException { //testTransitivity(); //if (true) return; @@ -1661,7 +1663,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON int[] lenArray = new int[1]; Set alreadyDone = new HashSet(); - PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.UTF8_WINDOWS); + log2 = Utility.openPrintWriter(UCA_GEN_DIR, "UCARules-log.txt", Utility.UTF8_WINDOWS); while (true) { String s = cc.next(ces, lenArray); @@ -1785,7 +1787,7 @@ F900..FAFF; CJK Compatibility Ideographs if (shortPrint) filename += "_SHORT"; if (option == IN_XML) filename += ".xml"; else filename += ".txt"; - log = Utility.openPrintWriter(filename, Utility.UTF8_WINDOWS); + log = Utility.openPrintWriter(UCA_GEN_DIR, filename, Utility.UTF8_WINDOWS); String[] commentText = { "UCA Rules", @@ -1933,6 +1935,10 @@ F900..FAFF; CJK Compatibility Ideographs }*/ + if (chr.equals("\u0966")) { + System.out.println(CEList.toString(ces, len)); + } + expansionStart = getFirstCELen(ces, len); // int relation = getStrengthDifference(ces, len, lastCes, lastLen); @@ -1979,10 +1985,6 @@ F900..FAFF; CJK Compatibility Ideographs } } - if (chr.equals("\u2F00")) { - System.out.println(CEList.toString(ces, len)); - } - // There are double-CEs, so we have to know what the length of the first bit is. @@ -2039,9 +2041,9 @@ F900..FAFF; CJK Compatibility Ideographs log.print("" + Utility.quoteXML(expansion)); } if (!shortPrint) { - log.print("\t"); } @@ -2054,9 +2056,9 @@ F900..FAFF; CJK Compatibility Ideographs if (!firstTime) log.print(RELATION_NAMES[relation] + " " + quoteOperand(chr)); if (expansion.length() > 0) log.print(" / " + quoteOperand(expansion)); if (!shortPrint) { - log.print("\t# " - + CEList.toString(ces, len) + " " - + ucd.getCodeAndName(chr)); + log.print("\t# "); + if (!noCE) log.print(CEList.toString(ces, len) + " "); + log.print(ucd.getCodeAndName(chr)); if (expansion.length() > 0) log.print(" / " + Utility.hex(expansion)); } log.println(); @@ -2273,7 +2275,14 @@ F900..FAFF; CJK Compatibility Ideographs if (show || contains(testCase, 0, testCase.length, ces[0]) || testString.indexOf(s) > 0) { System.out.println("Test case: " + Utility.hex(s) + ", " + CEList.toString(ces, len)); } - backMap.put(new ArrayWrapper((int[])(ces.clone()), 0, len), s); + // NOTE: we add the back map based on the string value; the smallest (UTF-16 order) string wins + Object key = new ArrayWrapper((int[])(ces.clone()),0, len); + if (false) { + Object value = backMap.get(key); + if (value == null) return; + if (s.compareTo(value) >= 0) return; + } + backMap.put(key, s); /* // HACK until Ken fixes for (int i = 0; i < len; ++i) { @@ -2295,7 +2304,7 @@ F900..FAFF; CJK Compatibility Ideographs } - static UnicodeSet homelessSecondaries = new UnicodeSet(0x0153,0x0170); + static UnicodeSet homelessSecondaries = new UnicodeSet(0x0153,0x017F); /*static int[] ignorableList = new int[homelessSecondaries.size()]; @@ -2413,7 +2422,7 @@ F900..FAFF; CJK Compatibility Ideographs // we failed completely. Print error message, and bail - System.out.println("No back map for " + CEList.toString(ces[i]) + System.out.println("Fix Homeless! No back map for " + CEList.toString(ces[i]) + " from " + CEList.toString(ces, len)); System.out.println("\t" + ucd.getCodeAndName(chr) + " => " + ucd.getCodeAndName(nfkdNew.normalize(chr)) @@ -2502,32 +2511,34 @@ F900..FAFF; CJK Compatibility Ideographs || c >= '0' && c <= '9' || (c >= 0xA0 && !UCharacterProperty.isRuleWhiteSpace(c)) */ - needsQuoting = new UnicodeSet("[a-zA-Z0-9\\u00A0-\\U00010FFF]"); + needsQuoting = new UnicodeSet( + "[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]"); // needsQuoting.remove(); } s = NFC.normalize(s); quoteOperandBuffer.setLength(0); boolean noQuotes = true; boolean inQuote = false; - for (int i = 0; i < s.length(); ++i) { - char c = s.charAt(i); - if (!needsQuoting.contains(c)) { + int cp; + for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { + cp = UTF16.charAt(s, i); + if (!needsQuoting.contains(cp)) { if (inQuote) { quoteOperandBuffer.append('\''); inQuote = false; } - quoteOperandBuffer.append(c); + quoteOperandBuffer.append(UTF16.valueOf(cp)); } else { noQuotes = false; - if (c == '\'') { + if (cp == '\'') { quoteOperandBuffer.append("''"); } else { if (!inQuote) { quoteOperandBuffer.append('\''); inQuote = true; } - if (c <= 0x20 || c > 0x7E) quoteOperandBuffer.append("\\u").append(Utility.hex(c)); - else quoteOperandBuffer.append(c); + if (cp <= 0x20 || cp > 0x7E) quoteOperandBuffer.append("\\u").append(Utility.hex(cp)); + else quoteOperandBuffer.append(UTF16.valueOf(cp)); } } /* @@ -2819,11 +2830,11 @@ F900..FAFF; CJK Compatibility Ideographs Utility.fixDot(); System.out.println("Writing"); - PrintWriter shortLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_SHORT.txt"), 32*1024)); - PrintWriter longLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + ".txt"), 32*1024)); + PrintWriter shortLog = new PrintWriter(new BufferedWriter(new FileWriter(UCA_GEN_DIR + filename + "_SHORT.txt"), 32*1024)); + PrintWriter longLog = new PrintWriter(new BufferedWriter(new FileWriter(UCA_GEN_DIR + filename + ".txt"), 32*1024)); log = new PrintWriter(new DualWriter(shortLog, longLog)); - PrintWriter summary = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_summary.txt"), 32*1024)); + PrintWriter summary = new PrintWriter(new BufferedWriter(new FileWriter(UCA_GEN_DIR + filename + "_summary.txt"), 32*1024)); //log.println("[Variable Low = " + UCA.toString(collator.getVariableLow()) + "]"); //log.println("[Variable High = " + UCA.toString(collator.getVariableHigh()) + "]"); @@ -2861,27 +2872,38 @@ F900..FAFF; CJK Compatibility Ideographs log.println("# WARNING"); log.println("# - Differs from previous version in that MAX value was introduced at 1F."); log.println("# All tertiary values are shifted down by 1, filling the gap at 7!"); - + log.println(); + log.println("[UCA version =" + collator.getDataVersion() + "]"); + String lastChr = ""; int lastNp = 0; boolean doVariable = false; char[] codeUnits = new char[100]; - FCE firstSecondaryIgnorable = new FCE(false); - FCE lastSecondaryIgnorable = new FCE(true); + FCE firstTertiaryIgnorable = new FCE(false, "tertiary ignorable"); + FCE lastTertiaryIgnorable = new FCE(true, "tertiary ignorable"); - FCE firstPrimaryIgnorable = new FCE(false); - FCE lastPrimaryIgnorable = new FCE(true); + FCE firstSecondaryIgnorable = new FCE(false, "secondary ignorable"); + FCE lastSecondaryIgnorable = new FCE(true, "secondary ignorable"); + + FCE firstTertiaryInSecondaryNonIgnorable = new FCE(false, "tertiary in secondary non-ignorable"); + FCE lastTertiaryInSecondaryNonIgnorable = new FCE(true, "tertiary in secondary non-ignorable"); + + FCE firstPrimaryIgnorable = new FCE(false, "primary ignorable"); + FCE lastPrimaryIgnorable = new FCE(true, "primary ignorable"); - FCE firstVariable = new FCE(false); - FCE lastVariable = new FCE(true); + FCE firstSecondaryInPrimaryNonIgnorable = new FCE(false, "secondary in primary non-ignorable"); + FCE lastSecondaryInPrimaryNonIgnorable = new FCE(true, "secondary in primary non-ignorable"); + + FCE firstVariable = new FCE(false, "variable"); + FCE lastVariable = new FCE(true, "variable"); - FCE firstNonIgnorable = new FCE(false); - FCE lastNonIgnorable = new FCE(true); + FCE firstNonIgnorable = new FCE(false, "non-ignorable"); + FCE lastNonIgnorable = new FCE(true, "non-ignorable"); - FCE firstTrailing = new FCE(false); - FCE lastTrailing = new FCE(true); + FCE firstTrailing = new FCE(false, "trailing"); + FCE lastTrailing = new FCE(true, "trailing"); Map backMap = new TreeMap(); @@ -3022,26 +3044,34 @@ F900..FAFF; CJK Compatibility Ideographs // but ONLY if we are not part of an implicit if ((pri & MARK_CODE_POINT) == 0) { + if (np != 0) { + firstSecondaryInPrimaryNonIgnorable.setValue(0, ns, 0, chr); + lastSecondaryInPrimaryNonIgnorable.setValue(0, ns, 0, chr); + } + if (ns != 0) { + firstTertiaryInSecondaryNonIgnorable.setValue(0, 0, nt & 0x3F, chr); + lastTertiaryInSecondaryNonIgnorable.setValue(0, 0, nt & 0x3F, chr); + } if (np == 0 && ns == 0) { - firstSecondaryIgnorable.setValue(np, ns, nt); - lastSecondaryIgnorable.setValue(np, ns, nt); + firstSecondaryIgnorable.setValue(np, ns, nt, chr); + lastSecondaryIgnorable.setValue(np, ns, nt, chr); } else if (np == 0) { - firstPrimaryIgnorable.setValue(np, ns, nt); - lastPrimaryIgnorable.setValue(np, ns, nt); + firstPrimaryIgnorable.setValue(np, ns, nt, chr); + lastPrimaryIgnorable.setValue(np, ns, nt, chr); } else if (collator.isVariable(ces[q])) { - firstVariable.setValue(np, ns, nt); - lastVariable.setValue(np, ns, nt); + firstVariable.setValue(np, ns, nt, chr); + lastVariable.setValue(np, ns, nt, chr); } else if (UCA.getPrimary(ces[q]) > UNSUPPORTED_LIMIT) { // Trailing (none currently) System.out.println("Trailing: " + ucd.getCodeAndName(chr) + ", " + CEList.toString(ces[q]) + ", " + Utility.hex(pri) + ", " + Utility.hex(UNSUPPORTED_LIMIT)); - firstTrailing.setValue(np, ns, nt); - lastTrailing.setValue(np, ns, nt); + firstTrailing.setValue(np, ns, nt, chr); + lastTrailing.setValue(np, ns, nt, chr); } else { - firstNonIgnorable.setValue(np, ns, nt); - lastNonIgnorable.setValue(np, ns, nt); + firstNonIgnorable.setValue(np, ns, nt, chr); + lastNonIgnorable.setValue(np, ns, nt, chr); } } } @@ -3091,43 +3121,71 @@ F900..FAFF; CJK Compatibility Ideographs log.println(); log.println("# VALUES BASED ON UCA"); - log.println("[first tertiary ignorable " + new FCE(false,0,0, 0).formatFCE() + "]"); - log.println("[last tertiary ignorable " + new FCE(true,0,0, 0).formatFCE() + "]"); + if (firstTertiaryIgnorable.isUnset()) { + firstTertiaryIgnorable.setValue(0,0,0,""); + lastTertiaryIgnorable.setValue(0,0,0,""); + System.out.println(firstSecondaryIgnorable.formatFCE()); + } + + log.println(firstTertiaryIgnorable); + log.println(lastTertiaryIgnorable); // Since the UCA doesn't have secondary ignorables, fake them. if (firstSecondaryIgnorable.isUnset()) { - System.out.println("No first/last secondary ignorable: resetting"); - firstSecondaryIgnorable = new FCE(false, 0, 0, COMMON<<24); - lastSecondaryIgnorable = new FCE(true, 0, 0, COMMON<<24); + int bound = 0x3F03; + System.out.println("No first/last secondary ignorable: resetting to HARD CODED"); + //long bound = lastTertiaryInSecondaryNonIgnorable.getValue(2); + firstSecondaryIgnorable.setValue(0,0,bound,""); + lastSecondaryIgnorable.setValue(0,0,bound,""); System.out.println(firstSecondaryIgnorable.formatFCE()); } - log.println("[first secondary ignorable " + firstSecondaryIgnorable.formatFCE() + "]"); - log.println("[last secondary ignorable " + lastSecondaryIgnorable.formatFCE() + "]"); + log.println("# Warning: Case bits are masked in the following"); - log.println("[first primary ignorable " + firstPrimaryIgnorable.formatFCE() + "]"); - log.println("[last primary ignorable " + lastPrimaryIgnorable.formatFCE() + "]"); + log.println(firstTertiaryInSecondaryNonIgnorable.toString(true)); + log.println(lastTertiaryInSecondaryNonIgnorable.toString(true)); + + log.println(firstSecondaryIgnorable); + log.println(lastSecondaryIgnorable); - log.println("[first variable " + firstVariable.formatFCE() + "]"); - log.println("[last variable " + lastVariable.formatFCE() + "]"); + if (lastTertiaryInSecondaryNonIgnorable.getValue(2) >= firstSecondaryIgnorable.getValue(2)) { + log.println("# FAILURE: Overlap of tertiaries"); + } + + log.println(firstSecondaryInPrimaryNonIgnorable.toString(true)); + log.println(lastSecondaryInPrimaryNonIgnorable.toString(true)); + + log.println(firstPrimaryIgnorable); + log.println(lastPrimaryIgnorable); - log.println("[first regular " + firstNonIgnorable.formatFCE() + "]"); - log.println("[last regular " + lastNonIgnorable.formatFCE() + "]"); + if (lastSecondaryInPrimaryNonIgnorable.getValue(1) >= firstPrimaryIgnorable.getValue(1)) { + log.println("# FAILURE: Overlap of secondaries"); + } + + log.println(firstVariable); + log.println(lastVariable); + log.println(firstNonIgnorable); + log.println(lastNonIgnorable); - log.println("[first implicit " + (new FCE(false,firstImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]"); - log.println("[last implicit " + (new FCE(false,lastImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]"); + FCE firstImplicitFCE = new FCE(false, "first implicit"); + FCE lastImplicitFCE = new FCE(false, "last implicit"); + firstImplicitFCE.setValue(firstImplicit, COMMON, COMMON, ""); + lastImplicitFCE.setValue(lastImplicit, COMMON, COMMON, ""); + + log.println(firstImplicitFCE); // "[first implicit " + (new FCE(false,firstImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]"); + log.println(lastImplicitFCE); // "[last implicit " + (new FCE(false,lastImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]"); if (firstTrailing.isUnset()) { System.out.println("No first/last trailing: resetting"); - firstTrailing = new FCE(false, (IMPLICIT_LIMIT_BYTE+1)<<24, COMMON<<24, COMMON<<24); - lastTrailing = new FCE(true, (IMPLICIT_LIMIT_BYTE+1)<<24, COMMON<<24, COMMON<<24); + firstTrailing.setValue(IMPLICIT_LIMIT_BYTE+1, COMMON, COMMON, ""); + lastTrailing.setValue(IMPLICIT_LIMIT_BYTE+1, COMMON, COMMON, ""); System.out.println(firstTrailing.formatFCE()); } - log.println("[first trailing " + firstTrailing.formatFCE() + "]"); - log.println("[last trailing " + lastTrailing.formatFCE() + "]"); + log.println(firstTrailing); + log.println(lastTrailing); log.println(); log.println("# FIXED VALUES"); @@ -3218,39 +3276,59 @@ F900..FAFF; CJK Compatibility Ideographs long[] key; boolean max; boolean debugShow = false; + String source; + String title; - FCE (boolean max) { + FCE (boolean max, String title) { this.max = max; + this.title = title; if (max) key = new long[] {UNDEFINED_MIN, UNDEFINED_MIN, UNDEFINED_MIN}; // make small! else key = new long[] {UNDEFINED_MAX, UNDEFINED_MAX, UNDEFINED_MAX}; } + /* FCE (boolean max, int primary, int secondary, int tertiary) { this(max); - key[0] = primary & INT_MASK; - key[1] = secondary & INT_MASK; - key[2] = tertiary & INT_MASK; + key[0] = fixWeight(primary); + key[1] = fixWeight(secondary); + key[2] = fixWeight(tertiary); } FCE (boolean max, int primary) { this(max); key[0] = primary & INT_MASK; } + */ boolean isUnset() { return key[0] == UNDEFINED_MIN || key[0] == UNDEFINED_MAX; } + long fixWeight(int weight) { + long result = weight & INT_MASK; + if (result == 0) return result; + while ((result & 0xFF000000) == 0) result <<= 8; // shift to top + return result; + } + String formatFCE() { + return formatFCE(false); + } + + String formatFCE(boolean showEmpty) { String b0 = getBuffer(key[0], false); boolean key0Defined = key[0] != UNDEFINED_MIN && key[0] != UNDEFINED_MAX; + if (showEmpty && b0.length() == 0) b0 = "X"; String b1 = getBuffer(key[1], key0Defined); boolean key1Defined = key[1] != UNDEFINED_MIN && key[1] != UNDEFINED_MAX; if (b1.length() != 0) b1 = " " + b1; - + else if (showEmpty) b1 = " X"; + String b2 = getBuffer(key[2], key0Defined || key1Defined); if (b2.length() != 0) b2 = " " + b2; + else if (showEmpty) b2 = " X"; + return "[" + b0 + "," + b1 + "," + b2 + "]"; } @@ -3262,48 +3340,54 @@ F900..FAFF; CJK Compatibility Ideographs return result.toString(); } - void setValue(int npInt, int nsInt, int ntInt) { + long getValue(int zeroBasedLevel) { + return key[zeroBasedLevel]; + } + + String getSource() { + return source; + } + + public String toString() { + return toString(false); + } + + String toString(boolean showEmpty) { + String src = source.length() == 0 ? "CONSTRUCTED" : Default.ucd.getCodeAndName(source); + return "[" + (max ? "last " : "first ") + title + " " + formatFCE(showEmpty) + "] # " + src; + } + + void setValue(int npInt, int nsInt, int ntInt, String source) { if (debugShow) System.out.println("Setting FCE: " + Utility.hex(npInt) + ", " + Utility.hex(nsInt) + ", " + Utility.hex(ntInt)); // to get the sign right! - long np = npInt & INT_MASK; - long ns = nsInt & INT_MASK; - long nt = ntInt & INT_MASK; + long np = fixWeight(npInt); + long ns = fixWeight(nsInt); + long nt = fixWeight(ntInt); if (max) { + // return if the key is LEQ if (np < key[0]) return; - if (np > key[0]) { - key[0] = np; - key[1] = ns; - key[2] = nt; - return; - } - if (ns < key[1]) return; - if (ns > key[1]) { - key[1] = ns; - key[2] = nt; - return; - } - if (nt > key[2]) { - key[2] = nt; + if (np == key[0]) { + if (ns < key[1]) return; + if (ns == key[1]) { + if (nt <= key[2]) return; + } } } else { + // return if the key is GEQ if (np > key[0]) return; - if (np < key[0]) { - key[0] = np; - key[1] = ns; - key[2] = nt; - return; - } - if (ns > key[1]) return; - if (ns < key[1]) { - key[1] = ns; - key[2] = nt; - return; - } - if (nt > key[2]) { - key[2] = nt; + if (np == key[0]) { + if (ns > key[1]) return; + if (ns == key[1]) { + if (nt >= key[2]) return; + } } } + // we didn't bail, so reset! + key[0] = np; + key[1] = ns; + key[2] = nt; + this.source = source; } } @@ -3961,7 +4045,7 @@ static int swapCJK(int i) { Default.setUCD(); //log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html")); - log = Utility.openPrintWriter("CheckCollationValidity.html", Utility.UTF8_WINDOWS); + log = Utility.openPrintWriter(UCA_GEN_DIR, "CheckCollationValidity.html", Utility.UTF8_WINDOWS); log.println(""); log.println("UCA Validity Log"); @@ -4628,7 +4712,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; static PrintWriter writeHead(int counter, int end, String title, String other, String version, boolean show) throws IOException { - PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html", Utility.UTF8_WINDOWS); + PrintWriter out = Utility.openPrintWriter(UCA_GEN_DIR, title + pad(counter) + ".html", Utility.UTF8_WINDOWS); copyFile(out, "HTML-Part1.txt"); /* diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java index 4dad3f7c7e..bf1a5bc387 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $ -* $Date: 2003/07/21 15:50:06 $ -* $Revision: 1.29 $ +* $Date: 2003/08/20 03:46:41 $ +* $Revision: 1.30 $ * ******************************************************************************* */ @@ -362,17 +362,17 @@ public class GenerateData implements UCD_Types { } else if (propAbb.equals("blk")) { type = CATALOG_PROP; } else if (propAbb.equals("na")) { - type = DESCRIPTIVE_PROP; + type = MISC_PROP; } else if (propAbb.equals("na1")) { - type = DESCRIPTIVE_PROP; + type = MISC_PROP; } else if (propAbb.equals("isc")) { - type = DESCRIPTIVE_PROP; + type = MISC_PROP; } addLine(sorted, UCD_Names.PROP_TYPE_NAMES[type][1], propAbb, prop); checkDuplicate(duplicates, accumulation, propAbb, prop); if (!prop.equals(propAbb)) checkDuplicate(duplicates, accumulation, prop, prop); } - addLine(sorted, UCD_Names.PROP_TYPE_NAMES[CATALOG_PROP][1], "URS", "Unicode_Radical_Stroke"); + addLine(sorted, UCD_Names.PROP_TYPE_NAMES[MISC_PROP][1], "URS", "Unicode_Radical_Stroke"); // TODO: merge above for (int k = 0; k < UCD_Names.SUPER_CATEGORIES.length; ++k) { @@ -529,7 +529,11 @@ public class GenerateData implements UCD_Types { Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log); log.println(HORIZONTAL_LINE); log.println(); - Utility.print(log, sorted, "\r\n", new MyBreaker(true)); + int count = Utility.print(log, sorted, "\r\n", new MyBreaker(true)); + log.println(); + log.println(); + log.println(HORIZONTAL_LINE); + log.println("# Total: \t" + count); log.println(); log.close(); Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); @@ -588,6 +592,7 @@ public class GenerateData implements UCD_Types { static class MyBreaker implements Utility.Breaker { boolean status; + int count; public MyBreaker(boolean status) { this.status = status; diff --git a/tools/unicodetools/com/ibm/text/UCD/TestData.java b/tools/unicodetools/com/ibm/text/UCD/TestData.java index bee114fcd4..308e04f2a5 100644 --- a/tools/unicodetools/com/ibm/text/UCD/TestData.java +++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $ -* $Date: 2003/07/07 15:58:57 $ -* $Revision: 1.11 $ +* $Date: 2003/08/20 03:46:42 $ +* $Revision: 1.12 $ * ******************************************************************************* */ @@ -28,9 +28,32 @@ import com.ibm.text.utility.*; public class TestData implements UCD_Types { public static void main (String[] args) throws IOException { - Default.setUCD(); + if (true) return; + + UnicodeSet sterm = UnifiedProperty.getSet("Sentence_Terminal", Default.ucd); + UnicodeSet term = UnifiedProperty.getSet("Terminal_Punctuation", Default.ucd); + UnicodeSet po = new UnicodeSet("[:po:]"); + UnicodeSet empty = new UnicodeSet(); + + Utility.showSetDifferences( + "Sentence_Terminal", sterm, + "Empty", empty, + true, Default.ucd); + + Utility.showSetDifferences( + "Sentence_Terminal", sterm, + "Terminal_Punctuation", term, + true, Default.ucd); + + Utility.showSetDifferences( + "Terminal_Punctuation", term, + "Punctuation_Other", po, + true, Default.ucd); + + if (true) return; + UnicodeSet us = getSetForName("LATIN LETTER.*P"); Utility.showSetNames("",us,false,Default.ucd); @@ -85,6 +108,8 @@ public class TestData implements UCD_Types { } } + + static private UnicodeSet getSetForName(String regexPattern) { UnicodeSet result = new UnicodeSet(); Pattern p = Pattern.compile(regexPattern); diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java index 4051f4f8f5..0e3f4f3968 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $ -* $Date: 2003/07/21 15:50:05 $ -* $Revision: 1.22 $ +* $Date: 2003/08/20 03:46:43 $ +* $Revision: 1.23 $ * ******************************************************************************* */ @@ -936,7 +936,7 @@ final class UCD_Names implements UCD_Types { static final String[][] PROP_TYPE_NAMES = { {"Numeric", "AA"}, {"String", "AB"}, - {"Descriptive", "AC"}, + {"Miscellaneous", "AC"}, {"Catalog", "AD"}, {"Enumerated", "AE"}, {"Binary", "ZX"}, diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java index 5857785c02..5711657d5b 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $ -* $Date: 2003/07/21 15:50:05 $ -* $Revision: 1.24 $ +* $Date: 2003/08/20 03:46:44 $ +* $Revision: 1.25 $ * ******************************************************************************* */ @@ -47,7 +47,7 @@ public interface UCD_Types { static final byte NUMERIC_PROP = 0, STRING_PROP = 1, - DESCRIPTIVE_PROP = 2, + MISC_PROP = 2, CATALOG_PROP = 3, ENUMERATED_PROP = 4, BINARY_PROP = 5, diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java index 0194d8b348..4acf0f0892 100644 --- a/tools/unicodetools/com/ibm/text/utility/Utility.java +++ b/tools/unicodetools/com/ibm/text/utility/Utility.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $ -* $Date: 2003/07/21 15:50:07 $ -* $Revision: 1.35 $ +* $Date: 2003/08/20 03:47:59 $ +* $Revision: 1.36 $ * ******************************************************************************* */ @@ -289,7 +289,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES public static long longFrom(String p) { if (p.length() == 0) return Long.MIN_VALUE; - return Long.parseInt(p); + return Long.parseLong(p); } public static float floatFrom(String p) { @@ -707,12 +707,14 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES UTF8_WINDOWS = Encoding.UTF8_WINDOWS; */ - + public static PrintWriter openPrintWriter(String filename, Encoding options) throws IOException { + return openPrintWriter(UCD_Types.GEN_DIR, filename, options); + } // Normally use false, false. // But for UCD files use true, true // Or if they are UTF8, use true, false - public static PrintWriter openPrintWriter(String filename, Encoding options) throws IOException { - File file = new File(getOutputName(filename)); + public static PrintWriter openPrintWriter(String directory, String filename, Encoding options) throws IOException { + File file = new File(directory + filename); Utility.fixDot(); System.out.println("Creating File: " + file.getCanonicalPath()); File parent = new File(file.getParent()); @@ -757,8 +759,9 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES } } - public static void print(PrintWriter pw, Collection c, String separator, Breaker b) { + public static int print(PrintWriter pw, Collection c, String separator, Breaker b) { Iterator it = c.iterator(); + int count = 0; boolean first = true; Object last = null; while (it.hasNext()) { @@ -774,8 +777,10 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES } else { pw.print(obj); } + count++; last = obj; } + return count; } public static void print(PrintWriter pw, Map c, String pairSeparator, String separator, Breaker b) {