Added chart program, minor edits.

X-SVN-Rev: 9918
2002-09-25 06:40:14 +00:00 · 2002-09-25 06:40:14 +00:00 · 5395623062
commit 5395623062
parent ca34222583
7 changed files with 544 additions and 210 deletions
--- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
+++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ 
-* $Date: 2002/05/31 01:41:03 $ 
-* $Revision: 1.9 $
+* $Date: 2002/09/25 06:40:13 $ 
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -164,8 +164,8 @@ public class GenOverlap implements UCD_Types {
    static boolean PROGRESS = false;
      
    static void fullCheck() throws IOException {
-        PrintWriter log = Utility.openPrintWriter("Overlap.html");
-        PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt");
+        PrintWriter log = Utility.openPrintWriter("Overlap.html", Utility.UTF8_WINDOWS);
+        PrintWriter simpleList = Utility.openPrintWriter("Overlap.txt", Utility.UTF8_WINDOWS);
        
        Iterator it = completes.keySet().iterator();
        int counter = 0;
@ -448,7 +448,7 @@ public class GenOverlap implements UCD_Types {
        newKeys.removeAll(joint);
        oldKeys.removeAll(joint);
        
-        PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), false, false);
+        PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), Utility.UTF8_WINDOWS);
        Iterator it = list.iterator();
        int last = -1;
        while (it.hasNext()) {
@ -631,7 +631,7 @@ public class GenOverlap implements UCD_Types {
        
        System.out.println("Data Gathered");

-        PrintWriter log = Utility.openPrintWriter("checkstringsearchhash.html");
+        PrintWriter log = Utility.openPrintWriter("checkstringsearchhash.html", Utility.UTF8_WINDOWS);
        Utility.writeHtmlHeader(log, "Check Hash");
        log.println("<h1>Collisions</h1>");
        log.println("<p>Shows collisions among primary values when hashed to table size = " + tableLength + ".");
@ -694,7 +694,7 @@ public class GenOverlap implements UCD_Types {
    }
    
    public static void listCyrillic(UCA collatorIn) throws IOException {
-        PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", false, false);
+        PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", Utility.UTF8_WINDOWS);
        Set set = new TreeSet(collatorIn);
        Set set2 = new TreeSet(collatorIn);
        ucd = UCD.make();
--- a/tools/unicodetools/com/ibm/text/UCA/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Main.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ 
-* $Date: 2002/07/03 02:15:47 $ 
-* $Revision: 1.9 $
+* $Date: 2002/09/25 06:40:13 $ 
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -55,6 +55,7 @@ public class Main {
            else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
            else if (arg.equalsIgnoreCase("special")) WriteCharts.special();
            
+            else if (arg.equalsIgnoreCase("writeCompositionChart")) WriteCharts.writeCompositionChart();
            
            else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(WriteCollationData.collator);
            else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(WriteCollationData.collator);
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
@ -4,9 +4,9 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
-* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $ 
-* $Date: 2002/07/03 02:15:47 $ 
-* $Revision: 1.11 $
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
+* $Date: 2002/09/25 06:40:13 $
+* $Revision: 1.12 $
 *
 *******************************************************************************
 */
@ -19,12 +19,17 @@ import java.io.*;
 import com.ibm.text.UCD.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSetIterator;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UnicodeSet;
+
+
 import java.text.SimpleDateFormat;

 public class WriteCharts implements UCD_Types {
-    
+
    static boolean HACK_KANA = false;
-    
+
    static public void special() {
    	Default.setUCD();
    	for (int i = 0xE000; i < 0x10000; ++i) {
@ -33,58 +38,58 @@ public class WriteCharts implements UCD_Types {
    		System.out.println(Default.ucd.getCodeAndName(i));
    	}
    }
-    
+
    static public void collationChart(UCA uca) throws IOException {
    	Default.setUCD(uca.getUCDVersion());
    	HACK_KANA = true;
-    	
+
        uca.setAlternate(UCA.NON_IGNORABLE);
-        
+
        //Normalizer nfd = new Normalizer(Normalizer.NFD);
        //Normalizer nfc = new Normalizer(Normalizer.NFC);
-          
+
        UCA.UCAContents cc = uca.getContents(UCA.FIXED_CE, null); // nfd instead of null if skipping decomps
        cc.enableSamples();
-          
+
        Set set = new TreeSet();
-        
+
        while (true) {
            String x = cc.next();
            if (x == null) break;
            if (x.equals("\u2F00")) {
            	System.out.println("debug");
            }
-            
+
            set.add(new Pair(uca.getSortKey(x), x));
        }
-          
+
        PrintWriter output = null;
-        
+
        Iterator it = set.iterator();
-        
+
        byte oldScript = -127;
-        
+
        int[] scriptCount = new int[128];
-        
+
        int counter = 0;
-        
+
        String lastSortKey = "\u0000";
-        
+
        int high = uca.getSortKey("a").charAt(0);
        int variable = UCA.getPrimary(uca.getVariableHigh());
-        
+
        int columnCount = 0;
-        
+
        String[] replacement = new String[] {"%%%", "Collation Charts"};
        String folder = "charts\\uca\\";
-        
+
        Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
        Utility.copyTextFile("charts.css", false, folder + "charts.css");
        Utility.copyTextFile("help.html", true, folder + "help.html");
-        
-        indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
+
+        indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
        Utility.appendFile("index_header.html", true, indexFile, replacement);
-        
+
        /*
        indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
        indexFile.println("<title>UCA Default Collation Table</title>");
@ -93,22 +98,22 @@ public class WriteCharts implements UCD_Types {
        indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
        indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
        */
-        
+
        while (it.hasNext()) {
            Utility.dot(counter);
-            
+
            Pair p = (Pair) it.next();
            String sortKey = (String) p.first;
            String s = (String) p.second;
-            
+
            int cp = UTF16.charAt(s,0);
-            
+
            byte script = Default.ucd.getScript(cp);
-            
+
            // get first non-zero primary
            int currentPrimary = getFirstPrimary(sortKey);
            int primary = currentPrimary >>> 16;
-            
+
            if (sortKey.length() < 4) script = NULL_ORDER;
            else if (primary == 0) script = IGNORABLE_ORDER;
            else if (primary < variable) script = VARIABLE_ORDER;
@ -118,35 +123,35 @@ public class WriteCharts implements UCD_Types {
                else if (primary < UCA_Types.UNSUPPORTED_OTHER_BASE) script = CJK_AB;
                else script = UNSUPPORTED;
            }
-            
+
            if (script == KATAKANA_SCRIPT) script = HIRAGANA_SCRIPT;
            else if ((script == INHERITED_SCRIPT || script == COMMON_SCRIPT) && oldScript >= 0) script = oldScript;

-            if (script != oldScript 
+            if (script != oldScript
                    // && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
                    ) {
                closeFile(output);
                output = null;
                oldScript = script;
            }
-            
+
            if (output == null) {
                ++scriptCount[script+3];
                if (scriptCount[script+3] > 1) {
-                    System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " + 
+                    System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
                        getChunkName(script, LONG) + ", " + Default.ucd.getCodeAndName(s));
                }
                output = openFile(scriptCount[script+3], folder, script);
            }
-            
+
            boolean firstPrimaryEquals = currentPrimary == getFirstPrimary(lastSortKey);
-            
+
            int strength = uca.strengthDifference(sortKey, lastSortKey);
            if (strength < 0) strength = -strength;
            lastSortKey = sortKey;
-            
+
            // find out if this is an expansion: more than one primary weight
-            
+
            int primaryCount = 0;
            for (int i = 0; i < sortKey.length(); ++i) {
                char w = sortKey.charAt(i);
@ -156,7 +161,7 @@ public class WriteCharts implements UCD_Types {
 				}
                ++ primaryCount;
            }
-            
+
            String breaker = "";
            if (columnCount > 10 || !firstPrimaryEquals) {
                columnCount = 0;
@ -166,20 +171,20 @@ public class WriteCharts implements UCD_Types {
                	++columnCount;
                }
            }
-            
+
            String classname = primaryCount > 1 ? XCLASSNAME[strength] : CLASSNAME[strength];
-            
+
            String name = Default.ucd.getName(s);
-            
-         
+
+
            if (s.equals("\u1eaf")) {
            	System.out.println("debug");
            }
-            
+
            String comp = Default.nfc.normalize(s);
-            
+
            String outline = breaker + classname
-                + " title='" 
+                + " title='"
                + (script != UNSUPPORTED
                    ? Utility.quoteXML(name, true) + ": "
                    : "")
@ -193,21 +198,21 @@ public class WriteCharts implements UCD_Types {
                    ? "<td class='name'><tt>" + Utility.quoteXML(name, true) + "</td>"
                    : "")
                ;
-            
+
            output.println(outline);
            ++columnCount;
        }
-        
+
        closeFile(output);
        closeIndexFile(indexFile, "<br>UCA: " + uca.getDataVersion(), COLLATION);
    }
-    
+
    static public void normalizationChart() throws IOException {
        Default.setUCD();
    	HACK_KANA = false;
-        
+
        Set set = new TreeSet();
-        
+
        for (int i = 0; i <= 0x10FFFF; ++i) {
        	if (!Default.ucd.isRepresented(i)) {
        		if (i < 0xAC00) continue;
@ -216,35 +221,35 @@ public class WriteCharts implements UCD_Types {
        	}
        	byte cat = Default.ucd.getCategory(i);
        	if (cat == Cs || cat == Co) continue;
-        	
+
        	if (Default.nfkd.isNormalized(i)) continue;
        	String decomp = Default.nfkd.normalize(i);
-        	
+
        	byte script = getBestScript(decomp);
-        	
+
            set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
            		new Pair(Default.ucd.getCase(decomp, FULL, FOLD),
            				 new Integer(i))));
        }
-          
+
        PrintWriter output = null;
-        
+
        Iterator it = set.iterator();
-        
+
        int oldScript = -127;
-        
+
        int counter = 0;
-        
+
        String[] replacement = new String[] {"%%%", "Normalization Charts"};
        String folder = "charts\\normalization\\";

        Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
        Utility.copyTextFile("charts.css", false, folder + "charts.css");
        Utility.copyTextFile("norm_help.html", true, folder + "help.html");
-        
-        indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
+
+        indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
        Utility.appendFile("index_header.html", true, indexFile, replacement);
-        
+
        /*
        indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
        indexFile.println("<title>UCA Default Collation Table</title>");
@ -253,107 +258,107 @@ public class WriteCharts implements UCD_Types {
        indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
        indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
        */
-        
+
        while (it.hasNext()) {
            Utility.dot(counter);
-            
+
            Pair p = (Pair) it.next();
            int script = ((Integer) p.first).intValue();
            int cp = ((Integer)((Pair) p.second).second).intValue();
-            
-            if (script != oldScript 
+
+            if (script != oldScript
                    // && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
                    ) {
                closeFile(output);
                output = null;
                oldScript = script;
            }
-            
+
            if (output == null) {
                output = openFile(0, folder, script);
                output.println("<tr><td class='z'>Code</td><td class='z'>C</td><td class='z'>D</td><td class='z'>KC</td><td class='z'>KD</td></tr>");

            }
-            
+
            output.println("<tr>");
-            
+
            String prefix;
            String code = UTF16.valueOf(cp);
            String c = Default.nfc.normalize(cp);
            String d = Default.nfd.normalize(cp);
            String kc = Default.nfkc.normalize(cp);
            String kd = Default.nfkd.normalize(cp);
-            
+
            showCell(output, code, "<td class='z' ", "", false);
-            
+
            prefix = c.equals(code) ? "<td class='g' " : "<td class='n' ";
            showCell(output, c, prefix, "", c.equals(code));
-            
+
            prefix = d.equals(c) ? "<td class='g' " : "<td class='n' ";
            showCell(output, d, prefix, "", d.equals(c));
-            
+
            prefix = kc.equals(c) ? "<td class='g' " : "<td class='n' ";
            showCell(output, kc, prefix, "", kc.equals(c));
-            
+
            prefix = (kd.equals(d) || kd.equals(kc)) ? "<td class='g' " : "<td class='n' ";
            showCell(output, kd, prefix, "", (kd.equals(d) || kd.equals(kc)));
-            
+
            output.println("</tr>");
-            
+
        }
-        
+
        closeFile(output);
        closeIndexFile(indexFile, "", NORMALIZATION);
    }
-    
+
    static public void caseChart() throws IOException {
        Default.setUCD();
    	HACK_KANA = false;
-        
+
        Set set = new TreeSet();
-        
+
        for (int i = 0; i <= 0x10FFFF; ++i) {
        	if (!Default.ucd.isRepresented(i)) continue;
        	byte cat = Default.ucd.getCategory(i);
        	if (cat == Cs || cat == Co) continue;
-        	
+
            String code = UTF16.valueOf(i);
            String lower = Default.ucd.getCase(i, FULL, LOWER);
            String title = Default.ucd.getCase(i, FULL, TITLE);
            String upper = Default.ucd.getCase(i, FULL, UPPER);
            String fold = Default.ucd.getCase(i, FULL, FOLD);
-            
+
        	String decomp = Default.nfkd.normalize(i);
        	int script = 0;
            if (lower.equals(code) && upper.equals(code) && fold.equals(code) && title.equals(code)) {
            	if (!containsCase(decomp)) continue;
            	script = NO_CASE_MAPPING;
        	}
-        	
+
        	if (script == 0) script = getBestScript(decomp);
-        	
+
            set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
            		new Pair(Default.ucd.getCase(decomp, FULL, FOLD),
            				 new Integer(i))));
        }
-          
+
        PrintWriter output = null;
-        
+
        Iterator it = set.iterator();
-        
+
        int oldScript = -127;
-        
+
        int counter = 0;
        String[] replacement = new String[] {"%%%", "Case Charts"};
        String folder = "charts\\case\\";
-        
+
        Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
        Utility.copyTextFile("charts.css", false, folder + "charts.css");
        Utility.copyTextFile("case_help.html", true, folder + "help.html");
-        
-        indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
+
+        indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
        Utility.appendFile("index_header.html", true, indexFile, replacement);
-        
+
        /*
        indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
        indexFile.println("<title>UCA Default Collation Table</title>");
@ -362,24 +367,24 @@ public class WriteCharts implements UCD_Types {
        indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
        indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
        */
-        
+
        int columnCount = 0;
-        
+
        while (it.hasNext()) {
            Utility.dot(counter);
-            
+
            Pair p = (Pair) it.next();
            int script = ((Integer) p.first).intValue();
            int cp = ((Integer)((Pair) p.second).second).intValue();
-            
-            if (script != oldScript 
+
+            if (script != oldScript
                    // && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
                    ) {
                closeFile(output);
                output = null;
                oldScript = script;
            }
-            
+
            if (output == null) {
                output = openFile(0, folder, script);
                if (script == NO_CASE_MAPPING) output.println("<tr>");
@ -387,7 +392,7 @@ public class WriteCharts implements UCD_Types {
                	+"<td class='z'>Upper</td><td class='z'>Fold</td></tr>");

            }
-            
+
            if (script == NO_CASE_MAPPING) {
            	if (columnCount > 10) {
            		output.println("</tr><tr>");
@ -397,38 +402,38 @@ public class WriteCharts implements UCD_Types {
            	++columnCount;
            	continue;
            }
-            
+
            output.println("<tr>");
-            
+
            String prefix;
            String code = UTF16.valueOf(cp);
            String lower = Default.ucd.getCase(cp, FULL, LOWER);
            String title = Default.ucd.getCase(cp, FULL, TITLE);
            String upper = Default.ucd.getCase(cp, FULL, UPPER);
            String fold = Default.ucd.getCase(cp, FULL, FOLD);
-            
+
            showCell(output, code, "<td class='z' ", "", false);
-            
+
            prefix = lower.equals(code) ? "<td class='g' " : "<td class='n' ";
            showCell(output, lower, prefix, "", lower.equals(code));
-            
+
            prefix = title.equals(upper) ? "<td class='g' " : "<td class='n' ";
            showCell(output, title, prefix, "", title.equals(upper));
-            
+
            prefix = upper.equals(code) ? "<td class='g' " : "<td class='n' ";
            showCell(output, upper, prefix, "", upper.equals(code));
-            
+
            prefix = fold.equals(lower) ? "<td class='g' " : "<td class='n' ";
            showCell(output, fold, prefix, "", fold.equals(lower));
-            
+
            output.println("</tr>");
-            
+
        }
-        
+
        closeFile(output);
        closeIndexFile(indexFile, "", CASE);
    }
-    
+
    static public void addMapChar(Map m, Set stoplist, String key, String ch) {
    	if (stoplist.contains(key)) return;
    	for (int i = 0; i < key.length(); ++i) {
@ -442,23 +447,23 @@ public class WriteCharts implements UCD_Types {
    	}
    	result.add(ch);
    }
-        
+
    static public void indexChart() throws IOException {
        Default.setUCD();
    	HACK_KANA = false;
-        
+
        Map map = new TreeMap();
        Set stoplist = new TreeSet();
-        
+
        String[] stops = {"LETTER", "CHARACTER", "AND", "CAPITAL", "SMALL", "COMPATIBILITY", "WITH"};
        stoplist.addAll(Arrays.asList(stops));
        System.out.println("Stop-list: " + stoplist);
-        
+
        for (int i = 0; i < LIMIT_SCRIPT; ++i) {
        	stoplist.add(Default.ucd.getScriptID_fromIndex((byte)i));
        }
        System.out.println("Stop-list: " + stoplist);
-        
+
        for (int i = 0; i <= 0x10FFFF; ++i) {
        	if (!Default.ucd.isRepresented(i)) continue;
        	if (0xAC00 <= i && i <= 0xD7A3) continue;
@ -466,7 +471,7 @@ public class WriteCharts implements UCD_Types {

        	String s = Default.ucd.getName(i);
        	if (s == null) continue;
-        	
+
        	if (s.startsWith("<")) {
        		System.out.println("Wierd character at " + Default.ucd.getCodeAndName(i));
        	}
@ -490,52 +495,52 @@ public class WriteCharts implements UCD_Types {
        				addMapChar(map, stoplist, word, ch);
        	}
        }
-          
+
        PrintWriter output = null;
-        
+
        Iterator it = map.keySet().iterator();
-        
+
        int oldScript = -127;
-        
+
        int counter = 0;
        String[] replacement = new String[] {"%%%", "Name Charts"};
        String folder = "charts\\name\\";
-        
+
        Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
        Utility.copyTextFile("charts.css", false, folder + "charts.css");
        Utility.copyTextFile("name_help.html", true, folder + "help.html");
-        
-        indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
+
+        indexFile = Utility.openPrintWriter(folder + "index_list.html", Utility.UTF8_WINDOWS);
        Utility.appendFile("index_header.html", true, indexFile, replacement);
-        
+
        int columnCount = 0;
        char lastInitial = 0;
-        
+
        while (it.hasNext()) {
            Utility.dot(counter);
-            
+
            String key = (String) it.next();
-            
+
            Set chars = (Set) map.get(key);
-            
+
            char initial = key.charAt(0);
-            
+
            if (initial != lastInitial) {
                closeFile(output);
                output = null;
                lastInitial = initial;
            }
-            
+
            if (output == null) {
                output = openFile2(0, folder, String.valueOf(initial));
            }
-            
+
            output.println("<tr><td class='h'>" + key + "</td>");
            columnCount = 1;
-            
+
            Iterator sublist = chars.iterator();
            while (sublist.hasNext()) {
-            	 
+
            	String ch = (String) sublist.next();
            	if (columnCount > 10) {
            		output.println("</tr><tr><td></td>");
@ -545,20 +550,20 @@ public class WriteCharts implements UCD_Types {
            	++columnCount;
            	continue;
            }
-            
+
            output.println("</tr>");
-            
+
        }
-        
+
        closeFile(output);
        closeIndexFile(indexFile, "", CASE);
    }
-    
+
    static void showCell(PrintWriter output, String s, String prefix, String extra, boolean skipName) {
        String name = Default.ucd.getName(s);
        String comp = Default.nfc.normalize(s);
-            
-        String outline = prefix 
+
+        String outline = prefix
            + (skipName ? "" : " title='" + Utility.quoteXML(name, true) + "'")
            + extra + ">"
            + Utility.quoteXML(comp, true)
@ -566,10 +571,10 @@ public class WriteCharts implements UCD_Types {
            + Utility.hex(s)
            //+ "<br>" + script
            + "</tt></td>";
-            
+
        output.println(outline);
    }
-    
+
    static byte getBestScript(String s) {
    	int cp;
    	byte result = COMMON_SCRIPT;
@ -588,33 +593,33 @@ public class WriteCharts implements UCD_Types {
 		}
 		return (result << 16);
    }
-    
+
    static final String[] CLASSNAME = {
-        "<td class='q'", 
-        "<td class='q'", 
-        "<td class='q'", 
-        "<td class='t'", 
-        "<td class='s'", 
+        "<td class='q'",
+        "<td class='q'",
+        "<td class='q'",
+        "<td class='t'",
+        "<td class='s'",
        "<td class='p'"};
-        
+
    static final String[] XCLASSNAME = {
-        "<td class='eq'", 
-        "<td class='eq'", 
-        "<td class='eq'", 
-        "<td class='et'", 
-        "<td class='es'", 
+        "<td class='eq'",
+        "<td class='eq'",
+        "<td class='eq'",
+        "<td class='et'",
+        "<td class='es'",
        "<td class='ep'"};
-        
+

    static PrintWriter indexFile;
-    
+
    static PrintWriter openFile(int count, String directory, int script) throws IOException {
        String scriptName = getChunkName(script, LONG);
        String shortScriptName = getChunkName(script, SHORT);
        String hover = scriptName.equals(shortScriptName) ? "" : "' title='" + shortScriptName;
-        
+
        String fileName = "chart_" + scriptName + (count > 1 ? count + "" : "") + ".html";
-        PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
+        PrintWriter output = Utility.openPrintWriter(directory + fileName, Utility.UTF8_WINDOWS);
        Utility.fixDot();
        System.out.println("Writing: " + scriptName);
        indexFile.println(" <a href = '" + fileName + hover + "'>" + scriptName + "</a>");
@ -626,10 +631,10 @@ public class WriteCharts implements UCD_Types {
        output.println("<table>");
        return output;
    }
-    
+
    static PrintWriter openFile2(int count, String directory, String name) throws IOException {
        String fileName = "chart_" + name + (count > 1 ? count + "" : "") + ".html";
-        PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
+        PrintWriter output = Utility.openPrintWriter(directory + fileName, Utility.UTF8_WINDOWS);
        Utility.fixDot();
        System.out.println("Writing: " + name);
        indexFile.println(" <a href = '" + fileName + "'>" + name + "</a>");
@ -641,8 +646,8 @@ public class WriteCharts implements UCD_Types {
        output.println("<table>");
        return output;
    }
-    
-    static final int 
+
+    static final int
    	NULL_ORDER = -3,
    	IGNORABLE_ORDER = -2,
    	VARIABLE_ORDER = -1,
@ -653,7 +658,7 @@ public class WriteCharts implements UCD_Types {
    	CAT_OFFSET = 128,
    	// categories in here
    	NO_CASE_MAPPING = 200;
-    
+
    static String getChunkName(int script, byte length) {
    	switch(script) {
    		case NO_CASE_MAPPING: return "NoCaseMapping";
@ -663,7 +668,7 @@ public class WriteCharts implements UCD_Types {
        	case CJK: return "CJK";
        	case CJK_AB: return "CJK-Extensions";
        	case UNSUPPORTED: return "Unsupported";
-        	default: 
+        	default:
    		if (script >= CAT_OFFSET) return Default.ucd.getCategoryID_fromIndex((byte)(script - CAT_OFFSET), length);
        	else if (script == HIRAGANA_SCRIPT && HACK_KANA) return length == SHORT ? "Kata-Hira" : "Katakana-Hiragana";
        	else return Default.ucd.getCase(Default.ucd.getScriptID_fromIndex((byte)script, length), FULL, TITLE);
@ -678,11 +683,11 @@ public class WriteCharts implements UCD_Types {


 	static final byte COLLATION = 0, NORMALIZATION = 1, CASE = 2;
-	
+
    static void closeIndexFile(PrintWriter indexFile, String extra, byte choice) {
        SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
        df.setTimeZone(TimeZone.getTimeZone("GMT"));
-        
+
        indexFile.println("</p><hr width='50%'><p>");
        boolean gotOne = false;
        if (choice != COLLATION) {
@ -705,12 +710,12 @@ public class WriteCharts implements UCD_Types {
        indexFile.println("</p></body></html>");
        indexFile.close();
    }
-    
+
    static boolean containsCase(String s) {
    	int cp;
    	for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
    		cp = UTF16.charAt(s, i);
-			// contains Lu, Lo, Lt, or Lowercase or Uppercase 
+			// contains Lu, Ll, Lt, or Other_Lowercase or Other_Uppercase
 			byte cat = Default.ucd.getCategory(cp);
 			if (cat == Lu || cat == Ll || cat == Lt) return true;
 			if (Default.ucd.getBinaryProperty(cp, Other_Lowercase)) return true;
@ -718,7 +723,204 @@ public class WriteCharts implements UCD_Types {
 		}
 		return false;
 	}
-    
+
+    /**
+     * Transliterator built from one rule: every character in [[:Mn:][:Me:]]
+     * is rewritten as U+25CC (DOTTED CIRCLE) followed by that character.
+     * showCell() runs the displayed text of each cell through it.
+     */
+    static final Transliterator addCircle = Transliterator.createFromRules(
+        "any-addCircle", "([[:Mn:][:Me:]]) > \u25CC $1", Transliterator.FORWARD);
+
+    /**
+     * Writes composition_chart.html.
+     * <p>For each script index below LIMIT_SCRIPT this collects the characters of
+     * that script whose NFD form differs from the character itself, splits every
+     * such decomposition into its first code point (the first two UTF-16 units for
+     * a Hangul syllable) and the remainder, and prints a table with one row per
+     * distinct first part and one column per distinct remainder.</p>
+     * <ul>
+     * <li>class 'w' cell: NFC(row + column) is one of the characters collected for
+     *     that decomposition;</li>
+     * <li>class 'r' cell: it is not (the first collected character is shown), or
+     *     composing failed (blank red cell);</li>
+     * <li>plain blank cell: no collected character decomposes to row + column.</li>
+     * </ul>
+     * <p>Characters of the script that did not end up in a cell are listed in a
+     * following "Excluded" table. A final table lists the [:Me:]/[:Mn:] characters
+     * that are not contained in the accumulated set of remainders.</p>
+     * @throws IOException declared because Utility.openPrintWriter declares it
+     */
+    public static void writeCompositionChart() throws IOException {
+        Default.setUCD();
+        UCA uca = new UCA(null,"");
+
+        Set letters = new TreeSet();
+        // uca is handed to TreeSet as the comparator for the mark sets
+        Set marks = new TreeSet(uca);
+        Set totalMarks = new TreeSet(uca);
+        // decomposition -> set of source characters; accumulates across all scripts
+        Map decomposes = new HashMap();
+        Set notPrinted = new TreeSet(new UTF16.StringComparator());
+        Set printed = new HashSet();
+
+        PrintWriter out = Utility.openPrintWriter("composition_chart.html", Utility.UTF8_WINDOWS);
+        try {
+            out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+            out.println("<style>");
+
+            out.println("body { font-family: Arial Unicode MS }");
+            out.println("td { text-align: Center ; vertical-align: top; width: 1%; background-color: #EEEEEE }");
+            out.println("tt { font-size: 50% }");
+            // CSS declarations are "property: value"; the attribute-style form is ignored by browsers
+            out.println("table { width: 1% }");
+            out.println(".w { background-color: #FFFFFF }");
+            out.println(".h { background-color: #EEEEFF }");
+            out.println(".r { background-color: #FF0000 }");
+            out.println("</style>");
+            out.println("</head><body bgcolor='#FFFFFF'>");
+            out.println("<h1>Composites</h1>");
+
+            for (byte script = 0; script < UCD_Types.LIMIT_SCRIPT; ++script) {
+
+                String scriptName = "";
+                try {
+                    scriptName = Default.ucd.getScriptID_fromIndex(script);
+                    Utility.fixDot();
+                    System.out.println(scriptName);
+                } catch (IllegalArgumentException e) {
+                    // the lookup failed before scriptName was assigned, so report the index
+                    System.out.println("Failed to get script name for script index: " + script);
+                    continue;
+                }
+
+                letters.clear();
+                letters.add(""); // header row
+                marks.clear();
+                notPrinted.clear();
+                printed.clear();
+
+                // 0x10FFFF is the last code point, so the bound is inclusive
+                for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+                    byte type = Default.ucd.getCategory(cp);
+                    if (type == Default.ucd.UNASSIGNED || type == Default.ucd.PRIVATE_USE) continue; // skip chaff
+                    Utility.dot(cp);
+
+                    byte newScript = Default.ucd.getScript(cp);
+                    if (newScript != script) continue;
+
+                    String source = UTF16.valueOf(cp);
+                    String decomp = Default.nfd.normalize(source);
+                    if (decomp.equals(source)) continue;
+
+                    // length in UTF-16 units of the first code point of the decomposition
+                    int count = UTF16.getCharCount(UTF16.charAt(decomp, 0));
+
+                    if (count == decomp.length()) {
+                        // decomposes to a single code point: nothing to chart, list it as excluded
+                        notPrinted.add(source);
+                        continue;
+                    }
+
+                    if (UCD.isHangulSyllable(cp)) count = 2;
+                    String first = decomp.substring(0, count);
+                    String second = decomp.substring(count);
+
+                    letters.add(first);
+                    marks.add(second);
+                    Utility.addToSet(decomposes, decomp, source);
+                    notPrinted.add(source);
+                    // debugging trace for U+212B ANGSTROM SIGN
+                    if (source.equals("\u212b")) System.out.println("A-RING!");
+                }
+
+                if (marks.size() != 0) {
+
+                    totalMarks.addAll(marks);
+
+                    out.println("<table border='1' cellspacing='0'>");
+                    // \u00D7 is the multiplication sign; escaped so the source stays ASCII
+                    out.println("<caption>" + scriptName + "<br>(" + letters.size() + " \u00D7 " + marks.size() + ")</caption>");
+
+                    Iterator it2 = letters.iterator();
+                    while (it2.hasNext()) {
+                        String let = (String)it2.next();
+                        out.println("<tr>" + showCell(Default.nfc.normalize(let), "class='h'"));
+                        Iterator it3 = marks.iterator();
+                        while (it3.hasNext()) {
+                            String mark = (String)it3.next();
+                            String merge = let + mark;
+                            if (let.length() != 0 && decomposes.get(merge) == null) {
+                                out.println("<td>&nbsp;</td>");
+                                continue;
+                            }
+                            String comp;
+                            try {
+                                comp = Default.nfc.normalize(merge);
+                            } catch (Exception e) {
+                                System.out.println("Failed when trying to compose <" + Utility.hex(merge) + ">");
+                                // still emit a cell so the remaining columns of this row stay aligned
+                                out.println(showCell(null, "class='r'"));
+                                continue;
+                            }
+                            // only consulted for non-header rows, where the check above guarantees non-null
+                            Set decomps = (Set) decomposes.get(merge);
+                            if (let.length() == 0) {
+                                printed.add(comp);
+                                out.println(showCell(comp, "class='h'"));
+                            } else if (decomps.contains(comp)) {
+                                printed.add(comp);
+                                out.println(showCell(comp, "class='w'"));
+                            } else {
+                                // NFC does not give back any collected source: show the first one, in red
+                                comp = (String) decomps.iterator().next();
+                                printed.add(comp);
+                                out.println(showCell(comp, "class='r'"));
+                            }
+                        }
+                        out.println("</tr>");
+                    }
+                    out.println("</table><br>");
+                }
+                notPrinted.removeAll(printed);
+                if (notPrinted.size() != 0) {
+                    tabulate(out, scriptName + " Excluded", notPrinted.iterator(), 24, "class='r'");
+                    out.println("<br>");
+                }
+            }
+
+            Set otherMarks = new TreeSet(uca);
+            UnicodeSet markSet = new UnicodeSet("[[:Me:][:Mn:]]");
+            UnicodeSetIterator it = new UnicodeSetIterator();
+            it.reset(markSet);
+            while (it.next()) {
+                int cp = it.codepoint;
+                String source = UTF16.valueOf(cp);
+                if (totalMarks.contains(source)) continue; // skip all that we have already
+                otherMarks.add(source);
+            }
+            tabulate(out, "Marks that never combine", otherMarks.iterator(), 24, "class='b'");
+
+            out.println("</body></html>");
+
+        } finally {
+            out.close();
+        }
+    }
+
+    /**
+     * Writes an HTML table whose cells are the strings produced by an iterator,
+     * starting a new row after every <code>limit</code> cells.
+     * @param out where the markup is written
+     * @param caption table caption; no caption element is written when null or empty
+     * @param it2 iterator over the String contents of the cells
+     * @param limit maximum number of cells in one row
+     * @param classType attribute text handed to showCell for every cell
+     */
+    public static void tabulate(PrintWriter out, String caption, Iterator it2, int limit, String classType) {
+        // <caption> is only valid as the first child of <table>, so it goes before the first <tr>
+        out.println("<table border='1' cellspacing='0'>");
+        if (caption != null && caption.length() != 0) {
+            out.println("<caption>" + caption + "</caption>");
+        }
+        out.println("<tr>");
+        int count = 0;
+        while (it2.hasNext()) {
+            if (++count > limit) {
+                out.println("</tr><tr>");
+                count = 1;
+            }
+
+            out.println(showCell((String)it2.next(), classType));
+        }
+        out.println("</tr></table>");
+    }
+
+    /**
+     * Builds the markup for one table cell.
+     * @param comp the text of the cell; null produces a cell holding only &amp;nbsp;
+     * @param classType attribute text placed inside the td tag (may be empty, must not be null)
+     * @return a complete td element. For non-null comp it carries a title of
+     *   hex code plus name, the text passed through addCircle, and the hex code
+     *   in a tt element.
+     */
+    public static String showCell(String comp, String classType) {
+        String open = "<td " + classType + (classType.length() != 0 ? " " : "");
+        if (comp == null) {
+            return open + ">&nbsp;</td>";
+        }
+        String hex = Utility.hex(comp);
+        // base characters such as '<', '>' and '&' reach this method (they are the first
+        // part of some decompositions), so both the title and the visible text are escaped
+        return open
+            + "title='" + escapeForCell(hex + " " + Default.ucd.getName(comp)) + "'>"
+            + escapeForCell(addCircle.transliterate(comp))
+            + "<br><tt>" + hex + "</tt></td>";
+    }
+
+    /**
+     * Replaces the characters that are significant in HTML text and in a
+     * single-quoted attribute value with character references.
+     */
+    private static String escapeForCell(String text) {
+        StringBuffer result = new StringBuffer(text.length() + 16);
+        for (int i = 0; i < text.length(); ++i) {
+            char c = text.charAt(i);
+            switch (c) {
+                case '&': result.append("&amp;"); break;
+                case '<': result.append("&lt;"); break;
+                case '>': result.append("&gt;"); break;
+                case '\'': result.append("&#39;"); break;
+                default: result.append(c); break;
+            }
+        }
+        return result.toString();
+    }
+
+
 }


@ -730,7 +932,7 @@ public class WriteCharts implements UCD_Types {
    static final IntStack p2 = new IntStack(30);
    static final IntStack s2 = new IntStack(30);
    static final IntStack t2 = new IntStack(30);
-    
+
    static int getStrengthDifference(CEList ceList, CEList lastCEList) {
        extractNonzeros(ceList, p1, s1, t1);
        extractNonzeros(lastCEList, p2, s2, t2);
@ -742,12 +944,12 @@ public class WriteCharts implements UCD_Types {
        if (temp != 0) return 1;
        return 0;
    }
-    
+
    static void extractNonzeros(CEList ceList, IntStack primaries, IntStack secondaries, IntStack tertiaries) {
        primaries.clear();
        secondaries.clear();
        tertiaries.clear();
-        
+
        for (int i = 0; i < ceList.length(); ++i) {
            int ce = ceList.at(i);
            int temp = UCA.getPrimary(ce);
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ 
-* $Date: 2002/07/15 15:23:01 $ 
-* $Revision: 1.26 $
+* $Date: 2002/09/25 06:40:14 $ 
+* $Revision: 1.27 $
 *
 *******************************************************************************
 */
@ -144,7 +144,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
        BufferedReader in = Utility.openUnicodeFile("CaseFolding", UNICODE_VERSION, true, false);
        // new BufferedReader(new FileReader(DIR31 + "CaseFolding-3.d3.alpha.txt"), 64*1024);
        // log = new PrintWriter(new FileOutputStream("CaseFolding_data.js"));
-        log = Utility.openPrintWriter("CaseFolding_data.js", false, false);
+        log = Utility.openPrintWriter("CaseFolding_data.js", Utility.UTF8_WINDOWS);
        log.println("var CF = new Object();");
        int count = 0;
        while (true) {
@ -189,7 +189,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
        Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
        Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        //log = new PrintWriter(new FileOutputStream("Normalization_data.js"));
-        log = Utility.openPrintWriter("Normalization_data.js", false, false);
+        log = Utility.openPrintWriter("Normalization_data.js", Utility.LATIN1_WINDOWS);
        
        
        int count = 0;
@ -318,7 +318,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
            }
        }
        
-        PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", true, true);
+        PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS);
        //if (!shortPrint) log.write('\uFEFF');
        log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
        log.println("# Generated:   " + getNormalDate());
@ -702,7 +702,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
    }*/
    
    static void testCompatibilityCharacters() throws IOException {
-        log = Utility.openPrintWriter("UCA_CompatComparison.txt");
+        log = Utility.openPrintWriter("UCA_CompatComparison.txt", Utility.UTF8_WINDOWS);
        
        int[] kenCes = new int[50];
        int[] markCes = new int[50];
@ -1196,7 +1196,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
                    "UTF8"),
                32*1024));
                */
-        PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", false, false);
+        PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", Utility.UTF8_WINDOWS);
                
        diLog.write('\uFEFF');

@ -1234,7 +1234,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
                    "UTF8"),
                32*1024));
                */
-        PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", false, false);
+        PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.LATIN1_WINDOWS);
                
        diLog.write('\uFEFF');

@ -1413,7 +1413,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
                    "UTF8"),
                32*1024));
                */
-        PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", false, false);
+        PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.LATIN1_WINDOWS);
                
        diLog.write('\uFEFF');

@ -1660,7 +1660,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
        int[] lenArray = new int[1];
        
        Set alreadyDone = new HashSet();
-        PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", false, false);
+        PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.LATIN1_WINDOWS);

        while (true) {
            String s = cc.next(ces, lenArray);
@ -1784,7 +1784,7 @@ F900..FAFF; CJK Compatibility Ideographs
        if (shortPrint) filename += "_SHORT";
        if (option == IN_XML) filename += ".xml"; else filename += ".txt";
        
-        log = Utility.openPrintWriter(filename, false, false);
+        log = Utility.openPrintWriter(filename, Utility.LATIN1_WINDOWS);
        
        String[] commentText = {
        	"UCA Rules",
@ -3951,7 +3951,7 @@ static int swapCJK(int i) {
        Default.setUCD();
    	
        //log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html"));
-        log = Utility.openPrintWriter("CheckCollationValidity.html", false, false);
+        log = Utility.openPrintWriter("CheckCollationValidity.html", Utility.UTF8_WINDOWS);
        
        log.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
        log.println("<title>UCA Validity Log</title>");
@ -4618,7 +4618,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;

    static PrintWriter writeHead(int counter, int end, String title, String other, String version, boolean show) throws IOException {

-        PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html");
+        PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html", Utility.UTF8_WINDOWS);
        
        copyFile(out, "HTML-Part1.txt");
        /*
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2002/08/09 23:56:24 $
-* $Revision: 1.22 $
+* $Date: 2002/09/25 06:40:13 $
+* $Revision: 1.23 $
 *
 *******************************************************************************
 */
@ -73,11 +73,15 @@ public final class Main implements UCD_Types {
            
            else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
            
+            else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
+            else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
+            
            else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
            else if (arg.equalsIgnoreCase("Buildnames")) BuildNames.main(null);
            else if (arg.equalsIgnoreCase("TestNormalization")) TestNormalization.main(null);
            
            
+            else if (arg.equalsIgnoreCase("GenerateCaseTest")) GenerateCaseTest.main(null);
            else if (arg.equalsIgnoreCase("checkDecompFolding")) VerifyUCD.checkDecompFolding();
            
            else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2002/08/04 21:38:45 $
-* $Revision: 1.17 $
+* $Date: 2002/09/25 06:40:13 $
+* $Revision: 1.18 $
 *
 *******************************************************************************
 */
@ -964,6 +964,9 @@ to guarantee identifier closure.

    public boolean hasComputableName(int codePoint) {
        if (codePoint >= 0xF900 && codePoint <= 0xFA2D) return true;
+        if (codePoint >= 0x2800 && codePoint <= 0x28FF) return true; 
+        if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) return true;
+        
        int rangeStart = mapToRepresentative(codePoint, major < 2);
        switch (rangeStart) {
          default:
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2002/08/04 21:38:44 $
-* $Revision: 1.24 $
+* $Date: 2002/09/25 06:40:14 $
+* $Revision: 1.25 $
 *
 *******************************************************************************
 */
@ -18,11 +18,16 @@ import java.text.*;
 import java.io.*;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.Replaceable;
+import com.ibm.icu.text.ReplaceableString;
+import com.ibm.icu.text.UnicodeMatcher;
+
 import com.ibm.text.UCD.*;

 public final class Utility implements UCD_Types {    // COMMON UTILITIES

    static final boolean UTF8 = true; // TODO -- make argument
+    public static final char BOM = '\uFEFF';
    
    public static String[] append(String[] array1, String[] array2) {
        String[] temp = new String[array1.length + array2.length];
@ -334,6 +339,83 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
        }
        return output.toString();
    }
+    
+    
+    /**
+     * Mutable pair of text offsets. next() and previous() store the start and
+     * the limit of the match they found in an instance of this class.
+     */
+    public static final class Position {
+        /** offset at which the match starts */
+        public int start;
+        /** offset reported by the matcher as the end of the match */
+        public int limit;
+    }
+    
+    /**
+     * Scans forward for the first index, from <code>offset</code> up to and
+     * including <code>text.length()</code>, at which the matcher reports U_MATCH.
+     * Each attempt passes <code>text.length()</code> as the match limit.
+     * @param matcher A UnicodeMatcher, such as a UnicodeSet.
+     * @param text the text to search
+     * @param offset index at which the scan starts
+     * @param output receives the start index of the match and the index the
+     *   matcher advanced to; when nothing matches, both are set to text.length()
+     * @return true iff match found
+     */
+    public static boolean next(UnicodeMatcher matcher, Replaceable text, int offset,
+      Position output) {
+        final int textLength = text.length();
+        // the matcher reads the start index from, and writes the end index to, element 0
+        final int[] cursor = new int[1]; // TODO replace later; extra object creation
+        int candidate = offset;
+        // don't worry about surrogates; matcher will handle
+        while (candidate <= textLength) {
+            cursor[0] = candidate;
+            int outcome = matcher.matches(text, cursor, textLength, false);
+            if (outcome == UnicodeMatcher.U_MATCH) {
+                output.start = candidate;
+                output.limit = cursor[0];
+                return true;
+            }
+            ++candidate;
+        }
+        output.limit = textLength;
+        output.start = textLength;
+        return false;
+    }
+
+    /**
+     * Scans backward for the first index, from <code>offset</code> down to and
+     * including 0, at which the matcher reports U_MATCH. Each attempt passes
+     * <code>offset</code> as the match limit.
+     * @param matcher A UnicodeMatcher, such as a UnicodeSet.
+     * @param text the text to search
+     * @param offset index at which the backward scan starts
+     * @param output receives the start index of the match and the index the
+     *   matcher advanced to; when nothing matches, both are set to 0
+     * @return true iff match found
+     */
+    public static boolean previous(UnicodeMatcher matcher, Replaceable text, int offset,
+      Position output) {
+        final int floor = 0;
+        // the matcher reads the start index from, and writes the end index to, element 0
+        final int[] cursor = new int[1]; // TODO replace later; extra object creation
+        int candidate = offset;
+        // don't worry about surrogates; matcher will handle
+        while (candidate >= floor) {
+            cursor[0] = candidate;
+            int outcome = matcher.matches(text, cursor, offset, false);
+            if (outcome == UnicodeMatcher.U_MATCH) {
+                output.start = candidate;
+                output.limit = cursor[0];
+                return true;
+            }
+            --candidate;
+        }
+        output.limit = floor;
+        output.start = floor;
+        return false;
+    }
+
+    /**
+     * Repeatedly calls next() on the text, starting at offset 0 and resuming at
+     * the limit of the previous result, and stores each result in consecutive
+     * elements of output (a Position is allocated for any element that is null).
+     * The loop ends with the first call that finds no match; the Position written
+     * by that call, whose start and limit are both text.length(), is stored and
+     * counted as well. Thus if nothing matches, output[0] holds (length, length)
+     * and the return value is 1.
+     * NOTE(review): the stored ranges are the spans matched by divider, not the
+     * text between them -- confirm this is what callers expect.
+     * NOTE(review): output is not bounds-checked, and a match whose limit equals
+     * its start would be found again on the next pass; presumably callers pass a
+     * large enough array and a divider that cannot match the empty string -- verify.
+     * @param divider A UnicodeMatcher, such as a UnicodeSet.
+     * @param text the text to be scanned
+     * @param output where the resulting positions go
+     * @return the number of items put into output
+     */
+	public static int split(UnicodeMatcher divider, Replaceable text, Position[] output) {
+	    int index = 0;
+	    // the update clause runs after index++ below, so index-1 is the entry just filled in
+	    for (int offset = 0;; offset = output[index-1].limit) {
+	        if (output[index] == null) output[index] = new Position();
+	        boolean matches = next(divider, text, offset, output[index++]);
+	        // the failed lookup has already been counted by index++
+	        if (!matches) return index;
+	    }
+	}

    /**
     * Splits a string containing divider into pieces, storing in output
@ -358,14 +440,14 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
 	}

 	public static String[] split(String s, char divider) {
-	    String[] result = new String[100];
+	    String[] result = new String[100]; // HACK
 	    int count = split(s, divider, result);
 	    return extract(result, 0, count);
 	}

-	public static String[] extract(String[] source, int start, int end) {
-	    String[] result = new String[end-start];
-	    System.arraycopy(source, start, result, 0, end - start);
+	public static String[] extract(String[] source, int start, int limit) {
+	    String[] result = new String[limit-start];
+	    System.arraycopy(source, start, result, 0, limit - start);
 	    return result;
 	}

@ -564,7 +646,8 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
    // Or if they are UTF8, use true, false
    public static PrintWriter openPrintWriter(String filename, byte options) throws IOException {
        File file = new File(getOutputName(filename));
-        System.out.println("Creating File: " + file);
+        Utility.fixDot();
+        System.out.println("Creating File: " + file.getCanonicalPath());
        File parent = new File(file.getParent());
        //System.out.println("Creating File: "+ parent);
        parent.mkdirs();
@ -609,6 +692,28 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
        }
    }
    
+    /**
+     * Prints the entries of a map, in the iteration order of its key set, as
+     * key + pairSeparator + value, with separator written between entries.
+     * @param pw destination
+     * @param c the map to print
+     * @param pairSeparator text placed between a key and its value
+     * @param separator text placed between two printed entries
+     * @param b may be null. When present, entries whose key fails b.filter are
+     *   skipped, and the key is printed as b.get(key, previous printed key),
+     *   where the previous key is null for the first printed entry.
+     */
+    public static void print(PrintWriter pw, Map c, String pairSeparator, String separator, Breaker b) {
+        boolean needSeparator = false;
+        Object previousKey = null;
+        for (Iterator keys = c.keySet().iterator(); keys.hasNext(); ) {
+            Object key = keys.next();
+            Object value = c.get(key);
+            if (b != null && !b.filter(key)) {
+                continue;
+            }
+            if (needSeparator) {
+                pw.print(separator);
+            }
+            needSeparator = true;
+            String entryText;
+            if (b == null) {
+                entryText = key + pairSeparator + value;
+            } else {
+                entryText = b.get(key, previousKey) + pairSeparator + value;
+            }
+            pw.print(entryText);
+            // only entries that were actually printed become the "previous" key
+            previousKey = key;
+        }
+    }
+    
    public static void appendFile(String filename, boolean utf8, PrintWriter output) throws IOException {
    	appendFile(filename, utf8, output, null);
    }
@ -870,19 +975,35 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
    static PrintWriter showSetNamesPw;
    
    public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, UCD ucd) {
-        if (showSetNamesPw == null) showSetNamesPw = new PrintWriter(System.out);
-        showSetNames(showSetNamesPw, prefix, set, separateLines, false, ucd);
-        showSetNamesPw.flush();
+        showSetNames(prefix,  set,  separateLines,  false,  false, ucd);
+    }
+    
+    public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
+        showSetNames(prefix,  set,  separateLines,  IDN,  false, ucd);
    }
    
    public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
+        showSetNames( pw,  prefix,  set,  separateLines,  IDN,  false, ucd);
+    }
+    
+    public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, boolean IDN, boolean withChar, UCD ucd) {
+        if (showSetNamesPw == null) showSetNamesPw = new PrintWriter(System.out);
+        showSetNames(showSetNamesPw, prefix, set, separateLines, IDN, withChar, ucd);
+        showSetNamesPw.flush();
+    }
+    
+    public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN, 
+            boolean withChar, UCD ucd) {
        int count = set.getRangeCount();
        for (int i = 0; i < count; ++i) {
            int start = set.getRangeStart(i);
            int end = set.getRangeEnd(i);
            if (separateLines || (IDN && isSeparateLineIDN(start,end,ucd))) {
                for (int cp = start; cp <= end; ++cp) {
-                    if (!IDN) pw.println(prefix + ucd.getCodeAndName(cp));
+                    if (!IDN) pw.println(prefix + ucd.getCode(cp)
+                        + "\t# " 
+                        + (withChar ? " (" + UTF16.valueOf(cp) + ") " : "")
+                        + ucd.getName(cp));
                    else {
                        pw.println(prefix + Utility.hex(cp,4) + "; " + ucd.getName(cp));
                    }
@ -891,7 +1012,10 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
                if (!IDN) {
                    pw.println(prefix + ucd.getCode(start)
                        + ((start != end) ? (".." + ucd.getCode(end)) : "")
-                        + "\t# " + ucd.getName(start) + ((start != end) ? (".." + ucd.getName(end)) : "")
+                        + "\t# "
+                        + (withChar ? " (" + UTF16.valueOf(start)
+                            + ((start != end) ? (".." + UTF16.valueOf(end)) : "") + ") " : "")
+                        + ucd.getName(start) + ((start != end) ? (".." + ucd.getName(end)) : "")
                    );
                } else {