reorg

X-SVN-Rev: 5824
2001-09-19 23:33:52 +00:00 · 2001-09-19 23:33:52 +00:00 · 42bddd7bf5
commit 42bddd7bf5
parent dee8a86dee
24 changed files with 1015 additions and 220 deletions
--- a/tools/unicodetools/com/ibm/text/UCA/CEList.java
+++ b/tools/unicodetools/com/ibm/text/UCA/CEList.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/CEList.java,v $ 
-* $Date: 2001/08/31 00:20:40 $ 
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:32:21 $ 
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -102,7 +102,8 @@ public final class CEList implements java.lang.Comparable, UCD_Types {

            for (int i = startOffset; i < min; ++i) {
                if (contents[i] != that.contents[i + delta]) {
-                    if (contents[i] < that.contents[i + delta]) return -1;
+                    if ((contents[i] & 0xFFFFFFFFL) 
+                        < (that.contents[i + delta] & 0xFFFFFFFFL)) return -1;
                    return 1;
                }
            }
@ -158,7 +159,9 @@ public final class CEList implements java.lang.Comparable, UCD_Types {
    public static String toString(int ce) {
        return "[" + Utility.hex(UCA.getPrimary(ce)) + "." 
          + Utility.hex(UCA.getSecondary(ce)) + "."
-          + Utility.hex(UCA.getTertiary(ce)) + "](" + NAME3[UCA.getTertiary(ce)] + ")";
+          + Utility.hex(UCA.getTertiary(ce)) + "]"
+         // + "(" + NAME3[UCA.getTertiary(ce)] + ")"
+          ;
    }
    
    static final String[] NAME3 = {
--- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
+++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ 
-* $Date: 2001/09/06 01:30:31 $ 
-* $Revision: 1.3 $
+* $Date: 2001/09/19 23:32:21 $ 
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -43,7 +43,7 @@ public class GenOverlap implements UCD_Types {
        nfd = new Normalizer(Normalizer.NFD);
        nfkd = new Normalizer(Normalizer.NFKD);
            
-        UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
            
        // store data for faster lookup
            
@ -307,7 +307,7 @@ public class GenOverlap implements UCD_Types {
        nfd = new Normalizer(Normalizer.NFD);
        nfkd = new Normalizer(Normalizer.NFKD);
            
-        UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
            
        // store data for faster lookup
            
@ -505,7 +505,7 @@ public class GenOverlap implements UCD_Types {
        //nfd = new Normalizer(Normalizer.NFD);
        //nfkd = new Normalizer(Normalizer.NFKD);
            
-        UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        nfd = new Normalizer(Normalizer.NFD);
        nfkd = new Normalizer(Normalizer.NFKD);
            
--- a/tools/unicodetools/com/ibm/text/UCA/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Main.java
@ -0,0 +1,20 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ 
+* $Date: 2001/09/19 23:31:50 $ 
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.text.UCA;
+
+/**
+ * Command-line entry point for the UCA tools.
+ * For now it only forwards its arguments to WriteCollationData.main.
+ */
+public class Main {
+    /**
+     * Passes the arguments through to WriteCollationData.main unchanged.
+     * @param args command-line options
+     * @throws Exception anything thrown by WriteCollationData.main
+     */
+    public static void main(String args[]) throws Exception {
+        WriteCollationData.main(args); // TODO, pull from there to here.
+    }
+}
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ 
-* $Date: 2001/09/06 01:30:31 $ 
-* $Revision: 1.3 $
+* $Date: 2001/09/19 23:32:21 $ 
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -765,15 +765,6 @@ final public class UCA {
     */
    static final int EXCEPTION_CE_MASK = 0xFFC00000;
    
-    /**
-     * Any unsupported characters (those not in the UCA data tables) 
-     * are marked with a exception bit combination
-     * so that they can be treated specially.<br>
-     * There are at least 34 values, so that we can use a range for surrogates
-     * However, we do add to the first weight if we have surrogate pairs!
-     */
-    static final int UNSUPPORTED = 0xFFC20101;
-    
    /**
     * Used to composed Hangul and Han characters
     */
@ -781,6 +772,18 @@ final public class UCA {
    static final int NEUTRAL_SECONDARY = 0x20;
    static final int NEUTRAL_TERTIARY = 0x02;
       
+    /**
+     * Any unsupported characters (those not in the UCA data tables) 
+     * are marked with an exception bit combination
+     * so that they can be treated specially.<br>
+     * There are at least 34 values, so that we can use a range for surrogates
+     * However, we do add to the first weight if we have surrogate pairs!
+     */
+    static final int UNSUPPORTED_P = 0xFFC2;
+    static final int UNSUPPORTED = makeKey(UNSUPPORTED_P, NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
+    
+    // was 0xFFC20101;
+    
    /**
     * Contracting characters are marked with a exception bit combination 
     * in the collationElement table.
@ -968,9 +971,14 @@ final public class UCA {
            // in code order.
                    // add bottom 5 bits to UNSUPPORTED, and push rest
                    //return UNSUPPORTED + (bigChar & 0xFFFF0000);    // top bits added
+            expandingStack.push(makeKey((bigChar & 0x7FFF) | 0x8000, 0, 0)); // primary = bottom 15 bits of bigChar with bit 15 (0x8000) turned on.
+            // secondary and tertiary are both zero
+            return makeKey(UNSUPPORTED_P + (bigChar >> 15), NEUTRAL_SECONDARY, NEUTRAL_TERTIARY); // top 34 values plus UNSUPPORTED
+            /*
            expandingStack.push(((bigChar & 0x7FFF) << 16) | 0x10000000); // primary = bottom 15 bits plus turn bottom bit on.
            // secondary and tertiary are both zero
            return UNSUPPORTED + ((bigChar << 1) & 0xFFFF0000); // top 34 values plus UNSUPPORTED
+            */
        }
        if (ce == CONTRACTING) {
            // Contracting is probably the most interesting (read "tricky") part
@ -1127,11 +1135,11 @@ final public class UCA {
        return new Hashtable(multiTable);
    }
    
-    public CollationContents getCollationContents(byte ceLimit, Normalizer skipDecomps) {
-        return new CollationContents(ceLimit, skipDecomps);
+    public UCAContents getContents(byte ceLimit, Normalizer skipDecomps) {
+        return new UCAContents(ceLimit, skipDecomps);
    }
    
-    public class CollationContents {
+    public class UCAContents {
        int current = -1;
        Normalizer skipDecomps = new Normalizer(Normalizer.NFD);
        Iterator enum = null;
@ -1140,16 +1148,15 @@ final public class UCA {
        /**
         * use FIXED_CE as the limit
         */
-        CollationContents(byte ceLimit, Normalizer skipDecomps) {
+        UCAContents(byte ceLimit, Normalizer skipDecomps) {
            this.ceLimit = ceLimit;
            this.skipDecomps = skipDecomps;
        }
-       
+        
        /**
-         * returns a string and its ces
+         * returns a string
         */
-        public String next(int[] ces, int[] len) {
-
+        public String next() {
            String result = null; // null if done
            
            // normal case
@ -1158,7 +1165,6 @@ final public class UCA {
                if (getCEType(ch) >= ceLimit) continue;
                if (skipDecomps != null && skipDecomps.hasDecomposition(ch)) continue;
                result = String.valueOf(ch);
-                len[0] = getCEs(result, true, ces);
                return result;
            }
            
@ -1166,11 +1172,36 @@ final public class UCA {
            if (enum == null) enum = multiTable.keySet().iterator();
            if (enum.hasNext()) {
                result = (String)enum.next();
-                len[0] = getCEs(result, true, ces);
            }
            
            return result;
        }
+        
+       
+        /**
+         * Fetches the next string and fills in its collation elements.
+         * @param ces buffer handed to getCEs to receive the collation elements
+         * @param len len[0] receives the value returned by getCEs
+         * @return the next string, or null once the iteration is finished
+         */
+        public String next(int[] ces, int[] len) {
+            String result = next();
+            if (result == null) return null; // done: ces and len are left untouched
+            len[0] = getCEs(result, true, ces);
+            return result;
+        }
+        
+        int[] lengthBuffer = new int[1];
+        
+        /**
+         * Advances to the next string and stores it, together with its collation
+         * elements, in the supplied pair. On success result.first is set to a new
+         * CEList built from ceListBuffer and lengthBuffer[0], and result.second
+         * is set to the string itself.
+         * @param result pair that receives the CEList (first) and the string (second)
+         * @return false when there are no more strings (result is not modified),
+         *         true otherwise
+         */
+        public boolean next(Pair result) {
+            String s = next(ceListBuffer, lengthBuffer);
+            if (s == null) return false;
+            result.first = new CEList(ceListBuffer, 0, lengthBuffer[0]);
+            result.second = s;
+            return true;
+        }
    }
    
    /**
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
@ -0,0 +1,213 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $ 
+* $Date: 2001/09/19 23:31:50 $ 
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.text.UCA;
+
+import java.util.*;
+
+import java.io.*;
+import com.ibm.text.UCD.*;
+import com.ibm.text.utility.*;
+import com.ibm.text.UTF16;
+
+public class WriteCharts implements UCD_Types {
+    
+    static UCD ucd;
+    
+    /**
+     * Writes the collation charts: one HTML file per chunk (script, or one of the
+     * pseudo-chunks NULL / IGNORABLE / VARIABLE) plus an index page that links to
+     * every chart.
+     * <p>All strings returned by uca.getContents(UCA.FIXED_CE, null) are sorted by
+     * their sort key and written as table cells. A new chart file is started when
+     * the chunk changes; a new table row is started after more than ten cells or
+     * when the first character of the sort key changes.
+     * @param uca collator to chart; its alternate setting is switched to NON_IGNORABLE
+     * @throws IOException declared because the chart and index files are opened here
+     */
+    static public void test(UCA uca) throws IOException {
+
+        uca.setAlternate(UCA.NON_IGNORABLE);
+
+        ucd = UCD.make();
+        // Not used while decomposables are included; kept so it can be passed to
+        // getContents below (and in case constructing it has set-up side effects).
+        Normalizer nfd = new Normalizer(Normalizer.NFD);
+
+        UCA.UCAContents cc = uca.getContents(UCA.FIXED_CE, null); // nfd instead of null if skipping decomps
+
+        // Pairs of (sort key, string); the TreeSet orders them for output.
+        Set set = new TreeSet();
+
+        while (true) {
+            String x = cc.next();
+            if (x == null) break;
+            set.add(new Pair(uca.getSortKey(x), x));
+        }
+
+        PrintWriter output = null;
+
+        Iterator it = set.iterator();
+
+        int oldScript = -999;
+
+        // Chunk ids start at -3 (see getChunkName), so every index below is offset
+        // by 3 and the table needs three slots more than LIMIT_SCRIPT.
+        int[] scriptCount = new int[LIMIT_SCRIPT + 3];
+
+        int counter = 0;
+
+        String lastSortKey = null;
+
+        int high = uca.getSortKey("a").charAt(0);
+        int variable = UCA.getPrimary(uca.getVariableHigh());
+
+        int columnCount = 0;
+
+        indexFile = Utility.openPrintWriter("CollationCharts\\index_list.html");
+
+        indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+        indexFile.println("<title>UCA Default Collation Table</title>");
+        indexFile.println("<base target='main'>");
+        indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
+        indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
+
+        while (it.hasNext()) {
+            // advance the progress counter, as the other Utility.dot callers do
+            Utility.dot(counter++);
+
+            Pair p = (Pair) it.next();
+            String sortKey = (String) p.first;
+            String s = (String) p.second;
+
+            int cp = UTF16.charAt(s,0);
+            byte script = ucd.getScript(cp);
+            if (script == KATAKANA_SCRIPT) script = HIRAGANA_SCRIPT;
+            else if (script == INHERITED_SCRIPT) script = COMMON_SCRIPT;
+
+            // get first non-zero primary
+            // and let it override the script with a pseudo-chunk where applicable
+            int primary = sortKey.charAt(0);
+            if (sortKey.length() < 4) script = -3;
+            else if (primary == 0) script = -2;
+            else if (primary < variable) script = -1;
+            else if (primary < high) script = COMMON_SCRIPT;
+
+            if (script != oldScript
+                    && (oldScript < COMMON_SCRIPT || script != COMMON_SCRIPT && script != INHERITED_SCRIPT)) {
+                closeFile(output);
+                output = null;
+            }
+            if (output == null) {
+                ++scriptCount[script+3];
+                if (scriptCount[script+3] > 1) {
+                    // getChunkName also copes with the negative pseudo-chunk ids
+                    System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
+                        getChunkName(script) + ", " + ucd.getCodeAndName(s));
+                }
+                output = openFile(scriptCount[script+3], script);
+                oldScript = script;
+            }
+
+            // 6 means "no previous key, or the first sort-key character changed"
+            int strength = 6;
+            if (lastSortKey != null && sortKey.charAt(0) == lastSortKey.charAt(0)) {
+                strength = uca.strengthDifference(sortKey, lastSortKey);
+                if (strength < 0) strength = -strength;
+            }
+            lastSortKey = sortKey;
+            String breaker = "";
+            if (columnCount > 10 || strength > 5) {
+                // a continuation row starts with an empty cell
+                if (strength <= 5) breaker = "</tr><tr><td></td>";
+                else breaker = "</tr><tr>";
+                columnCount = 0;
+            }
+            output.println(breaker + CLASSNAME[strength] + s
+                + "<br><tt>" + Utility.hex(s)
+                //+ "<br>" + script
+                //+ "<br>" + UCA.toString(sortKey)
+                + "</tt></td>");
+            ++columnCount;
+        }
+
+        closeFile(output);
+        indexFile.println("</body></html>");
+        indexFile.close();
+    }
+    
+    /**
+     * Opening td tag (with its CSS class) for a chart cell, indexed by the
+     * strength value computed in test. Index 6 is the value test uses when there
+     * is no previous sort key or the first character of the sort key differs.
+     * NOTE(review): indices 0-5 presumably correspond to the absolute value
+     * returned by UCA.strengthDifference -- confirm against that method.
+     */
+    static final String[] CLASSNAME = {
+        "<td class='q'>", 
+        "<td class='q'>", 
+        "<td class='q'>", 
+        "<td class='t'>", 
+        "<td class='s'>", 
+        "<td class='p'>", 
+        "<td class='f'>"};
+        
+
+    static PrintWriter indexFile;
+    
+    /**
+     * Starts a new chart page for a chunk: opens the file, adds a link to it in
+     * the index file, and writes the HTML preamble up to the opening table tag.
+     * @param count how many files have been opened for this chunk so far; values
+     *        above 1 are appended to the file name
+     * @param script chunk id as understood by getChunkName
+     * @return the writer for the new chart page
+     * @throws IOException declared for the file opening
+     */
+    static PrintWriter openFile(int count, byte script) throws IOException {
+        String chunkTitle = ucd.getCase(getChunkName(script), FULL, TITLE);
+
+        // only second and later files for the same chunk carry a number
+        String suffix = "";
+        if (count > 1) {
+            suffix = String.valueOf(count);
+        }
+        String fileName = "chart_" + chunkTitle + suffix + ".html";
+
+        PrintWriter chart = Utility.openPrintWriter("CollationCharts\\" + fileName);
+        Utility.fixDot();
+        System.out.println("Writing: " + chunkTitle);
+
+        indexFile.println(" | <a href = '" + fileName + "'>" + chunkTitle + "</a>");
+
+        String pageTitle = "UCA: " + chunkTitle;
+        chart.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+        chart.println("<title>" + pageTitle + "</title>");
+        chart.println("<link rel='stylesheet' href='charts.css' type='text/css'>");
+        chart.println("</head><body><h2>" + chunkTitle + "</h2>");
+        chart.println("<table>");
+        return chart;
+    }
+    
+    /**
+     * Maps a chunk id to its display name. The ids -3, -2 and -1 are the
+     * pseudo-chunks NULL, IGNORABLE and VARIABLE; HIRAGANA_SCRIPT names the
+     * combined kana chunk; anything else is looked up as a script id.
+     * @param script chunk id
+     * @return the name of the chunk
+     */
+    static String getChunkName(byte script) {
+        switch (script) {
+            case -3: return "NULL";
+            case -2: return "IGNORABLE";
+            case -1: return "VARIABLE";
+            default: break;
+        }
+        if (script == HIRAGANA_SCRIPT) {
+            return "KATAKANA-HIRAGANA";
+        }
+        return ucd.getScriptID_fromIndex(script);
+    }
+
+    /**
+     * Finishes a chart page and closes its writer. openFile emits the body tag
+     * before the table tag, so the table is closed first, then body, then html.
+     * @param output writer returned by openFile, or null, in which case nothing
+     *        is done
+     */
+    static void closeFile(PrintWriter output) {
+        if (output == null) return;
+        output.println("</table></body></html>");
+        output.close();
+    }
+}
+
+
+
+    /*
+    static final IntStack p1 = new IntStack(30);
+    static final IntStack s1 = new IntStack(30);
+    static final IntStack t1 = new IntStack(30);
+    static final IntStack p2 = new IntStack(30);
+    static final IntStack s2 = new IntStack(30);
+    static final IntStack t2 = new IntStack(30);
+    
+    static int getStrengthDifference(CEList ceList, CEList lastCEList) {
+        extractNonzeros(ceList, p1, s1, t1);
+        extractNonzeros(lastCEList, p2, s2, t2);
+        int temp = p1.compareTo(p2);
+        if (temp != 0) return 3;
+        temp = s1.compareTo(s2);
+        if (temp != 0) return 2;
+        temp = t1.compareTo(t2);
+        if (temp != 0) return 1;
+        return 0;
+    }
+    
+    static void extractNonzeros(CEList ceList, IntStack primaries, IntStack secondaries, IntStack tertiaries) {
+        primaries.clear();
+        secondaries.clear();
+        tertiaries.clear();
+        
+        for (int i = 0; i < ceList.length(); ++i) {
+            int ce = ceList.at(i);
+            int temp = UCA.getPrimary(ce);
+            if (temp != 0) primaries.push(temp);
+            temp = UCA.getSecondary(ce);
+            if (temp != 0) secondaries.push(temp);
+            temp = UCA.getTertiary(ce);
+            if (temp != 0) tertiaries.push(temp);
+        }
+    }
+    */
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ 
-* $Date: 2001/09/06 01:30:30 $ 
-* $Revision: 1.3 $
+* $Date: 2001/09/19 23:32:21 $ 
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -34,7 +34,6 @@ public class WriteCollationData implements UCD_Types {
    static final boolean EXCLUDE_UNSUPPORTED = true;    
    static final boolean GENERATED_NFC_MISMATCHES = true;    
    static final boolean DO_CHARTS = true;   
-    static final boolean WRITE_NAME_IN_CONFORMANCE = true;   
    
    
    static UCA collator;
@ -58,12 +57,13 @@ public class WriteCollationData implements UCD_Types {
        ucd = UCD.make("");
        
        if (args.length == 0) args = new String[] {"?"}; // force the help comment
-        boolean hex = false;
+        boolean shortPrint = false;
        
        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            if      (arg.equalsIgnoreCase("WriteRulesWithNames")) writeRules(WITH_NAMES);
            else if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(collator);
+            else if (arg.equalsIgnoreCase("WriteCharts")) WriteCharts.test(collator);
            else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(collator);
            else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(collator);
            
@ -72,15 +72,15 @@ public class WriteCollationData implements UCD_Types {
            else if (arg.equalsIgnoreCase("checkDisjointIgnorables")) checkDisjointIgnorables();
            else if (arg.equalsIgnoreCase("writeContractions")) writeContractions();
            else if (arg.equalsIgnoreCase("FractionalUCA")) writeFractionalUCA("FractionalUCA");
-            else if (arg.equalsIgnoreCase("writeConformance")) writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE, hex);
-            else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) writeConformance("CollationTest_SHIFTED.txt", UCA.SHIFTED, hex);
+            else if (arg.equalsIgnoreCase("writeConformance")) writeConformance("CollationTest_NON_IGNORABLE", UCA.NON_IGNORABLE, shortPrint);
+            else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) writeConformance("CollationTest_SHIFTED", UCA.SHIFTED, shortPrint);
            else if (arg.equalsIgnoreCase("testCompatibilityCharacters")) testCompatibilityCharacters();
            else if (arg.equalsIgnoreCase("writeCollationValidityLog")) writeCollationValidityLog();
            else if (arg.equalsIgnoreCase("writeCaseExceptions")) writeCaseExceptions();
            else if (arg.equalsIgnoreCase("writeJavascriptInfo")) writeJavascriptInfo();
            else if (arg.equalsIgnoreCase("writeCaseFolding")) writeCaseFolding();
            else if (arg.equalsIgnoreCase("javatest")) javatest();
-            else if (arg.equalsIgnoreCase("hex")) hex = true;
+            else if (arg.equalsIgnoreCase("short")) shortPrint = true;
            else {
                System.out.println();
                System.out.println("UNKNOWN OPTION (" + arg + "): must be one of the following (case-insensitive)");
@ -339,15 +339,17 @@ public class WriteCollationData implements UCD_Types {
    }
    
    
-    static void writeConformance(String filename, byte option, boolean hex)  throws IOException {
-        UCD ucd30 = UCD.make("300");
+    static void writeConformance(String filename, byte option, boolean shortPrint)  throws IOException {
+        UCD ucd30 = UCD.make("3.0.0");
        
-        PrintWriter log = Utility.openPrintWriter(filename);
-        if (!hex) log.write('\uFEFF');
+        PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt");
+        if (!shortPrint) log.write('\uFEFF');
        
        System.out.println("Sorting");
+        int counter = 0;
        
        for (int i = 0; i <= 0x10FFFF; ++i) {
+            Utility.dot(counter++);
            if (!ucd.isRepresented(i)) continue;
            addStringX(UTF32.valueOf32(i), option);
        }
@ -355,11 +357,14 @@ public class WriteCollationData implements UCD_Types {
        Hashtable multiTable = collator.getContracting();
        Enumeration enum = multiTable.keys();
        while (enum.hasMoreElements()) {
+            Utility.dot(counter++);
            addStringX((String)enum.nextElement(), option);
        }
        
        for (int i = 0; i < extraConformanceTests.length; ++i) { // put in sample non-characters
+            Utility.dot(counter++);
            String s = UTF32.valueOf32(extraConformanceTests[i]);
+            Utility.fixDot();
            System.out.println("Adding: " + Utility.hex(s));
            addStringX(s, option);
        }
@ -367,6 +372,7 @@ public class WriteCollationData implements UCD_Types {
        for (int i = 0; ; ++i) { // add first unallocated character
            if (!ucd.isAssigned(i)) {
                String s = UTF32.valueOf32(i);
+                Utility.fixDot();
                System.out.println("Adding: " + Utility.hex(s));
                addStringX(s, option);
                break;
@ -375,6 +381,7 @@ public class WriteCollationData implements UCD_Types {
        
        
        for (int i = 0; i < extraConformanceRanges.length; ++i) {
+            Utility.dot(counter++);
            int start = extraConformanceRanges[i][0];
            int end = extraConformanceRanges[i][1];
            int increment = ((end - start + 1) / 303) + 1;
@ -388,6 +395,7 @@ public class WriteCollationData implements UCD_Types {
            addStringX(end, option);
        }
        
+        Utility.fixDot();
        System.out.println("Total: " + sortedD.size());
        Iterator it;
        
@ -399,6 +407,7 @@ public class WriteCollationData implements UCD_Types {
        String lastKey = "";
        
        while (it.hasNext()) {
+            Utility.dot(counter);
            String key = (String) it.next();
            String source = (String) sortedD.get(key);
            int fluff = key.charAt(key.length() - 1);
@ -408,14 +417,12 @@ public class WriteCollationData implements UCD_Types {
            //log.println(source);
            String clipped = source.substring(0, source.length()-1);
            String stren = source.substring(source.length()-1);
-            if (hex) {
+            if (!shortPrint) {
                log.print(Utility.hex(source));
-            } else {
-                log.print(source + "\t" + Utility.hex(clipped));
-            }
-            if (WRITE_NAME_IN_CONFORMANCE) {
                log.print(
                    ";\t#" + ucd.getName(clipped)+ "\t" + UCA.toString(key));
+            } else {
+                log.print(source + "\t" + Utility.hex(clipped));
            }
            log.println();
        }
@ -754,7 +761,7 @@ public class WriteCollationData implements UCD_Types {
        
        int[] ces = new int[50];
        
-        UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        int[] lenArray = new int[1];
        
        diLog.println("# Contractions");
@ -819,7 +826,7 @@ public class WriteCollationData implements UCD_Types {
            String s = String.valueOf(ch);
            int len = collator.getCEs(s, true, ces);
            */
-        UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        int[] lenArray = new int[1];
        
        Set sortedCodes = new TreeSet();
@ -987,7 +994,7 @@ public class WriteCollationData implements UCD_Types {
            String s = String.valueOf(ch);
            int len = collator.getCEs(s, true, ces);
            */
-        UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        int[] lenArray = new int[1];
        
        Set sortedCodes = new TreeSet();
@ -1179,7 +1186,7 @@ public class WriteCollationData implements UCD_Types {
        java.util.Comparator cm = new RuleComparator();
        Map ordered = new TreeMap(cm);
        
-        UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, 
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, 
            SKIP_CANONICAL_DECOMPOSIBLES ? nfd : null);
        int[] lenArray = new int[1];

--- a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
-* $Date: 2001/09/06 01:29:48 $
-* $Revision: 1.3 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -58,8 +58,10 @@ public class DerivedProperty implements UCD_Types {
        DefaultIgnorable = 26,
        GraphemeExtend = 27,
        GraphemeBase = 28,
+        
+        FC_NFC_Closure = 29,

-        LIMIT = 29;
+        LIMIT = 30;
    
    
    public DerivedProperty(UCD ucd) {
@ -156,8 +158,8 @@ public class DerivedProperty implements UCD_Types {
                compName = "NFD for the character";
            }
            header = "# Derived Property: " + name              
-                + "\r\n#   Normalized form " + NAME[i-GenNFD] + ", where DIFFERENT from " + compName + "."
-                + "\r\n#   HANGUL SYLLABLES are algorithmically decomposed, and not listed explicitly."
+                + "\r\n#   Lists characters in normalized form " + NAME[i-GenNFD] + "."
+                + "\r\n#   Only those characters whose normalized forms are DIFFERENT from " + compName + " are listed!"
                + "\r\n#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
                + "\r\n#            It is NOT sufficient to replace characters one-by-one with these results!";
        }
@ -422,6 +424,25 @@ of characters, the first of which has a non-zero combining class.
            boolean hasProperty(int cp) { return getProperty(cp).length() != 0; }
        };
        
+        // FC_NFC_Closure: for a code point a, compute b = NFC(Fold(a)) and
+        // c = NFC(Fold(b)); the property has a value only where c differs from b.
+        dprops[FC_NFC_Closure] = new DProp() {
+            {
+                name = "FC_NFC_Closure";
+                header = "# Derived Property: " + name
+                    + "\r\n#  Generated from computing: b = NFC(Fold(a)); c = NFC(Fold(b));"
+                    + "\r\n#  Then if (c != b) add the mapping from a to c to the set of"
+                    + "\r\n#  mappings that constitute the FC_NFC_Closure list";
+            }
+            public boolean propertyVaries() {return true;} // default
+            // Returns "" for code points that are not represented and for those where
+            // a second fold-and-NFC pass changes nothing; otherwise "FN; " plus hex of c.
+            public String getProperty(int cp) { 
+                if (!ucdData.isRepresented(cp)) return "";
+                String b = nfc.normalize(fold(cp));
+                String c = nfc.normalize(fold(b));
+                if (c.equals(b)) return "";
+                return "FN; " + Utility.hex(c);
+            } // default
+            // A code point has the property exactly when getProperty is non-empty.
+            boolean hasProperty(int cp) { return getProperty(cp).length() != 0; }
+        };
+        
        for (int i = QuickNFD; i <= QuickNFKC; ++i) {
            dprops[i] = new QuickDProp(i);
        }        
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
-* $Date: 2001/09/06 01:29:48 $
-* $Revision: 1.4 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.5 $
 *
 *******************************************************************************
 */
@ -27,7 +27,7 @@ final class DerivedPropertyLister extends PropertyLister {
    int width;
    boolean varies;

-    public DerivedPropertyLister(UCD ucd, int propMask, PrintStream output) {
+    public DerivedPropertyLister(UCD ucd, int propMask, PrintWriter output) {
        this.propMask = propMask;
        this.output = output;
        this.ucdData = ucd;
@ -87,7 +87,7 @@ final class DerivedPropertyLister extends PropertyLister {
    String last;

    public byte status(int cp) {
-        if (!ucdData.isAssigned(cp)) return EXCLUDE;
+        if (!ucdData.isAssigned(cp) && propMask != DerivedProperty.DefaultIgnorable) return EXCLUDE;
        if (!varies) {
            return dprop.hasProperty(cp, propMask) ? INCLUDE : EXCLUDE;
        }
--- a/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -17,14 +17,11 @@ import java.io.*;
 class DiffPropertyLister extends PropertyLister {
    private UCD oldUCD;

-    public DiffPropertyLister(String oldUCDName, String newUCDName, PrintStream output) {
+    public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) {
        this.output = output;
        this.ucdData = UCD.make(newUCDName);
        if (oldUCDName != null) this.oldUCD = UCD.make(oldUCDName);
-    }
-
-    public byte status (int cp) {
-        return INCLUDE;
+        breakByCategory = false;
    }

    public String propertyName(int cp) {
@ -42,14 +39,23 @@ class DiffPropertyLister extends PropertyLister {
    */


-    public byte status(int lastCp, int cp) {
+    public byte status(int cp) {
        /*if (cp == 0xFFFF) {
            System.out.println("# " + Utility.hex(cp));
        }
        */
        return ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp)) ? INCLUDE : EXCLUDE;
    }
-
+    
+    /**
+     * Builds the header comment line for this listing.
+     * With no older UCD to compare against, the line names the single version
+     * listed; otherwise it names both versions being compared.
+     */
+    public String headerString() {
+        if (oldUCD == null) {
+            return "# Allocated as of " + ucdData.getVersion();
+        }
+        return "# Differences between " + ucdData.getVersion() + " and " + oldUCD.getVersion();
+    }
+    
+    /*
    public int print() {
        String status;
        if (oldUCD != null) {
@ -73,6 +79,7 @@ class DiffPropertyLister extends PropertyLister {
        output.println();
        return count;
    }
+    */

 }

--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -20,7 +20,7 @@ import com.ibm.text.utility.*;

 public class GenerateCaseFolding implements UCD_Types {
    public static boolean DEBUG = false;
-    public static UCD ucd = UCD.make("310");
+    public static UCD ucd = UCD.make("");

    public static void main(String[] args) throws java.io.IOException {
        makeCaseFold();
@ -285,71 +285,4 @@ public class GenerateCaseFolding implements UCD_Types {
        }
        return result + "}";
    }
-
-    static final void getAge() throws IOException {
-        PrintStream log = new PrintStream(
-            new BufferedOutputStream (
-            new FileOutputStream("UnicodeAge.txt"),
-            4*1024));
-        try {
-            log.println("# Derived file showing when various code points were allocated in Unicode");
-            log.println("# author: M. Davis");
-            log.println("# generated: " + new Date());
-            log.println("# Notes:");
-            log.println("# - The old Hangul Syllables (removed from 2.0) are not included in the 110 listing.");
-            log.println("# - The supplementary private use code points, although allocated earlier,");
-            log.println("#   were NOT specifically listed in the UCD until 3.0.1, and are not included until then.");
-            new DiffPropertyLister(null, "110", log).print();
-            new DiffPropertyLister("110", "200", log).print();
-            new DiffPropertyLister("200", "210", log).print();
-            new DiffPropertyLister("210", "300", log).print();
-            new DiffPropertyLister("300", "310", log).print();
-            /*
-            printDiff("110", "200");
-	        UnicodeSet u11 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-1.1.txt", false);
-	        UnicodeSet u20 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.0.txt", false);
-	        UnicodeSet u21 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.1.txt", false);
-	        UnicodeSet u30 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-3.0.txt", false);
-	        UnicodeSet u31 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-3.1.txt", false);
-
-            log.println();
-            log.println("# Code points assigned in Unicode 1.1 (minus Hangul Syllables): "
-                + n.format(u11.count()));
-            log.println();
-            u11.print(log, false, false, "1.1");
-
-            UnicodeSet u20m = new UnicodeSet(u20).remove(u11);
-            log.println();
-            log.println("# Code points assigned in Unicode 2.0 (minus Unicode 1.1): "
-                + n.format(u20m.count()));
-            log.println();
-            u20m.print(log, false, false, "2.0");
-
-            UnicodeSet u21m = new UnicodeSet(u21).remove(u20);
-            log.println();
-            log.println("# Code points assigned in Unicode 2.1 (minus Unicode 2.0): "
-                + n.format(u21m.count()));
-            log.println();
-            u21m.print(log, false, false, "2.1");
-
-            UnicodeSet u30m = new UnicodeSet(u30).remove(u21);
-            log.println();
-            log.println("# Code points assigned in Unicode 3.0 (minus Unicode 2.1): "
-                + n.format(u30m.count()));
-            log.println();
-            u30m.print(log, false, false, "3.0");
-
-            UnicodeSet u31m = new UnicodeSet(u31).remove(u30);
-            log.println();
-            log.println("# Code points assigned in Unicode 3.1 (minus Unicode 3.0): "
-                + n.format(u31m.count()));
-            log.println();
-            u31m.print(log, false, false, "3.1");
-            */
-        } finally {
-            if (log != null) log.close();
-        }
-
-    }
-
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2001/09/06 01:29:48 $
-* $Revision: 1.5 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -22,9 +22,9 @@ import com.ibm.text.utility.*;

 public class GenerateData implements UCD_Types {

-    public static void main (String[] args) throws IOException {
+    public static void main (String inVersion, String[] args) throws IOException {
        System.out.println("START");
-        ucd = UCD.make();
+        ucd = UCD.make(inVersion);
        System.out.println("Loaded UCD " + ucd.getVersion() + " " + (new Date(ucd.getDate())));
        String version = ucd.getVersion();

@ -36,10 +36,7 @@ public class GenerateData implements UCD_Types {
            Utility.fixDot();
            System.out.println("Argument: " + args[i]);

-            if (arg.equalsIgnoreCase("version")) {
-                version = args[++i];
-                ucd = UCD.make(version);
-            } else if (arg.equalsIgnoreCase("partition")) {
+            if (arg.equalsIgnoreCase("partition")) {
                partitionProperties();
            } else if (arg.equalsIgnoreCase("list")) {
                listProperties();
@ -91,9 +88,12 @@ public class GenerateData implements UCD_Types {
                    
            } else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
                mask = Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf);
-                mask = Utility.setBits(mask, DerivedProperty.DefaultIgnorable, DerivedProperty.LIMIT-1);
+                mask = Utility.setBits(mask, DerivedProperty.DefaultIgnorable, DerivedProperty.FC_NFC_Closure-1);
                generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-" + version );
                
+            } else if (arg.equalsIgnoreCase("DerivedAge")) {
+                generateAge("DerivedAge-" + version );
+                
            } else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
                generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
                    "DerivedLineBreak-" + version );
@ -181,7 +181,7 @@ public class GenerateData implements UCD_Types {

    static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;

-    public static void doHeader(String fileName, PrintStream output, int headerChoice) {
+    public static void doHeader(String fileName, PrintWriter output, int headerChoice) {
        output.println("# " + fileName + ".txt");
        output.println("#");
        if (headerChoice == HEADER_SCRIPTS) {
@ -203,7 +203,7 @@ public class GenerateData implements UCD_Types {
    }

    public static void generateDerived (int bitMask, int headerChoice, String fileName) throws IOException {
-        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + fileName + "dX.txt"));
+        PrintWriter output = Utility.openPrintWriter(fileName + "dX.txt");
        doHeader(fileName, output, headerChoice);
        for (int i = 0; i < DerivedProperty.LIMIT; ++i) {
            if ((bitMask & (1<<i)) == 0) continue;
@ -218,8 +218,8 @@ public class GenerateData implements UCD_Types {

    /*
    public static void listStrings(String file, int type, int subtype) throws IOException {
-        ucd = UCD.make("310");
-        UCD ucd30 = UCD.make("300");
+        ucd = UCD.make("3.1.0");
+        UCD ucd30 = UCD.make("3.0.0");
        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));

        for (int i = 0; i < 0x10FFFF; ++i) {
@ -238,7 +238,7 @@ public class GenerateData implements UCD_Types {
    */

    public static void generateCompExclusions() throws IOException {
-        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "CompositionExclusionsDelta.txt"));
+        PrintWriter output = Utility.openPrintWriter("CompositionExclusionsDelta.txt");
        new CompLister(output).print();
        output.close();
    }
@ -247,10 +247,10 @@ public class GenerateData implements UCD_Types {
        UCD oldUCD;
        int oldLength = 0;

-        public CompLister(PrintStream output) {
+        public CompLister(PrintWriter output) {
            this.output = output;
-            ucdData = UCD.make("310");
-            oldUCD = UCD.make("300");
+            ucdData = UCD.make("3.1.0");
+            oldUCD = UCD.make("3.0.0");
            showOnConsole = true;
        }
        public String propertyName(int cp) {
@ -310,7 +310,7 @@ public class GenerateData implements UCD_Types {

    public static void listDifferences() throws IOException {

-        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "PropertyDifferences.txt"));
+        PrintWriter output = Utility.openPrintWriter("PropertyDifferences.txt");

        for (int i = 1; i < LIMIT_ENUM; ++i) {
            int iType = i & 0xFF00;
@ -441,7 +441,7 @@ public class GenerateData implements UCD_Types {
        //*/


-        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file + "dX.txt"));
+        PrintWriter output = Utility.openPrintWriter(file + "dX.txt");
        doHeader(file, output, headerChoice);
        int last = -1;
        for (int i = startEnum; i < endEnum; ++i) {
@ -685,5 +685,81 @@ public class GenerateData implements UCD_Types {
        "\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD"

    };
+    
+    /**
+     * Writes the derived age listing: a fixed header followed by one
+     * DiffPropertyLister section per UCD version, each listing relative to
+     * the version before it (the first one has no predecessor).
+     * @param filename  base name; "dX.txt" is appended before the file is
+     *                  opened through Utility.openPrintWriter
+     */
+    static final void generateAge(String filename) throws IOException {
+        // Versions in release order; every entry is compared with the one before it.
+        final String[] versions = {"1.1.0", "2.0.0", "2.1.2", "3.0.0", "3.1.0"};
+
+        PrintWriter log = Utility.openPrintWriter(filename + "dX.txt");
+        try {
+            log.println("# Derived file showing when various code points were allocated in Unicode");
+            log.println("# author: M. Davis");
+            log.println("# generated: " + new Date());
+            log.println("# Notes:");
+            log.println("# - The old Hangul Syllables (removed from 2.0) are not included in the 1.1.0 listing.");
+            log.println("# - The supplementary private use code points and the non-character code points");
+            log.println("#   were allocated in version 2.0, but not specifically listed in the UCD");
+            log.println("#   until versions 3.0.1 and 3.1.0 respectively.");
+
+            String previous = null;     // no baseline for the first section
+            for (int v = 0; v < versions.length; ++v) {
+                log.println("# ================================================");
+                log.println();
+                new DiffPropertyLister(previous, versions[v], log).print();
+                previous = versions[v];
+            }
+        } finally {
+            if (log != null) log.close();
+        }
+    }
+    

 }
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2001/09/06 01:29:48 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -49,7 +49,7 @@ public final class Main {
            //else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
            else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
            else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
-            else if (arg.equalsIgnoreCase("GenerateData")) GenerateData.main(Utility.split(args[++i],','));
+            else if (arg.equalsIgnoreCase("GenerateData")) GenerateData.main(ucdVersion, Utility.split(args[++i],','));
            else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
            else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
                GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
--- a/tools/unicodetools/com/ibm/text/UCD/MyFloatLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyFloatLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyFloatLister.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -17,7 +17,7 @@ import java.io.*;
 class MyFloatLister extends PropertyLister {
    private float propMask;

-    public MyFloatLister(UCD ucd, float f, PrintStream output) {
+    public MyFloatLister(UCD ucd, float f, PrintWriter output) {
        this.propMask = f;
        this.output = output;
        this.ucdData = ucd;
--- a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -22,7 +22,7 @@ final class MyPropertyLister extends PropertyLister {

    private int propMask;

-    public MyPropertyLister(UCD ucd, int propMask, PrintStream output) {
+    public MyPropertyLister(UCD ucd, int propMask, PrintWriter output) {
        this.propMask = propMask;
        this.output = output;
        this.ucdData = ucd;
@ -67,7 +67,7 @@ final class MyPropertyLister extends PropertyLister {
            if (s.length() == 0) s = "Other Combining Class";
            return "# " + s;
        } else if (main == BINARY_PROPERTIES) {
-            return "# Binary Property";
+            return "";
        } else if (main == JOINING_GROUP) {
            return "";
        } else {
--- a/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
@ -0,0 +1,348 @@
+package com.ibm.text.UCD;
+
+import java.util.*;
+import com.ibm.text.*;
+
+import com.ibm.text.utility.*;
+
+
+/**
+ * Implements Unicode Normalization Forms C, D, KC, KD.<br>
+ * See UTR#15 for details.<br>
+ * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
+ * The Unicode Consortium makes no expressed or implied warranty of any
+ * kind, and assumes no liability for errors or omissions.
+ * No liability is assumed for incidental and consequential damages
+ * in connection with or arising out of the use of the information here.
+ * @author Mark Davis
+ */
+
+public class NormalizerSample implements UCD_Types {
+    static final String copyright = "Copyright (C) 2001, IBM Corp. and Unicode Inc. All Rights Reserved.";
+
+    public static boolean SHOW_PROGRESS = false;
+
+    /**
+     * Creates a normalizer for a given form and Unicode version.
+     * @param   form            form selector; its COMPOSITION_MASK and
+     *                          COMPATIBILITY_MASK bits are tested to decide
+     *                          whether to compose and whether to use
+     *                          compatibility decompositions
+     * @param   unicodeVersion  version handed to getData; an empty string
+     *                          selects UCD.latestVersion
+     */
+    public NormalizerSample(byte form, String unicodeVersion) {
+        this.composition = (form & COMPOSITION_MASK) != 0;
+        this.compatibility = (form & COMPATIBILITY_MASK) != 0;
+        this.data = getData(unicodeVersion);
+    }
+
+    /**
+     * Creates a normalizer for a given form, passing "" as the Unicode
+     * version (which getData maps to UCD.latestVersion).
+     * @param   form    form selector, e.g. NFD, NFKD, NFC or NFKC
+     */
+    public NormalizerSample(byte form) {
+        this(form,"");
+    }
+
+    /**
+    * Masks for the form selector
+    */
+    public static final byte
+        COMPATIBILITY_MASK = 1,
+        COMPOSITION_MASK = 2;
+
+    /**
+    * Normalization Form Selector
+    */
+    public static final byte
+        NFD = 0 ,
+        NFKD = COMPATIBILITY_MASK,
+        NFC = COMPOSITION_MASK,
+        NFKC = (byte)(COMPATIBILITY_MASK + COMPOSITION_MASK);
+
+    /**
+    * Normalizes text according to the chosen form, writing the result into
+    * the target buffer. Nothing here clears the buffer first: decomposed
+    * text is inserted starting from the current end of target, and the
+    * composition pass (when the form composes) runs over the whole buffer.
+    * Callers that want only the normalized source should pass an empty
+    * buffer. An empty source leaves target untouched.
+    * @param   source      the original text, unnormalized
+    * @param   target      the buffer that receives the normalized text
+    * @return  the target buffer
+    */
+    public StringBuffer normalize(String source, StringBuffer target) {
+        if (source.length() == 0) {
+            return target;      // nothing to decompose or compose
+        }
+
+        // Decomposition always runs; composition only for the composing forms.
+        internalDecompose(source, target);
+        if (composition) {
+            internalCompose(target);
+        }
+        return target;
+    }
+
+    /**
+    * Normalizes text according to the chosen form, using a fresh
+    * StringBuffer for the result.
+    * @param   source      the original text, unnormalized
+    * @return  the resulting normalized text
+    */
+    public String normalize(String source) {
+        return normalize(source, new StringBuffer()).toString();
+    }
+
+    /**
+    * Normalizes a single code point according to the chosen form.
+    * @param   cp          the code point to normalize; it is converted to a
+    *                      string with UTF16.valueOf first
+    * @return  the resulting normalized text
+    */
+    public String normalize(int cp) {
+        return normalize(UTF16.valueOf(cp));
+    }
+
+    /**
+    */
+    private StringBuffer hasDecompositionBuffer = new StringBuffer();
+
+    /**
+    * Checks whether normalizing a single code point with this normalizer
+    * yields anything other than the code point itself.
+    * Uses the shared hasDecompositionBuffer field as scratch space.
+    * @param   cp      the code point to test
+    * @return  true if the normalized text differs from cp
+    */
+    public boolean hasDecomposition(int cp) {
+        String original = UTF16.valueOf(cp);
+        hasDecompositionBuffer.setLength(0);
+        normalize(original, hasDecompositionBuffer);
+        // Compare whole strings instead of testing length() != 1: a
+        // supplementary code point takes two chars even when it normalizes
+        // to itself, so the length test reported it as decomposable.
+        return !original.equals(hasDecompositionBuffer.toString());
+    }
+
+    /**
+    * Utility: Asks the underlying data whether the normalization of a
+    * character differs from the character itself, for the form
+    * (composition and compatibility settings) of this particular normalizer.
+    * The answer is derived from the character's decomposition type in the
+    * Unicode Character Database.
+    * @param   ch      the source character (code point)
+    * @return  the result of data.normalizationDiffers for this form
+    */
+    public boolean normalizationDiffers(int ch) {
+        return data.normalizationDiffers(ch, composition, compatibility);
+    }
+
+    /**
+    * Utility: Gets recursive decomposition of a character from the
+    * Unicode Character Database. Whether compatibility mappings are
+    * followed in addition to canonical ones is decided by the form this
+    * normalizer was created with, not by an argument.
+    * @param   ch      the source code point; declared as int (widened from
+    *                  char, so existing char callers still compile) so that
+    *                  supplementary code points can be passed as well
+    * @param   buffer  buffer to which the decomposition is appended
+    */
+    public void getRecursiveDecomposition(int ch, StringBuffer buffer) {
+        data.getRecursiveDecomposition(ch, buffer, compatibility);
+    }
+
+
+    // ======================================
+    //                  PRIVATES
+    // ======================================
+
+    /**
+     * The current form.
+     */
+    private boolean composition;
+    private boolean compatibility;
+
+    /**
+    * Decomposes text, either canonical or compatibility (chosen by the
+    * compatibility field of this normalizer), and adds the result to the
+    * target buffer with combining marks put in canonical order.
+    * The buffer is not cleared here; insertion starts from its current end.
+    * @param   source      the original text, unnormalized
+    * @param   target      the buffer receiving the decomposed text
+    */
+    private void internalDecompose(String source, StringBuffer target) {
+        StringBuffer buffer = new StringBuffer();   // scratch: decomposition of one source code point
+        int ch32;
+        // step by code point, not by char, so surrogate pairs stay together
+        for (int i = 0; i < source.length(); i += UTF16.getCharCount(ch32)) {
+            buffer.setLength(0);
+            ch32 = UTF16.charAt(source, i);
+            data.getRecursiveDecomposition(ch32, buffer, compatibility);
+
+            // add all of the characters in the decomposition.
+            // (may be just the original character, if there was
+            // no decomposition mapping)
+
+            int ch;
+            for (int j = 0; j < buffer.length(); j += UTF16.getCharCount(ch)) {
+                ch = UTF16.charAt(buffer, j);
+                int chClass = data.getCanonicalClass(ch);
+                int k = target.length(); // insertion point
+                if (chClass != 0) {
+
+                    // bubble-sort combining marks as necessary:
+                    // walk back past every code point whose class is greater
+                    // than chClass; stop at the first one that is <= chClass
+                    // (equal classes keep their order, class 0 always stops the walk)
+
+                    int ch2;
+                    for (; k > 0; k -= UTF16.getCharCount(ch2)) {
+                        ch2 = UTF16.charAt(target, k-1);
+                        if (data.getCanonicalClass(ch2) <= chClass) break;
+                    }
+                }
+                target.insert(k, UTF16.valueOf(ch));
+            }
+        }
+    }
+
+    /**
+    * Composes text in place. Target must already
+    * have been decomposed, and must not be empty: the first code point is
+    * read unconditionally.
+    * Uses UTF16, which is a utility class for supplementary character support in Java.
+    * @param   target      input: decomposed text.
+    *                      output: the resulting normalized text.
+    */
+    private void internalCompose(StringBuffer target) {
+        int starterPos = 0;                          // position of the current starter in target
+        int starterCh = UTF16.charAt(target,0);
+        int compPos = UTF16.getCharCount(starterCh); // length of last composition
+        int lastClass = data.getCanonicalClass(starterCh);
+        // 256 is presumably above every real combining class, so the
+        // "lastClass < chClass" test below cannot succeed until lastClass is reset
+        if (lastClass != 0) lastClass = 256; // fix for strings starting with a combining mark
+        int oldLen = target.length();
+
+        // Loop on the decomposed characters, combining where possible
+
+        int ch;
+        for (int decompPos = compPos; decompPos < target.length(); decompPos += UTF16.getCharCount(ch)) {
+            ch = UTF16.charAt(target, decompPos);
+            if (SHOW_PROGRESS) System.out.println(Utility.hex(target)
+                + ", decompPos: " + decompPos
+                + ", compPos: " + compPos
+                + ", ch: " + Utility.hex(ch)
+                );
+            int chClass = data.getCanonicalClass(ch);
+            int composite = data.getPairwiseComposition(starterCh, ch);
+            // combine only if a composite exists and the last character kept
+            // since the starter either has class 0 or a class strictly below ch's
+            if (composite != data.NOT_COMPOSITE
+            && (lastClass < chClass || lastClass == 0)) {
+                UTF16.setCharAt(target, starterPos, composite);
+                // we know that we will only be replacing non-supplementaries by non-supplementaries
+                // so we don't have to adjust the decompPos
+                starterCh = composite;
+            } else {
+                // ch is kept: a class-0 character becomes the new starter
+                if (chClass == 0) {
+                    starterPos = compPos;
+                    starterCh  = ch;
+                }
+                lastClass = chClass;
+                UTF16.setCharAt(target, compPos, ch);
+                if (target.length() != oldLen) { // MAY HAVE TO ADJUST!
+                    System.out.println("ADJUSTING: " + Utility.hex(target));
+                    decompPos += target.length() - oldLen;
+                    oldLen = target.length();
+                }
+                compPos += UTF16.getCharCount(ch);
+            }
+        }
+        // everything from compPos on is leftover from before composition
+        target.setLength(compPos);
+    }
+    
+    // The following class makes use of the UCD class, which accesses data in the Unicode Character Database
+
+    static class Stub {
+        private UCD ucd;
+        private HashMap compTable = new HashMap();
+        private BitSet isSecond = new BitSet();
+        private BitSet canonicalRecompose = new BitSet();
+        private BitSet compatibilityRecompose = new BitSet();
+        static final int NOT_COMPOSITE = 0xFFFF;
+
+        /**
+         * Loads the UCD for the given version and builds the composition
+         * tables from it. Code points that are unassigned, or for which
+         * ucd.isPUA is true, are skipped. For the rest:
+         * trailing jamo are recorded in isSecond, and every canonical
+         * decomposition that is not a composition exclusion, maps to exactly
+         * two code points and starts with a class-0 code point is entered
+         * into compTable (key: first code point in the high 32 bits, second
+         * in the low bits) and flagged in the recompose bit sets.
+         * @param version  UCD version handed to UCD.make
+         * @throws ChainException wrapping any exception raised while
+         *         processing a code point, including the
+         *         IllegalArgumentException for a mapping longer than two
+         *         code points
+         */
+        Stub(String version) {
+            ucd = UCD.make(version);
+            // Inclusive bound: 0x10FFFF is itself a code point and was
+            // previously never visited.
+            for (int i = 0; i <= 0x10FFFF; ++i) {
+                if (!ucd.isAssigned(i)) continue;
+                if (ucd.isPUA(i)) continue;
+                if (ucd.isTrailingJamo(i)) isSecond.set(i);
+                byte dt = ucd.getDecompositionType(i);
+                if (dt != CANONICAL) continue;
+                if (!ucd.getBinaryProperty(i, CompositionExclusion)) {
+                    try {
+                        String s = ucd.getDecompositionMapping(i);
+                        int len = UTF16.countCodePoint(s);
+                        if (len != 2) {
+                            if (len > 2) throw new IllegalArgumentException("BAD LENGTH: " + len + ucd.toString(i));
+                            continue;
+                        }
+                        int a = UTF16.charAt(s, 0);
+                        // pairs whose first code point is not class 0 are not recorded
+                        if (ucd.getCombiningClass(a) != 0) continue;
+
+                        int b = UTF16.charAt(s, UTF16.getCharCount(a));
+                        isSecond.set(b);
+
+                        // have a recomposition, so set the bit
+                        canonicalRecompose.set(i);
+
+                        // set the compatibility recomposition bit
+                        // ONLY if the component characters
+                        // don't compatibility decompose
+                        if (ucd.getDecompositionType(a) <= CANONICAL
+                         && ucd.getDecompositionType(b) <= CANONICAL) {
+                            compatibilityRecompose.set(i);
+                        }
+
+                        long key = (((long)a)<<32) | b;
+
+                        compTable.put(new Long(key), new Integer(i));
+                    } catch (Exception e) {
+                        throw new ChainException("Error: {0}", new Object[]{ucd.toString(i)}, e);
+                    }
+                }
+            }
+        }
+
+        /**
+         * Returns the combining class that the loaded UCD reports for cp.
+         * @param cp  the code point to look up
+         */
+        short getCanonicalClass(int cp) {
+            return ucd.getCombiningClass(cp);
+        }
+
+        /**
+         * Tells whether cp was recorded in isSecond by the constructor, i.e.
+         * it is a trailing jamo or the second code point of a recorded
+         * two-code-point canonical decomposition.
+         * @param cp  the code point to test
+         */
+        boolean isTrailing(int cp) {
+            return isSecond.get(cp);
+        }
+
+        /**
+         * Decides from the decomposition type whether normalizing cp changes it.
+         * @param cp             the code point to test
+         * @param composition    true for the composing forms
+         * @param compatibility  true to count compatibility decompositions
+         *                       as well as canonical ones
+         * @return true if cp decomposes under the chosen kind and, for
+         *         composing forms, is not flagged as recomposing
+         */
+        boolean normalizationDiffers(int cp, boolean composition, boolean compatibility) {
+            byte dt = ucd.getDecompositionType(cp);
+
+            // Step 1: does the character decompose at all under this kind?
+            boolean decomposes = compatibility ? dt >= CANONICAL : dt == CANONICAL;
+            if (!decomposes) {
+                return false;
+            }
+            if (!composition) {
+                return true;
+            }
+
+            // Step 2 (composing forms only): characters that RECOMPOSE
+            // come back unchanged, so they do not differ.
+            BitSet recomposing = compatibility ? compatibilityRecompose : canonicalRecompose;
+            return !recomposing.get(cp);
+        }
+
+        /**
+         * Appends the full (recursive) decomposition of cp to buffer.
+         * Canonical mappings are always followed; mappings of any higher
+         * decomposition type only when compatibility is true. A code point
+         * that is not decomposed is appended as is.
+         * @param cp             the code point to decompose
+         * @param buffer         receives the result (appended)
+         * @param compatibility  whether to follow compatibility mappings too
+         */
+        public void getRecursiveDecomposition(int cp, StringBuffer buffer, boolean compatibility) {
+            byte dt = ucd.getDecompositionType(cp);
+            boolean expand = dt == CANONICAL || (dt > CANONICAL && compatibility);
+            if (!expand) {
+                UTF16.append(buffer, cp);
+                return;
+            }
+
+            // Recurse into each code point of the mapping, stepping over
+            // surrogate pairs as single units.
+            String mapping = ucd.getDecompositionMapping(cp);
+            int pos = 0;
+            while (pos < mapping.length()) {
+                int part = UTF16.charAt(mapping, pos);
+                getRecursiveDecomposition(part, buffer, compatibility);
+                pos += UTF16.getCharCount(part);
+            }
+        }
+
+        /**
+         * Finds the composite for a starter followed by ch.
+         * UCD.composeHangul is consulted first; if it returns 0xFFFF the
+         * pair is looked up in compTable.
+         * @param starterCh  the first code point of the pair
+         * @param ch         the code point following it
+         * @return the composite code point, or NOT_COMPOSITE (0xFFFF) if the
+         *         pair does not compose
+         */
+        int getPairwiseComposition(int starterCh, int ch) {
+            int hangul = UCD.composeHangul(starterCh, ch);
+            if (hangul != 0xFFFF) {
+                return hangul;
+            }
+            // same key layout as used when the table was filled
+            Long pairKey = new Long((((long)starterCh)<<32) | ch);
+            Integer composite = (Integer) compTable.get(pairKey);
+            return (composite == null) ? NOT_COMPOSITE : composite.intValue();
+        }
+
+    }
+
+    /**
+    * Contains normalization data from the Unicode Character Database.
+    * use false for the minimal set, true for the real set.
+    */
+    private Stub data;
+
+    private static HashMap versionCache = new HashMap();
+
+    /**
+     * Returns the normalization data for a Unicode version, building it on
+     * first use and keeping it in versionCache afterwards.
+     * @param version  the version wanted; an empty string means UCD.latestVersion
+     */
+    private static Stub getData (String version) {
+        String key = (version.length() == 0) ? UCD.latestVersion : version;
+        Object cached = versionCache.get(key);
+        if (cached != null) {
+            return (Stub) cached;
+        }
+        Stub fresh = new Stub(key);
+        versionCache.put(key, fresh);
+        return fresh;
+    }
+}
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/PropertyLister.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -15,6 +15,7 @@ package com.ibm.text.UCD;

 import java.io.*;
 import com.ibm.text.utility.*;
+import java.text.NumberFormat;


 abstract public class PropertyLister implements UCD_Types {
@ -24,9 +25,10 @@ abstract public class PropertyLister implements UCD_Types {


    protected UCD ucdData;
-    protected PrintStream output;
+    protected PrintWriter output;
    protected boolean showOnConsole;
    protected boolean usePropertyComment = true;
+    protected boolean breakByCategory = true;
    protected int firstRealCp = -2;
    protected int lastRealCp = -2;
    protected boolean alwaysBreaks = false; // set to true if property only breaks
@ -51,7 +53,7 @@ abstract public class PropertyLister implements UCD_Types {
    }

    public String optionalComment(int cp) {
-        if (!usePropertyComment) return "";
+        if (!usePropertyComment || !breakByCategory) return "";
        int cat = ucdData.getCategory(cp);
        if (cat == Lt || cat == Ll || cat == Lu) return "L&";
        return ucdData.getCategoryID(cp);
@ -167,7 +169,7 @@ abstract public class PropertyLister implements UCD_Types {
            if (s == INCLUDE && firstRealCp != -1) {
                byte cat = ucdData.getCategory(cp);
                if (cat == Lt || cat == Ll) cat = Lu;
-                if (cat != firstRealCpCat) s = BREAK;
+                if (breakByCategory && cat != firstRealCpCat) s = BREAK;
            }

            switch(s) {
@ -208,9 +210,12 @@ abstract public class PropertyLister implements UCD_Types {
        }

        if (count == 0) System.out.println("WARNING -- ZERO COUNT FOR " + header);
+        NumberFormat nf = NumberFormat.getInstance();
+        nf.setMaximumFractionDigits(0);
        output.println();
-        output.println("# Total code points: " + count);
+        output.println("# Total code points: " + nf.format(count));
        output.println();
        return count;
    }
+    
 }
--- a/tools/unicodetools/com/ibm/text/UCD/TestData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
-* $Date: 2001/09/01 00:06:15 $
-* $Revision: 1.3 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -145,7 +145,7 @@ public class TestData implements UCD_Types {

    static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;

-    public static void doHeader(String fileName, PrintStream output, int headerChoice) {
+    public static void doHeader(String fileName, PrintWriter output, int headerChoice) {
        output.println("# " + fixFile(fileName));
        output.println("#");
        if (headerChoice == HEADER_SCRIPTS) {
@ -167,8 +167,8 @@ public class TestData implements UCD_Types {
    }

    public static void generateDerived (int bitMask, int headerChoice, String fileName) throws IOException {
-        ucd = UCD.make("310");
-        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + fileName));
+        ucd = UCD.make("3.1.0");
+        PrintWriter output = Utility.openPrintWriter(fileName);
        doHeader(fileName, output, headerChoice);
        for (int i = 0; i < 32; ++i) {
            if ((bitMask & (1<<i)) == 0) continue;
@ -183,9 +183,9 @@ public class TestData implements UCD_Types {

    /*
    public static void listStrings(String file, int type, int subtype) throws IOException {
-        ucd = UCD.make("310");
-        UCD ucd30 = UCD.make("300");
-        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
+        ucd = UCD.make("3.1.0");
+        UCD ucd30 = UCD.make("3.0.0");
+        PrintWriter output = new PrintStream(new FileOutputStream(GEN_DIR + file));

        for (int i = 0; i < 0x10FFFF; ++i) {
            if ((i & 0xFFF) == 0) System.out.println("# " + i);
@ -203,7 +203,7 @@ public class TestData implements UCD_Types {
    */

    public static void generateCompExclusions() throws IOException {
-        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "CompositionExclusionsDelta.txt"));
+        PrintWriter output = Utility.openPrintWriter("CompositionExclusionsDelta.txt");
        new CompLister(output).print();
        output.close();
    }
@ -212,10 +212,10 @@ public class TestData implements UCD_Types {
        UCD oldUCD;
        int oldLength = 0;

-        public CompLister(PrintStream output) {
+        public CompLister(PrintWriter output) {
            this.output = output;
-            ucdData = UCD.make("310");
-            oldUCD = UCD.make("300");
+            ucdData = UCD.make("3.1.0");
+            oldUCD = UCD.make("3.0.0");
            showOnConsole = true;
        }
        public String propertyName(int cp) {
@ -249,7 +249,7 @@ public class TestData implements UCD_Types {
        //*/


-        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
+        PrintWriter output = Utility.openPrintWriter(file);
        doHeader(file, output, headerChoice);
        int last = -1;
        for (int i = startEnum; i < endEnum; ++i) {
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -42,6 +42,7 @@ public final class UCD implements UCD_Types {
     */
    public static UCD make(String version) {
        if (version == null || version.length() == 0) version = latestVersion;
+        if (version.indexOf('.') < 0) throw new IllegalArgumentException("Version must be of form 3.1.1");
        UCD result = (UCD)versionCache.get(version);
        if (result == null) {
            result = new UCD();
@ -74,6 +75,7 @@ public final class UCD implements UCD_Types {
            if (major < 2 && codePoint > 0xFFFF) return false;
            return true;         // Noncharacter
        }
+        if (major >= 2 && codePoint >= 0xF0000 && codePoint <= 0x10FFFD) return true;
        if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF && major >= 3 && minor >= 1) return true;
        return false;
    }
@ -438,6 +440,21 @@ public final class UCD implements UCD_Types {
    public byte getScript(int codePoint) {
        return get(codePoint, false).script;
    }
+    
+    
+    /**
+     * Returns the script of the last code point in s whose script is not
+     * INHERITED_SCRIPT. Returns COMMON_SCRIPT when s is null, empty, or
+     * contains only INHERITED_SCRIPT code points.
+     */
+    public byte getScript(String s) {
+        if (s == null || s.length() == 0) return COMMON_SCRIPT;
+        byte lastScript = COMMON_SCRIPT;
+        int index = 0;
+        while (index < s.length()) {
+            int codePoint = UTF32.char32At(s, index);
+            index += UTF32.count16(codePoint);
+            byte script = getScript(codePoint);
+            // Inherited code points do not change the answer.
+            if (script != INHERITED_SCRIPT) lastScript = script;
+        }
+        return lastScript;
+    }
+    

    public byte getAge(int codePoint) {
        return get(codePoint, false).age;
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2001/08/31 00:29:50 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:16 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -15,8 +15,8 @@ package com.ibm.text.UCD;

 public interface UCD_Types {
    public static final String DATA_DIR = "C:\\DATA\\";
-    public static final String BIN_DIR = DATA_DIR + "\\BIN\\";
-    public static final String GEN_DIR = DATA_DIR + "\\GEN\\";
+    public static final String BIN_DIR = DATA_DIR + "BIN\\";
+    public static final String GEN_DIR = DATA_DIR + "GEN\\";


    static final byte BINARY_FORMAT = 5; // bumped if binary format of UCD changes
--- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
-* $Date: 2001/09/06 01:29:48 $
-* $Revision: 1.4 $
+* $Date: 2001/09/19 23:33:15 $
+* $Revision: 1.5 $
 *
 *******************************************************************************
 */
@ -20,6 +20,7 @@ import java.math.BigDecimal;
 import java.util.*;
 import java.io.*;
 //import java.text.*;
+import com.ibm.text.*;

 import com.ibm.text.utility.*;

@ -331,6 +332,7 @@ public class VerifyUCD implements UCD_Types {
        System.out.println("Checking Prohibited and Unassigned");
        System.out.println();
        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+            Utility.dot(cp);
            if (mappedOut.get(cp)) continue;

            boolean ucdUnassigned = !ucd.isAllocated(cp);
@ -339,33 +341,89 @@ public class VerifyUCD implements UCD_Types {
            boolean idnProhibited = prohibited.get(cp);

            if (ucdUnassigned && !idnUnassigned) {
-                showError("UCD Unassigned but not IDN Unassigned: ", cp);
+                showError("?UCD Unassigned but not IDN Unassigned", cp, "");
                ++errorCount;
            } else if (!ucdUnassigned && idnUnassigned) {
-                showError("Not UCD Unassigned but IDN Unassigned: ", cp);
+                showError("?Not UCD Unassigned but IDN Unassigned", cp, "");
                ++errorCount;
            }

            if (idnProhibited && unassigned.get(cp)) {
-                showError("Both IDN Unassigned AND IDN Prohibited: ", cp);
+                showError("?Both IDN Unassigned AND IDN Prohibited", cp, "");
                ++errorCount;
            }

            if (guess && !idnProhibited) {
-                showError("UCD ?prohibited? but not IDN Prohibited: ", cp);
+                showError("?UCD ?prohibited? but not IDN Prohibited ", cp, "");
                ++errorCount;
            } else if (!guess && idnProhibited) {
-                showError("Not UCD ?prohibited? but IDN Prohibited: ", cp);
+                showError("?Not UCD ?prohibited? but IDN Prohibited ", cp, "");
                ++errorCount;
            }
+            
+            if (cp == 0x3131) {
+                System.out.println("Debug: " + idnProhibited
+                    + ", " + idnUnassigned
+                    + ", " + nfkc.hasDecomposition(cp)
+                    + ", " + ucd.getCodeAndName(nfkc.normalize(cp))
+                    + ", " + ucd.getCodeAndName(nfc.normalize(cp)));
+            } 
+            
+            if (!idnProhibited && ! idnUnassigned && nfkc.hasDecomposition(cp)) {
+                String kc = nfkc.normalize(cp);
+                String c = nfc.normalize(cp);
+                if (kc.equals(c)) continue;
+                int cp2;
+                boolean excluded = false;
+                for (int j = 0; j < kc.length(); j += UTF16.getCharCount(cp2)) {
+                    cp2 = UTF16.charAt(kc, j);
+                    if (prohibited.get(cp2)) {
+                        showError("Prohibited with NFKC, but output with NFC", cp, "");
+                        excluded = true;
+                        break;
+                    }
+                }
+                if (!excluded) {
+                    showError("Remapped to core abstract character with NFKC (but not NFC)", cp, ""); // , "\t=> " + ucd.getCodeAndName(kc));
+                }
+            }

        }
-        System.out.println();
-        System.out.println("Total Errors: " + errorCount);
+        System.out.println("Writing IDNCheck.txt");
+        
+        
+        PrintWriter log = Utility.openPrintWriter("IDNCheck.txt");
+        log.println("IDN Check");
+        log.println("Total Errors: " + errorCount);
+       
+        Iterator it = idnMap.keySet().iterator();
+        while (it.hasNext()) {
+            String description = (String) it.next();
+            Map map = (Map) idnMap.get(description);
+            log.println();
+            log.println(description);
+            log.println("Total: " + map.size());
+            log.println();
+            
+            Iterator it2 = map.keySet().iterator();
+            while (it2.hasNext()) {
+                Object key = it2.next();
+                String line = (String) map.get(key);
+                log.println("  " + line);
+            }
+        }
+        log.close();
    }
+    
+    static Map idnMap = new HashMap();

-    static void showError(String description, int cp) {
-        System.out.println(description + ucd.getCodeAndName(cp) + " (" + ucd.getCategoryID(cp) + ")");
+    /**
+     * Records a finding for later reporting instead of printing it immediately.
+     * Findings are grouped in idnMap by description; each group is a TreeMap keyed
+     * by code point, so a later call with the same description and code point
+     * replaces the earlier entry.
+     * @param description group heading under which the code point is filed
+     * @param cp the code point concerned
+     * @param option extra text appended to the stored line (may be "")
+     */
+    static void showError(String description, int cp, String option) {
+        Map probe = (Map) idnMap.get(description);
+        if (probe == null) {
+            // First finding with this description: create its group.
+            probe = new TreeMap();
+            idnMap.put(description, probe);
+        }
+        probe.put(new Integer(cp), ucd.getCodeAndName(cp) + " (" + ucd.getCategoryID(cp) + ")" + option);
    }


@ -611,8 +669,7 @@ E0020-E007F; [TAGGING CHARACTERS]
                if (reason.equals("Map out")) {
                    value = Utility.fromHex(parts[1]);
                    Utility.fixDot();
-                    System.out.println("Note, Mapping Out: " + ucd.getCodeAndName(cp)
-                        + ", " + ucd.getCodeAndName(value) + ", " + ucd.getCategoryID(cp));
+                    showError("Mapping Out: ", cp, "");
                    mappedOut.set(cp);
                }
                idnFold.put(key, value);
@ -1033,26 +1090,37 @@ E0020-E007F; [TAGGING CHARACTERS]
    int sum = 0;
    long start, end;
    
+    java.text.NumberFormat nf = java.text.NumberFormat.getPercentInstance();
+    
+    start = System.currentTimeMillis();
+    for (int i = count; i >= 0; --i) {
+        sum += dummy0(i).length();
+    }
+    end = System.currentTimeMillis();
+    double base = end - start;
+    
+    System.out.println("unsynchronized static char[]: " + nf.format((end - start)/base));
+
    start = System.currentTimeMillis();
    for (int i = count; i >= 0; --i) {
        sum += dummy2(i).length();
    }
    end = System.currentTimeMillis();
-    System.out.println("synchronized: " + (end - start));
+    System.out.println("synchronized static char[]: " + nf.format((end - start)/base));

    start = System.currentTimeMillis();
    for (int i = count; i >= 0; --i) {
        sum += dummy1(i).length();
    }
    end = System.currentTimeMillis();
-    System.out.println("char[] each time: " + (end - start));
+    System.out.println("char[] each time: " + nf.format((end - start)/base));
    
    start = System.currentTimeMillis();
    for (int i = count; i >= 0; --i) {
        sum += dummy3(i).length();
    }
    end = System.currentTimeMillis();
-    System.out.println("String +: " + (end - start));
+    System.out.println("two valueofs: " + nf.format((end - start)/base));
    
    System.out.println(sum);
  }
@ -1074,6 +1142,12 @@ E0020-E007F; [TAGGING CHARACTERS]
    }
  }
  
+  /**
+   * Timing-test variant: stores the upper and lower 16 bits of a as chars in
+   * temp2 (declared elsewhere in this class) and returns a new String built
+   * from that array.
+   */
+  static String dummy0(int a) {
+        temp2[0] = (char)(a >>> 16);
+        temp2[1] = (char)a;
+        return new String(temp2);
+  }
+  
  static String dummy3(int a) {
    return String.valueOf((char)(a >>> 16)) + (char)a;
  }
--- a/tools/unicodetools/com/ibm/text/utility/IntStack.java
+++ b/tools/unicodetools/com/ibm/text/utility/IntStack.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/IntStack.java,v $
-* $Date: 2001/08/31 00:19:16 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:52 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -17,7 +17,7 @@ package com.ibm.text.utility;
 // Simple stack mechanism, with push, pop and access
 // =============================================================

-public final class IntStack {
+public final class IntStack implements Comparable {
    private int[] values;
    private int top = 0;

@ -51,4 +51,31 @@ public final class IntStack {
    public boolean isEmpty() {
        return top == 0;
    }
+    
+    /**
+     * Empties the stack by resetting top to 0; the backing array is left untouched.
+     */
+    public void clear() {
+        top = 0;
+    }
+    
+    /**
+     * Compares two stacks element by element, starting at index 0.
+     * The first differing element decides the order; if all shared elements
+     * match, the stack holding fewer elements sorts first.
+     * @param other the IntStack to compare against
+     * @return a negative value, zero, or a positive value as this stack is
+     *         less than, equal to, or greater than other
+     * @throws ClassCastException if other is not an IntStack
+     */
+    public int compareTo(Object other) {
+        IntStack that = (IntStack) other;
+        // Only elements present in both stacks may be compared, so take the
+        // smaller of the two tops; slots at or above a stack's top are not live.
+        int min = top;
+        if (min > that.top) min = that.top;
+        for (int i = 0; i < min; ++i) {
+            // Compare rather than subtract: a difference of two ints can
+            // overflow and report the wrong sign.
+            if (values[i] < that.values[i]) return -1;
+            if (values[i] > that.values[i]) return 1;
+        }
+        return top - that.top;
+    }
+
+    /**
+     * Returns true when other is an IntStack that compareTo reports as equal
+     * to this one.
+     * @param other the object to compare against; may be null
+     * @return false for null or for any object that is not an IntStack
+     */
+    public boolean equals(Object other) {
+        // instanceof is false for null, so neither null nor a foreign type
+        // reaches the cast inside compareTo.
+        if (!(other instanceof IntStack)) return false;
+        return compareTo(other) == 0;
+    }
+
+    /**
+     * Hash over the element count and the elements below top: starts from top
+     * and folds in each element as hash = hash * 37 + element.
+     */
+    public int hashCode() {
+        int hash = top;
+        int index = 0;
+        while (index < top) {
+            hash = 37 * hash + values[index];
+            ++index;
+        }
+        return hash;
+    }
 }
--- a/tools/unicodetools/com/ibm/text/utility/Pair.java
+++ b/tools/unicodetools/com/ibm/text/utility/Pair.java
@ -5,15 +5,15 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Pair.java,v $
-* $Date: 2001/08/31 00:19:16 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:52 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */

 package com.ibm.text.utility;

-public final class Pair implements java.lang.Comparable {
+public final class Pair implements java.lang.Comparable, Cloneable {

  public Comparable first, second;

@ -41,4 +41,12 @@ public final class Pair implements java.lang.Comparable {
        if (trial != 0) return trial;
        return second.compareTo(that.second);
    }
+    
+    /**
+     * Returns a shallow copy of this Pair: the copy refers to the same first
+     * and second objects as the original.
+     * @return the new Pair, never null
+     */
+    public Object clone() {
+        try {
+            return super.clone();
+        } catch (CloneNotSupportedException e) {
+            // Cannot happen because Pair implements Cloneable. Fail loudly
+            // rather than hand callers a null they would trip over later.
+            throw new InternalError(e.toString());
+        }
+    }
 }
--- a/tools/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java
+++ b/tools/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java,v $
-* $Date: 2001/08/31 00:19:16 $
-* $Revision: 1.2 $
+* $Date: 2001/09/19 23:33:52 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -56,7 +56,7 @@ public final class UTF8StreamWriter extends Writer {
        TRAILING_TOP = 0x80;

    private static final int MAGIC = 0x10000 + ((0 - 0xD800) << 10) + (0 - 0xDC00);
-
+    
    public final void write(char[] buffer, int cStart, int cLength) throws IOException {
        int cEnd = cStart + cLength;
        while (cStart < cEnd) {
@ -71,6 +71,8 @@ public final class UTF8StreamWriter extends Writer {
            // get code point

            int utf32 = buffer[cStart++];
+            
+            if (utf32 == 0x0D) continue; // skip write

            // special check for surrogates

--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2001/09/06 01:29:03 $
-* $Revision: 1.3 $
+* $Date: 2001/09/19 23:33:52 $
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -408,12 +408,15 @@ public final class Utility {    // COMMON UTILITIES

    private static final String[] searchPath = {
        "EXTRAS",
-        "3.1.2",
+        "3.2.0",
        "3.1.1",
        "3.1.0",
        "3.0.1",
        "3.0.0",
        "2.1.9",
+        "2.1.8",
+        "2.1.5",
+        "2.1.2",
        "2.0.0",
        "1.1.0",
    };