New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
2002-06-28 01:59:58 +00:00 · 2002-06-28 01:59:58 +00:00 · 72a043bed7
commit 72a043bed7
parent 46138ef4f3
3 changed files with 464 additions and 137 deletions
--- a/tools/unicodetools/com/ibm/text/UCA/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Main.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ 
-* $Date: 2002/06/22 21:02:16 $ 
-* $Revision: 1.7 $
+* $Date: 2002/06/28 01:59:58 $ 
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@ -18,7 +18,7 @@ import com.ibm.text.utility.*;

 public class Main {
 	static final String UCDVersion = "";
-	static final String[] ICU_FILES = {"writeCollationValidityLog", "FractionalUCA",
+	static final String[] ICU_FILES = {"writeCollationValidityLog", "writeFractionalUCA",
 		"WriteRules", "WriteRulesWithNames", "WriteRulesXML",
 		"writeconformance", "writeconformanceshifted", 
 		"short", "writeconformance", "writeconformanceshifted", 
@ -65,7 +65,7 @@ public class Main {
            else if (arg.equalsIgnoreCase("WriteRulesXML")) WriteCollationData.writeRules(WriteCollationData.IN_XML);
            else if (arg.equalsIgnoreCase("checkDisjointIgnorables")) WriteCollationData.checkDisjointIgnorables();
            else if (arg.equalsIgnoreCase("writeContractions")) WriteCollationData.writeContractions();
-            else if (arg.equalsIgnoreCase("FractionalUCA")) WriteCollationData.writeFractionalUCA("FractionalUCA");
+            else if (arg.equalsIgnoreCase("writeFractionalUCA")) WriteCollationData.writeFractionalUCA("FractionalUCA");
            else if (arg.equalsIgnoreCase("writeConformance")) WriteCollationData.writeConformance("CollationTest_NON_IGNORABLE", UCA.NON_IGNORABLE, shortPrint);
            else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) WriteCollationData.writeConformance("CollationTest_SHIFTED", UCA.SHIFTED, shortPrint);
            else if (arg.equalsIgnoreCase("testCompatibilityCharacters")) WriteCollationData.testCompatibilityCharacters();
@ -80,7 +80,7 @@ public class Main {
                System.out.println("UNKNOWN OPTION (" + arg + "): must be one of the following (case-insensitive)");
                System.out.println("\tWriteRulesXML, WriteRulesWithNames, WriteRules,");
                System.out.println("\tcheckDisjointIgnorables, writeContractions,");
-                System.out.println("\tFractionalUCA, writeConformance, writeConformanceSHIFTED, testCompatibilityCharacters,");
+                System.out.println("\twriteFractionalUCA, writeConformance, writeConformanceSHIFTED, testCompatibilityCharacters,");
                System.out.println("\twriteCollationValidityLog, writeCaseExceptions, writeJavascriptInfo, writeCaseFolding");
                System.out.println("\tjavatest, hex (used for conformance)");
            }
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ 
-* $Date: 2002/06/24 15:25:10 $ 
-* $Revision: 1.15 $
+* $Date: 2002/06/28 01:59:58 $ 
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@ -75,7 +75,8 @@ final public class UCA implements Comparator, UCA_Types {
     * Version of the UCA tables to use
     */
    //private static final String VERSION = "-3.0.1d3"; // ""; // "-2.1.9d7"; 
-    public static final String VERSION = "-3.1.1d1"; // ""; // "-2.1.9d7"; 
+    public static final String UCA_BASE = "3.1.1"; // ""; // "-2.1.9d7"; 
+    public static final String VERSION = "-" + UCA_BASE + "d5"; // ""; // "-2.1.9d7"; 
    public static final String ALLFILES = "allkeys"; // null if not there
    
    /**
@ -240,7 +241,9 @@ final public class UCA implements Comparator, UCA_Types {
            // add weights
            char w = getPrimary(ce);
            if (DEBUG) System.out.println("\tCE: " + Utility.hex(ce));
-            if (w != 0) primaries.append(w);
+            if (w != 0) {
+                primaries.append(w);
+            }
            
            w = getSecondary(ce);
            if (w != 0) {
@ -252,9 +255,13 @@ final public class UCA implements Comparator, UCA_Types {
            }
            
            w = getTertiary(ce);
-            if (w != 0) tertiaries.append(w);
+            if (w != 0) {
+                tertiaries.append(w);
+            }
   
-            if (weight4 != 0) quaternaries.append(weight4);
+            if (weight4 != 0) {
+                quaternaries.append(weight4);
+            }
        }
        
        // Produce weight strings
@ -263,13 +270,13 @@ final public class UCA implements Comparator, UCA_Types {
        
        StringBuffer result = primaries;
        if (strength >= 2) {
-            result.append('\u0000');    // separator
+            result.append(LEVEL_SEPARATOR);    // separator
            result.append(secondaries);
            if (strength >= 3) {
-                result.append('\u0000');    // separator
+                result.append(LEVEL_SEPARATOR);    // separator
                result.append(tertiaries);
                if (strength >= 4) {
-                    result.append('\u0000');    // separator
+                    result.append(LEVEL_SEPARATOR);    // separator
                    if (alternate == SHIFTED_TRIMMED) {
                        int q;
                        for (q = quaternaries.length()-1; q >= 0; --q) {
@ -303,7 +310,7 @@ final public class UCA implements Comparator, UCA_Types {
            char c2 = sortKey2.charAt(i);
            if (c1 < c2) return -strength;
            if (c1 > c2) return strength;
-            if (c1 == '\u0000') --strength; // Separator!
+            if (c1 == LEVEL_SEPARATOR) --strength; // Separator!
        }
        if (len1 < len2) return -strength;
        if (len1 > len2) return strength;
@ -399,15 +406,21 @@ final public class UCA implements Comparator, UCA_Types {
     * @param source Normal UTF-16 (Java) string
     * @return sort key (as string)
     * @author Markus Scherer (cast into Java by MD)
+     * NOTE: changed to be longer, but handle isolated surrogates
     */
    public static StringBuffer appendInCodePointOrder(String source, StringBuffer target) {
-        for (int i = 0; i < source.length(); ++i) {
-            int ch = source.charAt(i);
+        int cp;
+        for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
+            cp = UTF16.charAt(source, i);
+            target.append((char)((cp >> 15) | 0x8000));
+            target.append((char)(cp | 0x8000));
+            /*
            if (ch <= 1) { // hack to avoid nulls
                target.append('\u0001');
                target.append((char)(ch+1));
            }
            target.append((char)(ch + utf16CodePointOrder[ch>>11]));
+            */
        }
        return target;
    }
@ -659,9 +672,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
 */		
    
    /**
-     * Returns implicit value as pair, first part in high word; second part in low word
-     * So to get first part use (x >>> 16) -- remember the >>>!
-     * and to get the second part use (x & 0xFFFF)
+     * Returns implicit value
     */
    
    void CodepointToImplicit(int cp, int[] output) {
@ -673,9 +684,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
    }
    
    /**
-     * Takes implicit value as pair, first part in high word; second part in low word
-     * So to get first part use (x >>> 16) -- remember the >>>!
-     * and to get the second part use (x & 0xFFFF)
+     * Takes implicit value
     */
    
    static int ImplicitToCodePoint(int leadImplicit, int trailImplicit) {
@ -997,7 +1006,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
            
        // push BBBB
                        
-        expandingStack.push(makeKey(implicit[1], NEUTRAL_SECONDARY, NEUTRAL_TERTIARY));
+        expandingStack.push(makeKey(implicit[1], 0, 0));
        
        // return AAAA
            
@ -1127,7 +1136,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
            
            // normal case
            while (current++ < 0x10FFFF) {
-                if (current == 0x406) {
+                if (DEBUG && current == 0xdbff) {
                    System.out.println("DEBUG");
                }
                //char ch = (char)current;
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ 
-* $Date: 2002/06/24 15:25:10 $ 
-* $Revision: 1.23 $
+* $Date: 2002/06/28 01:59:53 $ 
+* $Revision: 1.24 $
 *
 *******************************************************************************
 */
@ -25,6 +25,8 @@ import java.io.*;
 import java.text.RuleBasedCollator;
 import java.text.CollationElementIterator;
 import java.text.Collator;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;

 import com.ibm.text.UCD.*;
 import com.ibm.text.UCD.UCD_Types;
@ -34,7 +36,7 @@ import com.ibm.text.UCD.Normalizer;
 public class WriteCollationData implements UCD_Types, UCA_Types {
 	
 	static final boolean DEBUG = false;
-	static final boolean DEBUG_SHOW_ITERATION = true;
+	static final boolean DEBUG_SHOW_ITERATION = false;
 	
 	
 	
@ -299,18 +301,27 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
 U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
 => U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS, U+0304 COMBINING MACRON
 */
-        String[] testList = {"\uF934", "U", "U\u0308", "\u00DC", "\u00DC\u0304", "U\u0308\u0304"};
-        for (int jj = 0; jj < testList.length; ++jj) {
-            String t = testList[jj];
-            System.out.println(ucd.getCodeAndName(t));
-            String test = collator.getSortKey(t, UCA.NON_IGNORABLE);
-            System.out.println("Decomp: " + collator.toString(test));
-            test = collator.getSortKey(t, UCA.NON_IGNORABLE, false);
-            System.out.println("No Dec: " + collator.toString(test));
+        if (DEBUG) {
+            String[] testList = {"\u3192", "\u3220", "\u0344", "\u0385", "\uF934", "U", "U\u0308", "\u00DC", "\u00DC\u0304", "U\u0308\u0304"};
+            for (int jj = 0; jj < testList.length; ++jj) {
+                String t = testList[jj];
+                System.out.println(ucd.getCodeAndName(t));
+                
+                CEList ces = collator.getCEList(t, true);
+                System.out.println("CEs:    " + ces);
+                
+                String test = collator.getSortKey(t, option);
+                System.out.println("Decomp: " + collator.toString(test));
+                
+                test = collator.getSortKey(t, option, false);
+                System.out.println("No Dec: " + collator.toString(test));
+            }
        }
        
-        PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", true, false);
-        if (!shortPrint) log.write('\uFEFF');
+        PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", true, true);
+        //if (!shortPrint) log.write('\uFEFF');
+        log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
+        log.println("# Generated:   " + getNormalDate());
        
        System.out.println("Sorting");
        int counter = 0;
@ -333,7 +344,6 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
            }
            Utility.dot(counter++);
            addStringX(s, option);
-            // TODO: add other accents with Cyrillic
        }
        
        UnicodeSet found = collator.found;
@ -472,7 +482,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
                for (int j = 0; j < CONTRACTION_TEST.length; ++j) {
                    String extra = s.substring(0,i) + CONTRACTION_TEST[j] + s.substring(i);
                    addStringY(extra + 'a', option);
-                    System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(extra));
+                    if (DEBUG) System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(extra));
                }
            }
        }
@ -488,31 +498,51 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
        sortedD.put(colDbase, s);
    }
    
-    
+    static UCD ucd_uca_base = null;
    
    /** 
     * Check that the primaries are the same as the compatibility decomposition.
     */
    static void checkBadDecomps(int strength, boolean decomposition, UnicodeSet alreadySeen) {
+        if (ucd_uca_base == null) {
+            ucd_uca_base = UCD.make(UCA.UCA_BASE);
+        }
        int oldStrength = collator.getStrength();
        collator.setStrength(strength);
        Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
-        if (strength == 1) {
-            log.println("<h2>3. Primaries Incompatible with Decompositions</h2><table border='1' cellspacing='0' cellpadding='2'>");
-        } else {
-            log.println("<h2>4. Secondaries Incompatible with Decompositions</h2><table border='1' cellspacing='0' cellpadding='2'>");
+        Normalizer nfc = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
+        switch (strength) {
+            case 1: log.println("<h2>3. Primaries Incompatible with Decompositions</h2>"); break;
+            case 2: log.println("<h2>4. Secondaries Incompatible with Decompositions</h2>"); break;
+            case 3: log.println("<h2>5. Tertiaries Incompatible with Decompositions</h2>"); 
+                log.println("<p>Note: Tertiary differences are not really errors; these are just warnings</p>"); 
+            break;
+            default: throw new IllegalArgumentException("bad strength: " + strength);
        }
+        log.println("<p>Warning: only checking characters defined in base: " + ucd_uca_base.getVersion() + "</p>");
+        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
+        
+        int errorCount = 0;
+        
+        UnicodeSet skipSet = new UnicodeSet();
+        
        for (int ch = 0; ch < 0x10FFFF; ++ch) {
-        	if (!ucd.isAllocated(ch)) continue;
+        	if (!ucd_uca_base.isAllocated(ch)) continue;
            if (nfkd.isNormalized(ch)) continue;
            if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
            if (alreadySeen.contains(ch)) continue;
            Utility.dot(ch);
            
            String decomp = nfkd.normalize(ch);
-            if (ch != ' ' && decomp.charAt(0) == ' ') continue; // skip wierd decomps
-            if (ch != '\u0640' && decomp.charAt(0) == '\u0640') continue; // skip wierd decomps
+            if (ch != ' ' && decomp.charAt(0) == ' ') {
+                skipSet.add(ch);
+                continue; // skip wierd decomps
+            }
+            if (ch != '\u0640' && decomp.charAt(0) == '\u0640') {
+                skipSet.add(ch);
+                continue; // skip wierd decomps
+            }
            
            
            String sortKey = collator.getSortKey(UTF16.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
@ -521,21 +551,97 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
                sortKey = remove(sortKey, '\u0020');
                decompSortKey = remove(decompSortKey, '\u0020');
            }
-            if (!sortKey.equals(decompSortKey)) {
-                log.println("<tr><td>" + Utility.hex(ch)
-                    + "</td><td>" + UCA.toString(sortKey)
-                    + "</td><td>" + UCA.toString(decompSortKey)
-                    + "</td><td>" + ucd.getName(ch)
-                    + "</td></tr>"
-                    );
-            	alreadySeen.add(ch);
+            
+            if (sortKey.equals(decompSortKey)) continue; // no problem!
+            
+            // fix key in the case of strength 3
+            
+            if (strength == 3) {
+                String newSortKey = remapSortKey(ch, decomposition);
+                if (!sortKey.equals(newSortKey)) {
+                    System.out.println("Fixing: " + ucd.getCodeAndName(ch));
+                    System.out.println("  Old:" + collator.toString(decompSortKey));
+                    System.out.println("  New: " + collator.toString(newSortKey));
+                    System.out.println("  Tgt: " + collator.toString(sortKey));
+                }
+                decompSortKey = newSortKey;
            }
+            
+            if (sortKey.equals(decompSortKey)) continue; // no problem!
+            
+            log.println("<tr><td>" + Utility.hex(ch)
+                + "</td><td>" + UCA.toString(sortKey)
+                + "</td><td>" + UCA.toString(decompSortKey)
+                + "</td><td>" + ucd.getName(ch)
+                + "</td></tr>"
+                );
+            alreadySeen.add(ch);
+            errorCount++;
        }
        log.println("</table>");
+        log.println("<p>Errors: " + errorCount + "</p>");
+        log.println("<p>Space/Tatweel exceptions: " + skipSet.toPattern(true) + "</p>");
        collator.setStrength(oldStrength);
        Utility.fixDot();
    }
    
+    static String remapSortKey(int cp, boolean decomposition) {
+        if (toD.isNormalized(cp)) return remapCanSortKey(cp, decomposition);
+        
+        // we know that it is not NFKD.
+        String canDecomp = toD.normalize(cp);
+        String result = "";
+        int ch;
+        for (int j = 0; j < canDecomp.length(); j += UTF16.getCharCount(ch)) {
+            ch = UTF16.charAt(canDecomp, j);
+            System.out.println("* " + Default.ucd.getCodeAndName(ch));
+            String newSortKey = remapCanSortKey(ch, decomposition);
+            System.out.println("* " + UCA.toString(newSortKey));
+            result = mergeSortKeys(result, newSortKey);
+            System.out.println("= " + UCA.toString(result));
+        }
+        return result;
+    }
+    
+    static String remapCanSortKey(int ch, boolean decomposition) {
+        String compatDecomp = Default.nfkd.normalize(ch);
+        String decompSortKey = collator.getSortKey(compatDecomp, UCA.NON_IGNORABLE, decomposition);
+                
+        byte type = ucd.getDecompositionType(ch);
+        int pos = decompSortKey.indexOf(UCA.LEVEL_SEPARATOR) + 1; // after first separator
+        pos = decompSortKey.indexOf(UCA.LEVEL_SEPARATOR, pos) + 1; // after second separator
+        String newSortKey = decompSortKey.substring(0, pos);
+        for (int i = pos; i < decompSortKey.length(); ++i) {
+            int weight = decompSortKey.charAt(i);
+            int newWeight = CEList.remap(ch, type, weight);
+            if (i > pos + 1) newWeight = 0x1F;
+            newSortKey += (char)newWeight;
+        }
+        return newSortKey;
+    }
+    
+    // keys must be of the same strength
+    static String mergeSortKeys(String key1, String key2) {
+        StringBuffer result = new StringBuffer();
+        int end1 = 0, end2 = 0;
+        while (true) {
+            int pos1 = key1.indexOf(UCA.LEVEL_SEPARATOR, end1);
+            int pos2 = key2.indexOf(UCA.LEVEL_SEPARATOR, end2);
+            if (pos1 < 0) {
+                result.append(key1.substring(end1)).append(key2.substring(end2));
+                return result.toString();
+            }
+            if (pos2 < 0) {
+                result.append(key1.substring(end1, pos1)).append(key2.substring(end2)).append(key1.substring(pos1));
+                return result.toString();
+            }
+            result.append(key1.substring(end1, pos1)).append(key2.substring(end2, pos2)).append(UCA.LEVEL_SEPARATOR);
+            end1 = pos1 + 1;
+            end2 = pos2 + 1;
+        }
+    }
+    
+    
    static final String remove (String s, char ch) {
        StringBuffer buf = new StringBuffer();
        for (int i = 0; i < s.length(); ++i) {
@ -630,7 +736,8 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
        Iterator it = forLater.keySet().iterator();
        byte oldType = (byte)0xFF; // anything unique
        int caseCount = 0;
-        log.println("Generated: " + new Date());
+        log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
+        log.println("Generated: " + getNormalDate());
        while (it.hasNext()) {
            String key = (String) it.next();
            byte type = (byte)key.charAt(0);
@ -863,7 +970,8 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
        int[] lenArray = new int[1];
        
        diLog.println("# Contractions");
-        diLog.println("# Generated " + new Date());
+        diLog.println("# Generated " + getNormalDate());
+        diLog.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
        while (true) {
            String s = cc.next(ces, lenArray);
            if (s == null) break;
@ -1409,6 +1517,9 @@ F900..FAFF; CJK Compatibility Ideographs
        log = Utility.openPrintWriter(filename, false, false);
        
        String[] commentText = {
+        	"UCA Rules",
+        	"This file contains the UCA tables for the given version, but transformed into rule syntax.",
+            "Generated:   " + getNormalDate(),
        	"NOTE: Since UCA handles canonical equivalents, no composites are necessary",
        	"(except in extensions).",
        	"For syntax description, see: http://oss.software.ibm.com/icu/userguide/Collate_Intro.html"
@ -1833,6 +1944,24 @@ F900..FAFF; CJK Compatibility Ideographs
    		System.out.println("Test case: " + Utility.hex(s) + ", " + CEList.toString(ces, len));
    	}
 		backMap.put(new ArrayWrapper((int[])(ces.clone()), 0, len), s);
+		/*
+		// HACK until Ken fixes
+		for (int i = 0; i < len; ++i) {
+		    int ce = ces[i];
+		    if (collator.isImplicitLeadCE(ce)) {
+		        ++i;
+		        ce = ces[i];
+		        if (DEBUG
+		            && (UCA.getPrimary(ce) == 0 || UCA.getSecondary(ce) != 0 || UCA.getTertiary(ce) != 0)) {
+		            System.out.println("WEIRD 2nd IMPLICIT: " 
+		                + CEList.toString(ces, len)
+		                + ", " + ucd.getCodeAndName(s));
+		        }
+		        ces[i] = UCA.makeKey(UCA.getPrimary(ce), NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
+		    }
+		}
+		backMap.put(new ArrayWrapper((int[])(ces.clone()), 0, len), s);
+		*/
    }
    
    static int[] ignorableList = {
@ -1915,6 +2044,12 @@ F900..FAFF; CJK Compatibility Ideographs
            		// Try stomping the value to different tertiaries
            		
    				int probe = ces[i];
+    				if (UCA.isImplicitLeadCE(probe)) {
+                        s = UTF16.valueOf(UCA.ImplicitToCodePoint(UCA.getPrimary(probe), UCA.getPrimary(ces[i+1])));
+                        ++i; // skip over next item!!
+                        break;
+    				}
+    				
        			char primary = collator.getPrimary(probe);
        			char secondary = collator.getSecondary(probe);
 	        		
@ -2115,6 +2250,7 @@ F900..FAFF; CJK Compatibility Ideographs
    static int[] primaryDelta;
    
    static void writeFractionalUCA(String filename) throws IOException {
+        Default.setUCD();
        
        checkImplicit();
        checkFixes();
@ -2345,13 +2481,14 @@ F900..FAFF; CJK Compatibility Ideographs
        EquivalenceClass secEq = new EquivalenceClass("\r\n#", 2, true);
        EquivalenceClass terEq = new EquivalenceClass("\r\n#", 2, true);
        String[] sampleEq = new String[500];
+        int[] sampleLen = new int[500];
        
        Iterator it = ordered.keySet().iterator();
        int oldFirstPrimary = UCA.getPrimary(UCA.TERMINATOR);
        boolean wasVariable = false;
        
        log.println("# Fractional UCA Table, generated from standard UCA");
-        log.println("# M. Davis, " + new Date());
+        log.println("# " + getNormalDate());
        log.println("# VERSION: UCA=" + collator.getDataVersion() + ", UCD=" + collator.getUCDVersion());
        log.println();
        log.println("# Generated processed version, as described in ICU design document.");
@ -2389,6 +2526,8 @@ F900..FAFF; CJK Compatibility Ideographs
                    
        FCE firstTrailing = new FCE(false);
        FCE lastTrailing = new FCE(true);
+        
+        Map backMap = new TreeMap();
                            
        while (it.hasNext()) {
            Object sortKey = it.next();
@ -2399,19 +2538,19 @@ F900..FAFF; CJK Compatibility Ideographs
            int firstPrimary = UCA.getPrimary(ces[0]);
            if (firstPrimary != oldFirstPrimary) {
                log.println();
-                oldFirstPrimary = firstPrimary;
                boolean isVariable = collator.isVariable(ces[0]);
                if (isVariable != wasVariable) {
                    if (isVariable) {
                        log.println("# START OF VARIABLE SECTION!!!");
                        summary.println("# START OF VARIABLE SECTION!!!");
                    } else {
-                        log.println("[variable top = " + Utility.hex(primaryDelta[firstPrimary]) + "] # END OF VARIABLE SECTION!!!");
+                        log.println("[variable top = " + Utility.hex(primaryDelta[oldFirstPrimary]) + "] # END OF VARIABLE SECTION!!!");
                        doVariable = true;
                    }
                    log.println();
                }
                wasVariable = isVariable;
+                oldFirstPrimary = firstPrimary;
            }
            oldStr.setLength(0);
            chr.getChars(0, chr.length(), codeUnits, 0);
@ -2473,8 +2612,24 @@ F900..FAFF; CJK Compatibility Ideographs
                if (ter != 0x2) {
                    boolean changed = terEq.add(new Integer(ter), new Integer((pri << 16) | sec));
                }
-                if (sampleEq[sec] == null) sampleEq[sec] = chr;
-                if (sampleEq[ter] == null) sampleEq[ter] = chr;
+                
+                if (sampleEq[sec] == null || sampleLen[sec] > len) {
+                    sampleEq[sec] = chr;
+                    sampleLen[sec] = len;
+                }
+                if (sampleEq[ter] == null || sampleLen[sec] > len) {
+                    sampleEq[ter] = chr;
+                    sampleLen[sec] = len;
+                }
+                
+                if ((pri & MARK_CODE_POINT) == 0 && pri == 0) {
+                    Integer key = new Integer(ces[q]);
+                    Pair value = (Pair) backMap.get(key);
+                    if (value == null
+                    || (len < ((Integer)(value.first)).intValue())) {
+                        backMap.put(key, new Pair(new Integer(len), chr));
+                    }
+                }
                
                // int oldPrimaryValue = UCA.getPrimary(ces[q]);
                int np = fixPrimary(pri);
@ -2508,38 +2663,76 @@ F900..FAFF; CJK Compatibility Ideographs
                    + "]");
                    
                // RECORD STATS
+                // but ONLY if we are not part of an implicit
                
-                if (np == 0 && ns == 0) {
-                    firstSecondaryIgnorable.setValue(np, ns, nt);
-                    lastSecondaryIgnorable.setValue(np, ns, nt); 
-                } else if (np == 0) {
-                    firstPrimaryIgnorable.setValue(np, ns, nt);
-                    lastPrimaryIgnorable.setValue(np, ns, nt); 
-                } else if (collator.isVariable(ces[q])) {
-                    firstVariable.setValue(np, ns, nt);
-                    lastVariable.setValue(np, ns, nt); 
-                } else if (UCA.getPrimary(ces[q]) > UNSUPPORTED_LIMIT) {        // Trailing (none currently)
-                    System.out.println("Trailing: " + CEList.toString(ces[q]) 
-                        + ", " + Utility.hex(pri) + ", " + Utility.hex(UNSUPPORTED_LIMIT));
-                    firstTrailing.setValue(np, ns, nt);
-                    lastTrailing.setValue(np, ns, nt); 
-                } else if ((pri & MARK_CODE_POINT) == 0) {          // skip implicits
-                    firstNonIgnorable.setValue(np, ns, nt);
-                    lastNonIgnorable.setValue(np, ns, nt); 
+                if ((pri & MARK_CODE_POINT) == 0) {
+                    if (np == 0 && ns == 0) {
+                        firstSecondaryIgnorable.setValue(np, ns, nt);
+                        lastSecondaryIgnorable.setValue(np, ns, nt); 
+                    } else if (np == 0) {
+                        firstPrimaryIgnorable.setValue(np, ns, nt);
+                        lastPrimaryIgnorable.setValue(np, ns, nt); 
+                    } else if (collator.isVariable(ces[q])) {
+                        firstVariable.setValue(np, ns, nt);
+                        lastVariable.setValue(np, ns, nt); 
+                    } else if (UCA.getPrimary(ces[q]) > UNSUPPORTED_LIMIT) {        // Trailing (none currently)
+                        System.out.println("Trailing: " 
+                            + ucd.getCodeAndName(chr) + ", "
+                            + CEList.toString(ces[q]) + ", " 
+                            + Utility.hex(pri) + ", " 
+                            + Utility.hex(UNSUPPORTED_LIMIT));
+                        firstTrailing.setValue(np, ns, nt);
+                        lastTrailing.setValue(np, ns, nt); 
+                    } else {
+                        firstNonIgnorable.setValue(np, ns, nt);
+                        lastNonIgnorable.setValue(np, ns, nt); 
+                    }
                }
            }
            if (nonePrinted) {
                log.print("[,,]");
                oldStr.append(CEList.toString(0));
            }
-            longLog.print("    # " + oldStr + " # " + ucd.getName(UTF16.charAt(chr, 0)));
+            longLog.print("\t# " + oldStr + "\t* " + ucd.getName(UTF16.charAt(chr, 0)));
            log.println();
            lastChr = chr;
        }
        
+        // ADD HOMELESS COLLATION ELEMENTS
+        log.println();
+        log.println("# HOMELESS COLLATION ELEMENTS");
+        char fakeTrail = 'a';
+        Iterator it3 = backMap.keySet().iterator();
+        while (it3.hasNext()) {
+            Integer key = (Integer) it3.next();
+            Pair pair = (Pair) backMap.get(key);
+            if (((Integer)pair.first).intValue() < 2) continue;
+            String sample = (String)pair.second;
+            
+            int ce = key.intValue();
+            
+            int np = fixPrimary(UCA.getPrimary(ce));
+            int ns = fixSecondary(UCA.getSecondary(ce));
+            int nt = fixTertiary(UCA.getTertiary(ce));
+                    
+            newPrimary.setLength(0);
+            newSecondary.setLength(0);
+            newTertiary.setLength(0);
+            
+            hexBytes(np, newPrimary);
+            hexBytes(ns, newSecondary);
+            hexBytes(nt, newTertiary);
+            
+            log.print(Utility.hex('\uFDD0' + "" + (char)(fakeTrail++)) + "; " 
+                + "[, " + newSecondary + ", " + newTertiary + "]");
+            longLog.print("\t# " + collator.getCEList(sample, true) + "\t* " + ucd.getCodeAndName(sample));
+            log.println();
+        }
+
        int firstImplicit = getImplicitPrimary(CJK_BASE);
        int lastImplicit = getImplicitPrimary(0x10FFFF);
        
+        log.println();
        log.println("# VALUES BASED ON UCA");
        
        log.println("[first tertiary ignorable " + new FCE(false,0,0, 0).formatFCE() + "]");
@ -2580,16 +2773,17 @@ F900..FAFF; CJK Compatibility Ideographs
        log.println("[first trailing " + firstTrailing.formatFCE() + "]");
        log.println("[last trailing " + lastTrailing.formatFCE() + "]");
        
+        log.println();
        log.println("# FIXED VALUES");
        
-        log.println("[top "  + Utility.hex(0xA0,2) + "]");
-        log.println("[first implicit byte " + Utility.hex(IMPLICIT_BASE_BYTE,2) + "]");
-        log.println("[last implicit byte " + Utility.hex(IMPLICIT_LIMIT_BYTE,2) + "]");
-        log.println("[first trail byte" + Utility.hex(IMPLICIT_LIMIT_BYTE+1,2) + "]");
-        log.println("[last implicit byte" + Utility.hex(SPECIAL_BASE-1,2) + "]");
-        log.println("[first special byte" + Utility.hex(SPECIAL_BASE,2) + "]");
-        log.println("[last special byte" + Utility.hex(0xFF,2) + "]");
-
+        log.println("# superceded! [top "  + lastNonIgnorable.formatFCE() + "]");
+        log.println("[fixed first implicit byte " + Utility.hex(IMPLICIT_BASE_BYTE,2) + "]");
+        log.println("[fixed last implicit byte " + Utility.hex(IMPLICIT_LIMIT_BYTE,2) + "]");
+        log.println("[fixed first trail byte " + Utility.hex(IMPLICIT_LIMIT_BYTE+1,2) + "]");
+        log.println("[fixed last trail byte " + Utility.hex(SPECIAL_BASE-1,2) + "]");
+        log.println("[fixed first special byte " + Utility.hex(SPECIAL_BASE,2) + "]");
+        log.println("[fixed last special byte " + Utility.hex(0xFF,2) + "]");
+        
        
        summary.println("Last:  " + Utility.hex(lastNp) + ", " + ucd.getCodeAndName(UTF16.charAt(lastChr, 0)));
        
@ -2636,6 +2830,7 @@ F900..FAFF; CJK Compatibility Ideographs
        summary.println();
        summary.println("# UCA : (FRAC) CODE [    UCA CE    ] Name");
        summary.println();
+        
        for (int i = 0; i < sampleEq.length; ++i) {
            if (sampleEq[i] == null) continue;
            if (i == 0x20) {
@ -2653,6 +2848,7 @@ F900..FAFF; CJK Compatibility Ideographs
                summary.print(CEList.toString(ces[q]));
            }
            summary.println(" " + ucd.getName(sampleEq[i]));
+            
        }
        log.close();
        summary.close();
@ -3379,6 +3575,13 @@ static int swapCJK(int i) {
        
    }
    
+    static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd','HH:mm:ss' GMT'");
+    
+    static String getNormalDate() {
+        return myDateFormat.format(new Date()) + " [MD]";
+    }
+    
+    
    static void setSingle(char ch, int[] ces) {
        collator.getCEs(String.valueOf(ch), true, ces);
        singles.set(UCA.getPrimary(ces[0]));
@ -3396,12 +3599,18 @@ static int swapCJK(int i) {
        input.close();
    }
    
+    static UnicodeSet compatibilityExceptions = new UnicodeSet("[\u0CCB\u0DDD\u017F\u1E9B\uFB05]");
+    
    static void writeCollationValidityLog() throws IOException {
+        Default.setUCD();
    	
        //log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html"));
        log = Utility.openPrintWriter("CheckCollationValidity.html", false, false);
        
-        log.println("<html><body>");
+        log.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+        log.println("<title>UCA Validity Log</title>");
+        log.println("<style>.bottom { border-bottom-style: solid; border-bottom-color: #0000FF }</style>");
+        log.println("</head><body bgcolor='#FFFFFF'>");

        
        //collator = new UCA(null);
@ -3412,14 +3621,14 @@ static int swapCJK(int i) {
            
        }
        System.out.println("Sorting");
-        
-        for (int i = 0; i <= 0xFFFF; ++i) {
+        /*
+        for (int i = 0; i <= 0x10FFFF; ++i) {
            if (EXCLUDE_UNSUPPORTED && !collator.found.contains(i)) continue;
            if (0xD800 <= i && i <= 0xF8FF) continue; // skip surrogates and private use
            //if (0xA000 <= c && c <= 0xA48F) continue; // skip YI
            addString(UTF32.valueOf32(i), option);
        }
-        
+        */

        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, null);
        cc.enableSamples();
@ -3469,18 +3678,24 @@ static int swapCJK(int i) {
        System.out.println("Writing");
        String version = collator.getDataVersion();
        
+        log.println("<h1>Collation Validity Checks</h1>");
+        log.println("<table><tr><td>Generated: </td><td>" + getNormalDate() + "</td></tr>");
+        log.println("<tr><td>File Version: </td><td>" + collator.getDataVersion() + "/" + collator.getUCDVersion() + "</td></tr></table>");
+        
+        
        if (GENERATED_NFC_MISMATCHES) showMismatches();
        removeAdjacentDuplicates2();
        
-        UnicodeSet exceptions = new UnicodeSet("[\u0CCB\u0DDD\u017F\u1E9B\uFB05]");
        
-        UnicodeSet alreadySeen = new UnicodeSet(exceptions);
+        UnicodeSet alreadySeen = new UnicodeSet(compatibilityExceptions);
        
        checkBadDecomps(1, false, alreadySeen); // if decomposition is off, all primaries should be identical
-        checkBadDecomps(2, true, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
+        checkBadDecomps(2, false, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
+        checkBadDecomps(3, false, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
+        //checkBadDecomps(2, true, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
        
        log.println("<p>Note: characters with decompositions to space + X, and tatweel + X are excluded,"
-        	+ " as are a few special characters: " + exceptions.toPattern(true) + "</p>");
+        	+ " as are a few special characters: " + compatibilityExceptions.toPattern(true) + "</p>");
        
        if (DO_CHARTS) {
        	System.out.println("Charts");
@ -3564,6 +3779,7 @@ static int swapCJK(int i) {
        }
        
        checkWellformedTable();
+        addClosure();
        
        log.println("</body></html>");
        log.close();
@ -3572,10 +3788,83 @@ static int swapCJK(int i) {
    }
    
    
+    static void addClosure() {
+        int canCount = 0;
+        System.out.println("Add missing decomposibles");
+    	log.println("<h2>7. Comparing Other Equivalents</h2>");
+    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
+    	log.println("<p>Each of the three strings is canonically equivalent, but has different sort keys</p>");
+        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
+        log.println("<tr><th>Count</th><th>Name</th><th>Code</th><th>Sort Keys</th></tr>");
+    	
+        
+        Set contentsForCanonicalIteration = new TreeSet();
+        UCA.UCAContents ucac = collator.getContents(UCA.FIXED_CE, null); // NFD
+        int ccounter = 0;
+        while (true) {
+            Utility.dot(ccounter++);
+            String s = ucac.next();
+            if (s == null) break;
+            contentsForCanonicalIteration.add(s);
+        }
+        
+        Set additionalSet = new HashSet();
+        
+        System.out.println("Loading canonical iterator");
+        if (canIt == null) canIt = new CanonicalIterator(".");
+        Iterator it2 = contentsForCanonicalIteration.iterator();
+        System.out.println("Adding any FCD equivalents that have different sort keys");
+        while (it2.hasNext()) {
+            String key = (String)it2.next();
+            if (key == null) {
+                System.out.println("Null Key");
+                continue;
+            }
+            canIt.setSource(key);
+            String nfdKey = toD.normalize(key);
+            
+            boolean first = true;
+            while (true) {
+                String s = canIt.next();
+                if (s == null) break;
+                if (s.equals(key)) continue;
+                if (contentsForCanonicalIteration.contains(s)) continue;
+                if (additionalSet.contains(s)) continue;
+                
+                
+                // Skip anything that is not FCD.
+                if (!NFD.isFCD(s)) continue;
+                
+                // We ONLY add if the sort key would be different
+                // Than what we would get if we didn't decompose!!
+                String sortKey = collator.getSortKey(s, UCA.NON_IGNORABLE);
+                String nonDecompSortKey = collator.getSortKey(s, UCA.NON_IGNORABLE, false);
+                if (sortKey.equals(nonDecompSortKey)) continue;
+                
+                if (DEBUG && first) {
+                    System.out.println(" " + ucd.getCodeAndName(key));
+                    first = false;
+                }
+                log.println("<tr><td rowspan='3'>" + (++canCount) + "</td><td>" + Utility.replace(ucd.getName(key), ", ", ",<br>") + "</td>");
+                log.println("<td>" + Utility.hex(key) + "</td>");
+                log.println("<td>" + collator.toString(sortKey) + "</td></tr>");
+                log.println("<tr><td>" + Utility.replace(ucd.getName(nfdKey), ", ", ",<br>") + "</td>");
+                log.println("<td>" + Utility.hex(nfdKey) + "</td>");
+                log.println("<td>" + collator.toString(sortKey) + "</td></tr>");
+                log.println("<tr><td class='bottom'>" + Utility.replace(ucd.getName(s), ", ", ",<br>") + "</td>");
+                log.println("<td class='bottom'>" + Utility.hex(s) + "</td>");
+                log.println("<td class='bottom'>" + collator.toString(nonDecompSortKey) + "</td></tr>");
+                additionalSet.add(s);
+            }
+        }
+    	log.println("</table>");
+    	log.println("<p>Items: " + canCount + "</p>");
+    }
+    
 	static void checkWellformedTable() throws IOException {
    	System.out.println("Checking for well-formedness");
    	
-    	log.println("<h2>5. Checking for well-formedness</h2>");
+    	log.println("<h2>6. Checking for well-formedness</h2>");
    	
        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        
@ -3620,6 +3909,7 @@ static int swapCJK(int i) {
        
        cc = collator.getContents(UCA.FIXED_CE, nfd);
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
+        int lastPrimary = 0;
        
        while (true) {
            String str = cc.next(ces, lenArray);
@ -3632,40 +3922,64 @@ static int swapCJK(int i) {
            	int s = UCA.getSecondary(ce);
            	int t = UCA.getTertiary(ce);
            	
+            	// IF we are at the start of an implicit, then just check that the implicit is in range
+            	// CHECK implicit
+            	if (collator.isImplicitLeadPrimary(lastPrimary)) {
+            	    try {
+            	        if (s != 0 || t != 0) throw new Exception("Second implicit must be [X,0,0]");
+            	        collator.ImplicitToCodePoint(lastPrimary, p);   // throws exception if bad
+            	    } catch (Exception e) {
+            		    log.println("<tr><td>" + (++errorCount) + ". BAD IMPLICIT: " + e.getMessage()
+            			    + "</td><td>" + CEList.toString(ces, len) 
+            			    + "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
+            	    }
+            	    // zap the primary, since we worry about the last REAL primary:
+            	    lastPrimary = 0;
+            	    continue;
+            	}
+            	
+            	// IF we are in the trailing range, something is wrong.
+            	if (p >= UCA_Types.UNSUPPORTED_LIMIT) {
+                    log.println("<tr><td>" + (++errorCount) + ". > " + Utility.hex(UCA_Types.UNSUPPORTED_LIMIT,4)
+            			+ "</td><td>" + CEList.toString(ces, len) 
+            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
+            	    lastPrimary = p;
+            	    continue;
+            	}
+            	
            	// Check WF#1
            	
            	if (p != 0 && s == 0) {
-            		log.println("<tr><td>WF1.1"
+            		log.println("<tr><td>" + (++errorCount) + ". WF1.1"
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
-            		errorCount++;
            	}
            	if (s != 0 && t == 0) {
-            		log.println("<tr><td>WF1.2"
+            		log.println("<tr><td>" + (++errorCount) + ". WF1.2"
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
-            		errorCount++;
            	}
            	
            	// Check WF#2

            	if (p != 0) {
            		if (s > minps) {
-            		log.println("<tr><td>WF2.2"
+            		log.println("<tr><td>" + (++errorCount) + ". WF2.2"
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
-            			errorCount++;
            		}
            	}
            	if (s != 0) {
            		if (t > minpst) {
-            		log.println("<tr><td>WF2.3"
+            		log.println("<tr><td>" + (++errorCount) + ". WF2.3"
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
-            			errorCount++;
            		}
            	} else {
            	}
+            	
+            	lastPrimary = p;
+            	    
            }
        }
        log.println("</table>");
@ -3679,9 +3993,7 @@ static int swapCJK(int i) {
        }
                
        
-        if (errorCount > 0) {
-        	log.println("<p>Well-formedness errors: " + errorCount + "</p>");
-        }
+        log.println("<p>Errors: " + errorCount + "</p>");
    }
    
    
@ -3738,7 +4050,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        String colDbase = collator.getSortKey(ch, option, true);
        String colNbase = collator.getSortKey(ch, option, false);
        String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
-        if (!colNbase.equals(colCbase)) {
+        if (!colNbase.equals(colCbase) || !colNbase.equals(colDbase) ) {
            /*System.out.println(Utility.hex(ch));
            System.out.println(printableKey(colNbase));
            System.out.println(printableKey(colNbase));
@ -3770,10 +4082,11 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        String lastChar = "";
        int countRem = 0;
        int countDups = 0;
+        int errorCount = 0;
        Iterator it1 = sortedD.keySet().iterator();
        Iterator it2 = sortedN.keySet().iterator();
        Differ differ = new Differ(250,3);
-        log.println("<h1>2. Differences in Ordering</h1>");
+        log.println("<h2>2. Differences in Ordering</h2>");
        log.println("<p>Codes and names are in the white rows: bold means that the NO-NFD sort key differs from UCA key.</p>");
        log.println("<p>Keys are in the light blue rows: green is the bad key, blue is UCA, black is where they equal.</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
@ -3818,6 +4131,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
                    }
                    log.println("</td></tr>");
                }
+                errorCount++;
            }
            //differ.flush();
            
@ -3826,6 +4140,8 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        
        log.println("</table>");
        
+        log.println("<p>Errors: " + errorCount + "</p>");
+        
        //log.println("Removed " + countRem + " adjacent duplicates.<br>");
        System.out.println("Left " + countDups + " conflicts.<br>");
        log.println("Left " + countDups + " conflicts.<br>");
@ -3835,10 +4151,12 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        String lastChar = "";
        int countRem = 0;
        int countDups = 0;
+        int errorCount = 0;
        Iterator it = sortedD.keySet().iterator();
        log.println("<h1>2. Differences in Ordering</h1>");
        log.println("<p>Codes and names are in the white rows: bold means that the NO-NFD sort key differs from UCA key.</p>");
        log.println("<p>Keys are in the light blue rows: green is the bad key, blue is UCA, black is where they equal.</p>");
+        log.println("<p>Note: so black lines are generally ok.</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        log.println("<tr><th>File Order</th><th>Code and Decomp</th><th>Key and Decomp-Key</th></tr>");
        
@ -3876,9 +4194,11 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
                if (!showedLast) {
                    log.println("<tr><td colspan='3'></td><tr>");
                    showLine(count-1, lastCh, lastCol, lastColN);
+                    errorCount++;
                }
                showedLast = true;
                showLine(count,ch, col, colN);
+                errorCount++;
            }
            lastCol = col;
            lastColN = colN;
@ -3886,6 +4206,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        }
        
        log.println("</table>");
+        log.println("<p>Errors: " + errorCount + "</p>");
   }
   
    static int compareMinusLast(String a, String b) {
@ -3919,39 +4240,36 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
    static final String[] alternateName = {"SHIFTED", "ZEROED", "NON_IGNORABLE", "SHIFTED_TRIMMED"};
   
    static void showMismatches() {
-        MLStreamWriter out = new MLStreamWriter(log);
-        out.el("h1").tx("1. Mismatches when NFD is OFF").cl();
-        out.el("h2").tx("Date:" + new Date()).cl();
-        out.el("h2").tx("File Version:" + UCA.VERSION).cl();
-        out.el("p").tx("Alternate Handling = " + alternateName[option]).cl();
-        out.el("table").at("border",1);
-            out.el("caption").tx("Mismatches in UCA-NOD: Plain vs NFC: ").tx(MismatchedC.size()).cl("caption");
-            out.el("tr");
-                out.el("th").tx("Code").cl();
-                out.el("th").tx("Type").cl();
-                out.el("th").tx("CC?").cl();
-                out.el("th").tx("Key").cl();
-            out.cl("tr");
+        log.println("<h2>1. Mismatches when NFD is OFF</h2>");
+        log.println("<p>Alternate Handling = " + alternateName[option] + "</p>");
+        log.println("<p>NOTE: NFD form is used by UCA,"
+            + "so if other forms are different there are <i>ignored</i>. This may indicate a problem, e.g. missing contraction.</p>");
+        log.println("<table border='1'>");
+        log.println("<tr><th>Name</th><th>Type</th><th>Unicode</th><th>Key</th></tr>");
        Iterator it = MismatchedC.keySet().iterator();
+        int errorCount = 0;
        while (it.hasNext()) {
            String ch = (String)it.next();
            String MN = (String)MismatchedN.get(ch);
            String MC = (String)MismatchedC.get(ch);
+            String MD = (String)MismatchedD.get(ch);
            String chInC = toC.normalize(ch);
-            out.el("tr");
-              out.el("th").at("rowSpan",2).at("align","right").tx16(ch).tx(' ').tx(ucd.getName(ch));
-                out.el("br").cl().tx("NFC=").tx16(chInC).cl();
-              out.el("th").tx("Plain").cl();
-              out.el("th").tx(containsCombining(ch) ? "y" : "n").cl();
-              out.el("td").tx(printableKey(MN)).cl();
-            out.cl("tr");
-            out.el("tr");
-              out.el("th").tx("NFC").cl();
-              out.el("th").tx(containsCombining(chInC) ? "Y" : "ERROR").cl();
-              out.el("td").tx(printableKey(MC)).cl();
-            out.cl("tr");
+            String chInD = toD.normalize(ch);
+            
+            log.println("<tr><td rowSpan='3' class='bottom'>" + Utility.replace(ucd.getName(ch), ", ", ",<br>")
+                + "</td><td>NFD</td><td>" + Utility.hex(chInD) 
+                + "</td><td>" + printableKey(MD) + "</td></tr>");
+
+            log.println("<tr><td>NFC</td><td>" + Utility.hex(chInC) 
+                + "</td><td>" + printableKey(MC) + "</td></tr>");
+            
+            log.println("<tr><td class='bottom'>Plain</td><td class='bottom'>" + Utility.hex(ch) 
+                + "</td><td class='bottom'>" + printableKey(MN) + "</td></tr>");
+            
+            errorCount++;
        }
-        out.closeAllElements();
+        log.println("</table>");
+        log.println("<p>Errors: " + errorCount + "</p>");
        log.println("<br>");
    }