Completed most of the upgrade to UCA 3.1.1;

This involved double CEs for Han, fixing back-mappings, etc. Did a bit of code cleanup too. Remaining to do: back-map from UCA double CEs to original character codes, for constructing Fractional UCA. X-SVN-Rev: 8754
2002-05-31 01:41:04 +00:00 · 2002-05-31 01:41:04 +00:00 · 693b0c9b91
commit 693b0c9b91
parent ce883f6d81
15 changed files with 492 additions and 166 deletions
--- a/tools/unicodetools/com/ibm/text/UCA/CEList.java
+++ b/tools/unicodetools/com/ibm/text/UCA/CEList.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/CEList.java,v $ 
-* $Date: 2001/09/19 23:32:21 $ 
-* $Revision: 1.3 $
+* $Date: 2002/05/31 01:41:03 $ 
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -156,6 +156,15 @@ public final class CEList implements java.lang.Comparable, UCD_Types {
        return result.toString();
    }
    
+    public static String toString(int[] ces, int len) { // formats the first len entries of ces as one space-separated string
+        StringBuffer result = new StringBuffer();
+        for (int i = 0; i < len; ++i) { // only indices 0..len-1 are read; anything beyond len in ces is ignored
+            if (i != 0) result.append(' '); // separator goes between elements, never before the first
+            result.append(toString(ces[i])); // delegates to the single-CE toString(int) overload
+        }
+        return result.toString(); // empty string when len <= 0
+    }
+    
    public static String toString(int ce) {
        return "[" + Utility.hex(UCA.getPrimary(ce)) + "." 
          + Utility.hex(UCA.getSecondary(ce)) + "."
--- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
+++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ 
-* $Date: 2002/04/23 01:59:14 $ 
-* $Revision: 1.8 $
+* $Date: 2002/05/31 01:41:03 $ 
+* $Revision: 1.9 $
 *
 *******************************************************************************
 */
@ -375,7 +375,7 @@ public class GenOverlap implements UCD_Types {
                    System.out.println("debug");
                }
                boolean mashLast = false;
-                if (nfkd.normalizationDiffers(cp)) {
+                if (!nfkd.isNormalized(cp)) {
                    String decomp = nfkd.normalize(cp);
                    String canon = nfd.normalize(cp);
                    len = collator.getCEs(decomp, true, ces);
@ -578,7 +578,7 @@ public class GenOverlap implements UCD_Types {
            
            if (UTF16.countCodePoint(s) != 1) continue; // skip ligatures
            int cp = UTF16.charAt(s, 0);
-            if (nfkd.normalizationDiffers(cp)) continue;
+            if (!nfkd.isNormalized(cp)) continue;
            
            int script = ucd.getScript(cp);
            int len = lenArray[0];
@ -607,7 +607,7 @@ public class GenOverlap implements UCD_Types {
                
            Utility.dot(counter++);
            if (!ucd.isAllocated(cp)) continue;
-            if (nfkd.normalizationDiffers(cp)) continue;
+            if (!nfkd.isNormalized(cp)) continue;
            if (ucd.getCategory(cp) == Lu) continue; // don't count case
            
            String scp = UTF16.valueOf(cp);
--- a/tools/unicodetools/com/ibm/text/UCA/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Main.java
@ -5,18 +5,20 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ 
-* $Date: 2002/05/29 23:18:15 $ 
-* $Revision: 1.2 $
+* $Date: 2002/05/31 01:41:03 $ 
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */

 package com.ibm.text.UCA;
 import com.ibm.text.UCD.*;
+import com.ibm.text.utility.*;


 public class Main {
 	static final String UCDVersion = "";
+	static final String[] ICU_FILES = {"FractionalUCA", "writeconformance", "writeconformanceshifted", "WriteRules"};
 	
 	public static void main(String args[]) throws Exception {
 		
@ -36,7 +38,10 @@ public class Main {
        
        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
-            if      (arg.equalsIgnoreCase("WriteRulesWithNames")) WriteCollationData.writeRules(WriteCollationData.WITH_NAMES);
+            System.out.println("OPTION: " + arg);
+            
+            if		(arg.equalsIgnoreCase("ICU")) args = Utility.append(args, ICU_FILES);
+			else if (arg.equalsIgnoreCase("WriteRulesWithNames")) WriteCollationData.writeRules(WriteCollationData.WITH_NAMES);
            else if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(WriteCollationData.collator);
            else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(WriteCollationData.collator);
            else if (arg.equalsIgnoreCase("writeNonspacingDifference")) WriteCollationData.writeNonspacingDifference();
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ 
-* $Date: 2002/04/23 01:59:14 $ 
-* $Revision: 1.10 $
+* $Date: 2002/05/31 01:41:03 $ 
+* $Revision: 1.11 $
 *
 *******************************************************************************
 */
@ -108,6 +108,8 @@ final public class UCA implements Comparator {
    static final boolean RECORDING_DATA = false;
    static final boolean RECORDING_CHARS = true;
    
+    private UCD ucd;
+    
 // =============================================================
 // Main Methods
 // =============================================================
@ -129,7 +131,8 @@ final public class UCA implements Comparator {
            toD = new Normalizer(Normalizer.NFD, unicodeVersion);
        }
        
-        ucdVersion = UCD.make(unicodeVersion).getVersion();
+        ucd = UCD.make(unicodeVersion);
+        ucdVersion = ucd.getVersion();
        
        // either get the full sources, or just a demo set
        if (fullData) {
@ -478,7 +481,9 @@ final public class UCA implements Comparator {
     * CE Type
     */
    static final byte NORMAL_CE = 0, CONTRACTING_CE = 1, EXPANDING_CE = 2, 
-        FIXED_CE = 3, HANGUL_CE = 5, SURROGATE_CE = 6, UNSUPPORTED_CE = 7;
+        CJK_CE = 3, CJK_AB_CE = 4, HANGUL_CE = 5, UNSUPPORTED_CE = 7,
+        FIXED_CE = 3;
+        // SURROGATE_CE = 6, 
   
    /**
     * Returns the char associated with a FIXED value
@ -502,12 +507,13 @@ final public class UCA implements Comparator {
            // Special check for Han, Hangul
            if (isHangul(ch)) return HANGUL_CE;
            
-            if (isFixed(ch)) return FIXED_CE;
+            if (isCJK(ch)) return CJK_CE;
+            if (isCJK_AB(ch)) return CJK_AB_CE;
                        
            // special check for unsupported surrogate pair, 20 1/8 bits
-            if (0xD800 <= ch && ch <= 0xDFFF) {
-                return SURROGATE_CE;
-            }
+            //if (0xD800 <= ch && ch <= 0xDFFF) {
+            //    return SURROGATE_CE;
+            //}
            return UNSUPPORTED_CE;
        }
            
@ -632,6 +638,12 @@ final public class UCA implements Comparator {
        return result.toString();
    }
    
+    
+    static boolean isImplicitCE(int ce) { // tests whether the primary weight of ce lies in the UNSUPPORTED_* range
+    	int primary = getPrimary(ce); // primary weight extracted from the packed CE
+    	return primary >= UNSUPPORTED_BASE && primary <= UNSUPPORTED_TOP; // both bounds are inclusive
+    }
+    
    /**
     * Supplies a zero-padded hex representation of an integer (without 0x)
     */
@ -790,9 +802,9 @@ final public class UCA implements Comparator {
    
    /**
     * A special bit combination in a CE is used to reserve exception cases. This has the effect
-     * of removing 32 primary key values out of the 65536 possible.
+     * of removing a small number of the primary key values out of the 65536 possible.
     */
-    static final int EXCEPTION_CE_MASK = 0xFF000000;
+    static final int EXCEPTION_CE_MASK = 0xF8000000;
    
    /**
     * Used to composed Hangul and Han characters
@ -808,8 +820,13 @@ final public class UCA implements Comparator {
     * There are at least 34 values, so that we can use a range for surrogates
     * However, we do add to the first weight if we have surrogate pairs!
     */
-    public static final int UNSUPPORTED_BASE = 0xFF40;
-    public static final int UNSUPPORTED_TOP = 0xFFFF;
+    public static final int UNSUPPORTED_CJK_BASE = 0xFF40;
+    public static final int UNSUPPORTED_CJK_AB_BASE = 0xFF80;
+    public static final int UNSUPPORTED_OTHER_BASE = 0xFFC0;
+    
+    public static final int UNSUPPORTED_BASE = UNSUPPORTED_CJK_BASE;
+    public static final int UNSUPPORTED_TOP = UNSUPPORTED_OTHER_BASE + 0x40;
+    
    static final int UNSUPPORTED = makeKey(UNSUPPORTED_BASE, NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
    
    // was 0xFFC20101;
@ -821,7 +838,7 @@ final public class UCA implements Comparator {
     * to be looked up (with following characters) in the contractingTable.<br>
     * This isn't a MASK since there is exactly one value.
     */
-    static final int CONTRACTING = 0xFF310000;
+    static final int CONTRACTING = 0xFA310000;

    /**
     * Expanding characters are marked with a exception bit combination
@ -829,7 +846,7 @@ final public class UCA implements Comparator {
     * This means that they map to more than one CE, which is looked up in
     * the expansionTable by index. See EXCEPTION_INDEX_MASK
     */
-    static final int EXPANDING_MASK = 0xFF300000; // marks expanding range start
+    static final int EXPANDING_MASK = 0xFA300000; // marks expanding range start
    
    /**
     * This mask is used to get the index from an EXPANDING exception.
@ -976,12 +993,11 @@ final public class UCA implements Comparator {
                // RECURSIVE!!!
            }
                        
-            // Special check for Han, YI
-            if (isFixed(bigChar)) {
-                return makeKey(bigChar, NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
+            if (ucd.isNoncharacter(bigChar)) { // illegal code value, ignore!!
+                return 0;
            }
-                        
-            // special check for unsupported surrogate pair, 20 1/8 bits
+            
+            // special check and fix for unsupported surrogate pair, 20 1/8 bits
            if (0xD800 <= bigChar && bigChar <= 0xDFFF) {
                // ignore unmatched surrogates (e.g. return zero)
                if (bigChar >= 0xDC00 || index >= decompositionBuffer.length()) return 0; // unmatched
@ -990,25 +1006,38 @@ final public class UCA implements Comparator {
                index++; // skip next char
                bigChar = 0x10000 + ((ch - 0xD800) << 10) + (ch2 - 0xDC00); // extract value
            }
-
-            if ((bigChar & 0xFFFE) == 0xFFFE) { // illegal code value, ignore!!
-                return 0;
-            }
            
-            // The result is 2 CEs. One is UNSUPPORTED + top bits, and the other
-            // is a primary that is the next fifteen bits
-            // This has the effect of putting all unsupported characters at the end,
-            // in code order.
-                    // add bottom 5 bits to UNSUPPORTED, and push rest
-                    //return UNSUPPORTED + (bigChar & 0xFFFF0000);    // top bits added
-            expandingStack.push(makeKey((bigChar & 0x7FFF) | 0x8000, 0, 0)); // primary = bottom 15 bits plus turn bottom bit on.
-            // secondary and tertiary are both zero
-            return makeKey(UNSUPPORTED_BASE + (bigChar >>> 15), NEUTRAL_SECONDARY, NEUTRAL_TERTIARY); // top 34 values plus UNSUPPORTED
-            /*
-            expandingStack.push(((bigChar & 0x7FFF) << 16) | 0x10000000); // primary = bottom 15 bits plus turn bottom bit on.
-            // secondary and tertiary are both zero
-            return UNSUPPORTED + ((bigChar << 1) & 0xFFFF0000); // top 34 values plus UNSUPPORTED
-            */
+
+/*
+The formula from the UCA:
+
+BASE:
+
+FB40 CJK Ideograph 
+FB80 CJK Ideograph Extension A/B 
+FBC0 Any other code point 
+
+AAAA = BASE + (CP >> 15);
+BBBB = (CP & 0x7FFF) | 0x8000; the mapping given to CP is then given by:
+
+CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
+*/		
+			// divide the three cases
+			
+			int base = UNSUPPORTED_OTHER_BASE;
+            if (isCJK(bigChar)) base = UNSUPPORTED_CJK_BASE;
+            else if (isCJK_AB(bigChar)) base = UNSUPPORTED_CJK_AB_BASE;
+            
+            // Now compose the two keys
+            // first push BBBB
+                        
+            // HACK: expandingStack.push(makeKey((bigChar & 0x7FFF) | 0x8000, 0, 0));
+            expandingStack.push(makeKey((bigChar & 0x7FFF) | 0x8000, NEUTRAL_SECONDARY, NEUTRAL_TERTIARY));
+            
+            // now return AAAA
+            
+            return makeKey(base + (bigChar >>> 15), NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
+
        }
        if (ce == CONTRACTING) {
            // Contracting is probably the most interesting (read "tricky") part
@ -1084,12 +1113,18 @@ final public class UCA implements Comparator {
        return expandingStack.pop(); // pop last (guaranteed to exist!)
    }
    
-    public final boolean isFixed(int bigChar) {
-        return (0x3400 <= bigChar && bigChar <= 0x4DB5
-             || 0x4E00 <= bigChar && bigChar <= 0x9FA5
-             // || 0xA000 <= bigChar && bigChar <= 0xA48F
-             );
+    public final boolean isCJK(int bigChar) { // true when bigChar is in 4E00..9FFF (the CJK Unified Ideographs range listed below)
+        return (0x4E00 <= bigChar && bigChar <= 0x9FFF); // inclusive at both ends
    }
+    public final boolean isCJK_AB(int bigChar) { // true when bigChar is in one of the two extension ranges listed below
+        return (0x3400 <= bigChar && bigChar <= 0x4DBF // 3400..4DBF: CJK Unified Ideographs Extension A
+             || 0x20000 <= bigChar && bigChar <= 0x2A6DF); // 20000..2A6DF: CJK Unified Ideographs Extension B
+    }
+/*
+3400..4DBF; CJK Unified Ideographs Extension A
+4E00..9FFF; CJK Unified Ideographs
+20000..2A6DF; CJK Unified Ideographs Extension B
+*/
    
    private final boolean isHangul(int bigChar) {
        return (0xAC00 <= bigChar && bigChar <= 0xD7A3);
@ -1176,7 +1211,7 @@ final public class UCA implements Comparator {
        Normalizer nfd = skipDecomps;
        Iterator enum = null;
        byte ceLimit;
-        int currentRange = Integer.MAX_VALUE; // set to ZERO to enable
+        int currentRange = SAMPLE_RANGES.length; // set to ZERO to enable
        int startOfRange = SAMPLE_RANGES[0][0];
        int endOfRange = startOfRange;
        int itemInRange = startOfRange;
@ -1206,13 +1241,16 @@ final public class UCA implements Comparator {
            
            // normal case
            while (current++ < 0x10FFFF) {
+
                //char ch = (char)current;
                byte type = getCEType(current);
+                if (type >= ceLimit || type == CONTRACTING_CE) continue;
+                
+                //if (nfd.isNormalized(current) || type == HANGUL_CE) {
+                //}
+                
+                if (skipDecomps != null && !skipDecomps.isNormalized(current)) continue; // CHECK THIS
                
-                if (!nfd.normalizationDiffers(current) || type == HANGUL_CE) {
-                    if (type >= ceLimit) continue;
-                    if (skipDecomps != null && skipDecomps.normalizationDiffers(current)) continue;
-                }
                result = UTF16.valueOf(current);
                return result;
            }
@ -1226,6 +1264,7 @@ final public class UCA implements Comparator {
            
            // extra samples
            if (currentRange < SAMPLE_RANGES.length) {
+            	System.out.println("*");
                try {
                    result = UTF16.valueOf(itemInRange);
                } catch (RuntimeException e) {
@ -1274,6 +1313,7 @@ final public class UCA implements Comparator {
            result.second = s;
            return true;
        }
+        
    }
    
    static final int[][] SAMPLE_RANGES = {
@ -1299,7 +1339,7 @@ final public class UCA implements Comparator {
                {0x100000, 0x1000FD},
                {0x10FF00, 0x10FFFD},
    };
-                
+	                
    /**
     * Adds the collation elements from a file (or other stream) in the UCA format.
     * Values will override any previous mappings.
@ -1366,7 +1406,7 @@ final public class UCA implements Comparator {
            boolean record = true;
            /* if (multiChars.length() > 0) record = false;
            else */
-            if (toD.normalizationDiffers(value)) record = false;
+            if (!toD.isNormalized(value)) record = false;
            
            // collect CEs
            if (value == 0x2F00) {
@ -1402,6 +1442,8 @@ final public class UCA implements Comparator {
                expandingTable.push(TERMINATOR);
            }
            
+            //if (value == 0xd801) System.out.print("DEBUG: " + line);
+            	
            // assign CE(s) to char(s)
            if (multiChars.length() > 0) {
                contractingTable.put(multiChars.toString(), new Integer(ce));
@ -1455,8 +1497,9 @@ final public class UCA implements Comparator {
        }
        
        // assign CE(s) to char(s)
-        
        int value = source.charAt(0);
+        //if (value == 0x10000) System.out.print("DEBUG2: " + source);
+            	        
        if (source.length() > 0) {
            contractingTable.put(source.toString(), new Integer(ce));
            if (collationElements[value] == UNSUPPORTED) {
@ -1772,7 +1815,7 @@ final public class UCA implements Comparator {
     * Used for checking data file integrity
     */
    private void checkUnique(char value, int result, int fourth, String line) {
-        if (toD.normalizationDiffers(value)) return; // don't check decomposables.
+        if (!toD.isNormalized(value)) return; // don't check decomposables.
        Object ceObj = new Long(((long)result << 16) | fourth);
        Object probe = uniqueTable.get(ceObj);
        if (probe != null) {
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $ 
-* $Date: 2002/05/29 02:01:00 $ 
-* $Revision: 1.8 $
+* $Date: 2002/05/31 01:41:03 $ 
+* $Revision: 1.9 $
 *
 *******************************************************************************
 */
@ -29,7 +29,7 @@ public class WriteCharts implements UCD_Types {
    	Default.setUCD();
    	for (int i = 0xE000; i < 0x10000; ++i) {
    		if (!Default.ucd.isRepresented(i)) continue;
-    		if (Default.nfkc.normalizationDiffers(i)) continue;
+    		if (!Default.nfkc.isNormalized(i)) continue;
    		System.out.println(Default.ucd.getCodeAndName(i));
    	}
    }
@ -205,7 +205,7 @@ public class WriteCharts implements UCD_Types {
        	byte cat = Default.ucd.getCategory(i);
        	if (cat == Cs || cat == Co) continue;
        	
-        	if (!Default.nfkd.normalizationDiffers(i)) continue;
+        	if (Default.nfkd.isNormalized(i)) continue;
        	String decomp = Default.nfkd.normalize(i);
        	
        	byte script = getBestScript(decomp);
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ 
-* $Date: 2002/05/29 23:18:15 $ 
-* $Revision: 1.12 $
+* $Date: 2002/05/31 01:41:03 $ 
+* $Revision: 1.13 $
 *
 *******************************************************************************
 */
@ -190,7 +190,7 @@ public class WriteCollationData implements UCD_Types {
        for (char c = 0; c < 0xFFFF; ++c) {
            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
            if (0xAC00 <= c && c <= 0xD7A3) continue;
-            if (normKD.normalizationDiffers(c)) {
+            if (!normKD.isNormalized(c)) {
                ++count;
                String decomp = normKD.normalize(c);
                datasize += decomp.length();
@ -218,7 +218,7 @@ public class WriteCollationData implements UCD_Types {
        for (char c = 0; c < 0xFFFF; ++c) {
            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
            if (0xAC00 <= c && c <= 0xD7A3) continue;
-            if (normD.normalizationDiffers(c)) {
+            if (!normD.isNormalized(c)) {
                ++count;
                String decomp = normD.normalize(c);
                datasize += decomp.length();
@ -408,7 +408,7 @@ public class WriteCollationData implements UCD_Types {
        }
        log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
        for (char ch = 0; ch < 0xFFFF; ++ch) {
-            if (!nfkd.normalizationDiffers(ch)) continue;
+            if (nfkd.isNormalized(ch)) continue;
            if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
            String sortKey = collator.getSortKey(String.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
            String decompSortKey = collator.getSortKey(nfkd.normalize(ch), UCA.NON_IGNORABLE, decomposition);
@ -1148,6 +1148,9 @@ public class WriteCollationData implements UCD_Types {
        }
    }
    
+    static Normalizer nfdNew = new Normalizer(Normalizer.NFD, "");
+    static Normalizer nfkdNew = new Normalizer(Normalizer.NFKD, "");
+    
    static void writeRules (byte option) throws IOException {
        
        //testTransitivity();
@ -1155,6 +1158,7 @@ public class WriteCollationData implements UCD_Types {
        
        int[] ces = new int[50];
        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
+        Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
        
        if (false) {
        int len2 = collator.getCEs("\u2474", true, ces);
@ -1173,29 +1177,64 @@ public class WriteCollationData implements UCD_Types {
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, 
            SKIP_CANONICAL_DECOMPOSIBLES ? nfd : null);
        int[] lenArray = new int[1];
+        
+        Set alreadyDone = new HashSet();
+        PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", false, false);

        while (true) {
            String s = cc.next(ces, lenArray);
            if (s == null) break;
            int len = lenArray[0];
+            
+            if (s.equals("\uD800")) {
+            	System.out.println("Check: " + CEList.toString(ces, len));
+            }
+            
+            log2.println(s + "\t" + CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
+            
+            addToBackMap(backMap, ces, len, s, false);

-            if (len == 1) backMap.put(new Integer(ces[0]), s);
            String key = String.valueOf((char)(ces[0]>>>16))
                + String.valueOf((char)(ces[0] & 0xFFFF))
                + collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + UCA.codePointOrder(s);
+                
            ordered.put(key, s);
+            alreadyDone.add(s);
+            
            Object result = ordered.get(key);
            if (result == null) {
                System.out.println("BAD SORT: " + Utility.hex(key) + ", " + Utility.hex(s));
            }
        }
+        
+        System.out.println("Adding Kanji");
+        for (int i = 0; i < 0x10FFFF; ++i) {
+        	if (!ucd.isAllocated(i)) continue;
+        	if (nfkd.isNormalized(i)) continue;
+        	Utility.dot(i);
+        	String decomp = nfkd.normalize(i);
+        	int cp;
+        	for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
+        		cp = UTF16.charAt(decomp, j);
+        		String s = UTF16.valueOf(cp);
+        		if (alreadyDone.contains(s)) continue;

+        		alreadyDone.add(s);
+        		int len = collator.getCEs(s, true, ces);
+        		
+            	log2.println(s+ "\t" + CEList.toString(ces, len)
+            		+ "\t" + ucd.getCodeAndName(s) + " from " + ucd.getCodeAndName(i));
+            		
+            	addToBackMap(backMap, ces, len, s, false);
+        	}
+        }
+        
        System.out.println("Writing");
        
        String filename = "UCA_Rules.txt";
        if (option == WITH_NAMES) filename = "UCA_Rules_With_Names.txt";
        else if (option == IN_XML) filename = "UCA_Rules.xml";
-        log = Utility.openPrintWriter(filename);
+        log = Utility.openPrintWriter(filename, false, false);
        
        if (option == IN_XML) log.println("<uca>");
        else log.write('\uFEFF'); // BOM
@ -1351,60 +1390,35 @@ public class WriteCollationData implements UCD_Types {
            
            // get relation
            
-            int relation = 3;
            
            /*if (chr.charAt(0) == 0xFFFB) {
                System.out.println("DEBUG");
            }*/
            
-            if (collator.getPrimary(ce) != collator.getPrimary(lastCE)) {
-                relation = 0;
-            } else if (collator.getSecondary(ce) != collator.getSecondary(lastCE)) {
-                relation = 1;
-            } else if (collator.getTertiary(ce) != collator.getTertiary(lastCE)) {
-                relation = 2;
-            } else if (len > lastLen) {
-                relation = 2; // HACK
-            } else {
-                int minLen = len < lastLen ? len : lastLen;
-                for (int kk = 1; kk < minLen; ++kk) {
-                    int lc = lastCes[kk];
-                    int c = ces[kk];
-                    if (collator.getPrimary(c) != collator.getPrimary(lc)
-                      || collator.getSecondary(c) != collator.getSecondary(lc)) {
-                        relation = 3;   // reset relation on FIRST char, since differ anyway
-                        break;
-                      } else if (collator.getTertiary(c) > collator.getTertiary(lc)) {
-                        relation = 2;   // reset to tertiary (but later ce's might override!)
-                    }
-                }
-            }
+            
+            int relation = getStrengthDifference(ces, len, lastCes, lastLen);

-            /*if (chr.equals("\u2474")) {
+            if (chr.equals("\u2F00")) {
                System.out.println(UCA.ceToString(ces, len));
-            }*/
+            }
+            
+            // There are double-CEs, so we have to know what the length of the first bit is.
+            
+    		int expansionStart = 1;
+    		if (UCA.isImplicitCE(ces[0])) {
+    			expansionStart = 2; // move up if first is double-ce
+    		}
            
            // check expansions
            
            String expansion = "";
-            if (len > 1) {
-                int tert0 = ces[0] & 0xFF;
-                boolean isCompat = tert0 != 2 && tert0 != 8;
-                for (int i = 1; i < len; ++i) {
-                    int probe = ces[i];
-                    String s = getFromBackMap(backMap, probe);
-                    if (s == null) {
-                        int meHack = UCA.makeKey(0x1795,0x0020,0x0004);
-                        if (probe == meHack) {
-                            s = "\u3081";
-                        } else {
-                            System.out.println("No back map for " + collator.ceToString(ces[i])
-                                + ": " + ucd.getCodeAndName(chr));
-                            s = "[" + Utility.hex(ces[i]) + "]";
-                        }
-                    }
-                    expansion += s;
-                }
+            if (len > expansionStart) {
+                //int tert0 = ces[0] & 0xFF;
+                //boolean isCompat = tert0 != 2 && tert0 != 8;
+                log2.println("Exp: " + ucd.getCodeAndName(chr) + ", " + CEList.toString(ces, len) + ", start: " + expansionStart);
+                int[] rel = {relation};
+                expansion = getFromBackMap(backMap, ces, expansionStart, len, chr, rel);
+                relation = rel[0];
            }
            
            // print results
@ -1429,28 +1443,268 @@ public class WriteCollationData implements UCD_Types {
            } else {
                if (reset.length() != 0) log.println(reset);
                log.print(RELATION_NAMES[relation] + " " + quoteOperand(chr));
-                if (len > 1) log.print(" / " + quoteOperand(expansion));
+                if (expansion.length() > 0) log.print(" / " + quoteOperand(expansion));
                if (option == WITH_NAMES) {
                    log.print("\t# " 
                        + collator.ceToString(ces, len) + " " 
                        + ucd.getCodeAndName(chr));
-                    if (len > 1) log.print(" / " + Utility.hex(expansion));
+                    if (expansion.length() > 0) log.print(" / " + Utility.hex(expansion));
                }
                log.println();
            }
        }
        // log.println("& [top]"); // RESET
        if (option == IN_XML) log.println("</uca>");
+        log2.close();
        log.close();
        Utility.fixDot();
    }
    
+    /**
+     * Returns the primary weight of the leading collation element(s) of ces.
+     * When ces[0] is an implicit CE (per UCA.isImplicitCE), the primary is spread over the
+     * first two CEs, so both 16-bit primaries are packed into one value, first CE in the high half.
+     * @param ces collation elements; needs at least two entries when ces[0] is an implicit CE
+     * @return the single or packed primary weight, never negative
+     */
+    static long getPrimary(int[] ces) {
+    	if (UCA.isImplicitCE(ces[0])) {
+    		// widen BEFORE shifting: in int arithmetic a weight >= 0x8000 would spill into the sign bit
+    		return ((long) UCA.getPrimary(ces[0]) << 16) + UCA.getPrimary(ces[1]);
+    	} else {
+    		return UCA.getPrimary(ces[0]);
+    	}
+    }
+    
+    /**
+     * Returns the secondary weight of the leading collation element(s) of ces.
+     * When ces[0] is an implicit CE (per UCA.isImplicitCE), the secondaries of the first two CEs
+     * are packed into one value, first CE in the high half.
+     * @param ces collation elements; needs at least two entries when ces[0] is an implicit CE
+     * @return the single or packed secondary weight, never negative
+     */
+    static long getSecondary(int[] ces) {
+    	if (UCA.isImplicitCE(ces[0])) {
+    		// widen BEFORE shifting so the packed value cannot overflow int
+    		return ((long) UCA.getSecondary(ces[0]) << 16) + UCA.getSecondary(ces[1]);
+    	} else {
+    		return UCA.getSecondary(ces[0]);
+    	}
+    }
+    
+    /**
+     * Returns the tertiary weight of the leading collation element(s) of ces.
+     * When ces[0] is an implicit CE (per UCA.isImplicitCE), the tertiaries of the first two CEs
+     * are packed into one value, first CE in the high half.
+     * @param ces collation elements; needs at least two entries when ces[0] is an implicit CE
+     * @return the single or packed tertiary weight, never negative
+     */
+    static long getTertiary(int[] ces) {
+    	if (UCA.isImplicitCE(ces[0])) {
+    		// widen BEFORE shifting so the packed value cannot overflow int
+    		return ((long) UCA.getTertiary(ces[0]) << 16) + UCA.getTertiary(ces[1]);
+    	} else {
+    		return UCA.getTertiary(ces[0]);
+    	}
+    }
+    
+	/**
+	 * Determines the strength level at which ces first differs from lastCes.
+	 * The result is usable as an index into RELATION_NAMES:
+	 * 0 = primary difference, 1 = secondary, 2 = tertiary, 3 = no difference found.
+	 * The leading weights are compared through getPrimary/getSecondary/getTertiary, which
+	 * combine the first two CEs when the first one is an implicit CE.
+	 * @param ces      collation elements of the current entry
+	 * @param len      number of entries of ces to consider
+	 * @param lastCes  collation elements of the previous entry
+	 * @param lastLen  number of entries of lastCes to consider
+	 * @return the relation code, 0..3
+	 */
+	static int getStrengthDifference(int[] ces, int len, int[] lastCes, int lastLen) {
+		
+        // start from "no difference" and lower the level as differences are found
+        int relation = 3;
+        if (getPrimary(ces) != getPrimary(lastCes)) {
+            relation = 0;
+        } else if (getSecondary(ces) != getSecondary(lastCes)) {
+            relation = 1;
+        } else if (getTertiary(ces) != getTertiary(lastCes)) {
+            relation = 2;
+        } else if (len > lastLen) {
+            // leading weights are equal but the current entry has more CEs: reported as tertiary
+            relation = 2; // HACK
+        } else {
+            // leading weights are equal: walk the CEs both entries have in common
+            int minLen = len < lastLen ? len : lastLen;
+			// skip the leading CE(s) already compared above; the count is derived from ces only
+			int start = UCA.isImplicitCE(ces[0]) ? 2 : 1;
+            for (int kk = start; kk < minLen; ++kk) {
+                int lc = lastCes[kk];
+                int c = ces[kk];
+                if (collator.getPrimary(c) != collator.getPrimary(lc)
+                    || collator.getSecondary(c) != collator.getSecondary(lc)) {
+                    relation = 3;   // reset relation on FIRST char, since differ anyway
+                    break;
+                    } else if (collator.getTertiary(c) > collator.getTertiary(lc)) {
+                    relation = 2;   // reset to tertiary (but later ce's might override!)
+                }
+            }
+        }
+        return relation;
+    }
+    
+    
    // static final String[] RELATION_NAMES = {" <", "   <<", "     <<<", "         ="};
    static final String[] RELATION_NAMES = {" <\t", "  <<\t", "   <<<\t", "    =\t"};
    static final String[] XML_RELATION_NAMES = {"o1", "o2", "o3", "o4"};
    
-    static final String getFromBackMap(Map backMap, int probe) {
-        String s = (String)backMap.get(new Integer(probe));
+    /**
+     * Hash-map key that stands for the slice [start, limit) of an int array.
+     * Two wrappers are equal when their slices have the same length and the same contents,
+     * regardless of where the slices sit in their backing arrays.
+     * The backing array is referenced, not copied, so it must not be modified while the
+     * wrapper is stored as a key.
+     */
+    static class ArrayWrapper {
+    	int[] array;
+    	int start;
+    	int limit;
+    	
+    	/*public ArrayWrapper(int[] contents) {
+    		set(contents, 0, contents.length);
+    	}
+    	*/
+    	
+    	/**
+    	 * @param contents backing array (kept by reference)
+    	 * @param start    index of the first element of the slice
+    	 * @param limit    index one past the last element of the slice
+    	 */
+    	public ArrayWrapper(int[] contents, int start, int limit) {
+    		set(contents, start, limit);
+    	}
+    	
+    	private void set(int[] contents, int start, int limit) {
+    		array = contents;
+    		this.start = start;
+    		this.limit = limit;
+    	}
+    	
+    	/**
+    	 * Compares slice length and slice contents.
+    	 * Returns false (instead of throwing) for null or for objects of another type.
+    	 */
+    	public boolean equals(Object other) {
+    		if (this == other) return true;
+    		// guard the cast: Object.equals must answer false for null and foreign types
+    		if (!(other instanceof ArrayWrapper)) return false;
+    		ArrayWrapper that = (ArrayWrapper) other;
+    		if (that.limit - that.start != limit - start) return false;
+    		for (int i = start; i < limit; ++i) {
+    			// i - start is the offset inside the slice; translate it into that's array
+    			if (array[i] != that.array[i - start + that.start]) return false;
+    		}
+    		return true;
+    	}
+    	
+    	/** Hash over the slice length and the slice contents only, consistent with equals. */
+    	public int hashCode() {
+    		int result = limit - start;
+    		for (int i = start; i < limit; ++i) {
+    			result = result * 37 + array[i];
+    		}
+    		return result;
+    	}
+    }
+    
+    // Debugging aid: addToBackMap prints a "Test case" line whenever the first CE of the
+    // sequence being added is listed here.
+    static int testCase[] = {
+    	//collator.makeKey(0xFF40, 0x0020, 0x0002),
+    	collator.makeKey(0x0255, 0x0020, 0x000E),
+    };
+    
+    // Debugging aid: addToBackMap checks the string being added against this text
+    // (via indexOf) to decide whether to print a "Test case" line.
+    static String testString = "\u33C2\u002E";
+    
+    /**
+     * Linear search for key in array[start] .. array[limit-1].
+     * @param array the values to search
+     * @param start index of the first element to look at
+     * @param limit index one past the last element to look at
+     * @param key   the value to find
+     * @return true if some element in the range equals key
+     */
+    static boolean contains(int[] array, int start, int limit, int key) {
+        int index = start;
+        while (index < limit) {
+            if (key == array[index++]) {
+                return true;
+            }
+        }
+        return false;
+    }
+    
+    /**
+     * Registers s as the string that produces the first len collation elements of ces.
+     * The key is an ArrayWrapper over a private copy of ces, so the caller may reuse its array.
+     * A "Test case" line is printed when show is set, when ces[0] is listed in testCase,
+     * or when s is a non-empty substring of testString.
+     * @param backMap map from ArrayWrapper (CE sequence) to String
+     * @param ces     collation elements
+     * @param len     number of entries of ces that form the key
+     * @param s       the string that maps to those collation elements
+     * @param show    forces the debug output for this entry
+     */
+    static final void addToBackMap(Map backMap, int[] ces, int len, String s, boolean show) {
+    	// indexOf returns 0 for a match at the very start, so the test must be >= 0;
+    	// the length check keeps the empty string (found at index 0 of anything) silent
+    	boolean inTestString = s.length() > 0 && testString.indexOf(s) >= 0;
+    	if (show || contains(testCase, 0, testCase.length, ces[0]) || inTestString) {
+    		System.out.println("Test case: " + Utility.hex(s) + ", " + CEList.toString(ces, len));
+    	}
+		backMap.put(new ArrayWrapper((int[])(ces.clone()), 0, len), s);
+    }
+    
+    // Collation elements with a zero primary, secondaries 0x0153 through 0x0170 and tertiary 2.
+    // getFromBackMap consults this list when a CE has no back-map entry: a listed CE
+    // contributes the empty string to the expansion instead of being reported as an error.
+    static int[] ignorableList = {
+    	UCA.makeKey(0x0000, 0x0153, 0x0002),
+    	UCA.makeKey(0x0000, 0x0154, 0x0002),
+    	UCA.makeKey(0x0000, 0x0155, 0x0002),
+    	UCA.makeKey(0x0000, 0x0156, 0x0002),
+    	UCA.makeKey(0x0000, 0x0157, 0x0002),
+    	UCA.makeKey(0x0000, 0x0158, 0x0002),
+    	UCA.makeKey(0x0000, 0x0159, 0x0002),
+    	UCA.makeKey(0x0000, 0x015A, 0x0002),
+    	UCA.makeKey(0x0000, 0x015B, 0x0002),
+    	UCA.makeKey(0x0000, 0x015C, 0x0002),
+    	UCA.makeKey(0x0000, 0x015D, 0x0002),
+    	UCA.makeKey(0x0000, 0x015E, 0x0002),
+    	UCA.makeKey(0x0000, 0x015F, 0x0002),
+    	UCA.makeKey(0x0000, 0x0160, 0x0002),
+    	UCA.makeKey(0x0000, 0x0161, 0x0002),
+    	UCA.makeKey(0x0000, 0x0162, 0x0002),
+    	UCA.makeKey(0x0000, 0x0163, 0x0002),
+    	UCA.makeKey(0x0000, 0x0164, 0x0002),
+    	UCA.makeKey(0x0000, 0x0165, 0x0002),
+    	UCA.makeKey(0x0000, 0x0166, 0x0002),
+    	UCA.makeKey(0x0000, 0x0167, 0x0002),
+    	UCA.makeKey(0x0000, 0x0168, 0x0002),
+    	UCA.makeKey(0x0000, 0x0169, 0x0002),
+    	UCA.makeKey(0x0000, 0x016A, 0x0002),
+    	UCA.makeKey(0x0000, 0x016B, 0x0002),
+    	UCA.makeKey(0x0000, 0x016C, 0x0002),
+    	UCA.makeKey(0x0000, 0x016D, 0x0002),
+    	UCA.makeKey(0x0000, 0x016E, 0x0002),
+    	UCA.makeKey(0x0000, 0x016F, 0x0002),
+    	UCA.makeKey(0x0000, 0x0170, 0x0002),
+    };
+    
+    /**
+     * Maps the collation elements originalces[expansionStart] .. originalces[len-1] back to a
+     * string, using backMap (ArrayWrapper over a CE sequence -> String).
+     * Works on a copy of originalces, so the caller's array is left untouched.
+     * Step 1 folds each tertiary weight into one of three values (2, 8 or 0xE).
+     * Step 2 walks the CEs left to right, each time taking the longest run that has a
+     * back-map entry; a CE without any entry goes through a series of fallbacks.
+     * @param backMap        map from CE sequences to the strings that produce them
+     * @param originalces    the collation elements of chr
+     * @param expansionStart index of the first CE to map back
+     * @param len            number of valid entries in originalces
+     * @param chr            the string the CEs belong to; only used in the error message
+     * @param rel            one-element in/out array holding the relation code; lowered to 1
+     *                       when a CE from ignorableList is dropped
+     * @return the concatenated back-mapped strings; an unmappable CE appears as "[hex]"
+     */
+    static final String getFromBackMap(Map backMap, int[] originalces, int expansionStart, int len, String chr, int[] rel) {
+    	int[] ces = (int[])(originalces.clone());
+    	
+    	String expansion = "";
+    	
+    	// process ces to neutralize tertiary
+    	
+    	for (int i = expansionStart; i < len; ++i) {
+    		int probe = ces[i];
+        	char primary = collator.getPrimary(probe);
+        	char secondary = collator.getSecondary(probe);
+        	char tertiary = collator.getTertiary(probe);
+    		
+            // fold the tertiary into one of three representative values: 8, 0xE, or 2
+            int tert = tertiary;
+            switch (tert) {
+            case 8: case 9: case 0xA: case 0xB: case 0xC: case 0x1D:
+                tert = 8;
+                break;
+            case 0xD: case 0x10: case 0x11: case 0x12: case 0x13: case 0x1C:
+                tert = 0xE;
+                break;
+            default:
+                tert = 2;
+                break;
+            }
+            ces[i] = collator.makeKey(primary, secondary, tert);
+    	}
+    	
+        // no increment in the loop header: i jumps to limit after each matched run
+        for (int i = expansionStart; i < len;) {
+        	int limit;
+        	String s = null;
+        	// longest match first: try [i, len), then shrink the run from the right
+        	for (limit = len; limit > i; --limit) {
+        		ArrayWrapper wrapper = new ArrayWrapper(ces, i, limit);
+        		s = (String)backMap.get(wrapper);
+            	if (s != null) break;
+            }
+            if (s == null) {
+            	// not even the single CE at i is in the back map: try the fallbacks in order
+            	do {
+            		// fallback 1: a CE from ignorableList contributes nothing to the expansion
+            		if (contains(ignorableList, 0, ignorableList.length, ces[i])) {
+            			s = "";
+            			if (rel[0] > 1) rel[0] = 1; // HACK
+            			break;
+            		}
+            		
+            		// Try stomping the value to different tertiaries
+            		
+    				int probe = ces[i];
+        			char primary = collator.getPrimary(probe);
+        			char secondary = collator.getSecondary(probe);
+	        		
+            		// fallback 2: same primary/secondary with tertiary 2
+            		ces[i] = collator.makeKey(primary, secondary, 2);
+        			ArrayWrapper wrapper = new ArrayWrapper(ces, i, i+1);
+        			s = (String)backMap.get(wrapper);
+        			if (s != null) break;
+            
+            		// fallback 3: same primary/secondary with tertiary 0xE
+            		ces[i] = collator.makeKey(primary, secondary,0xE);
+        			wrapper = new ArrayWrapper(ces, i, i+1);
+        			s = (String)backMap.get(wrapper);
+        			if (s != null) break;
+
+					/*
+                	int meHack = UCA.makeKey(0x1795,0x0020,0x0004);
+                	if (ces[i] == meHack) {
+                    	s = "\u3081";
+                    	break;
+                    }
+                    */
+                    
+                    // we failed completely. Print error message, and bail
+                    
+                    System.out.println("No back map for " + collator.ceToString(ces[i])
+                        + " from " + CEList.toString(ces, len));
+                    System.out.println("\t" + ucd.getCodeAndName(chr)
+                        + " => " + ucd.getCodeAndName(nfkdNew.normalize(chr))
+                    );
+                    s = "[" + Utility.hex(ces[i]) + "]";
+        	    } while (false); // exactly one time, just for breaking
+            	// every fallback consumes exactly one CE
+            	limit = i + 1;
+            }
+            expansion += s;
+            i = limit;
+        }
+        return expansion;
+    }
+    
+    /*
+
+    static final String getFromBackMap(Map backMap, int[] ces, int index, int limit) {
+    	ArrayWrapper wrapper = new ArrayWrapper(ces, index, limit);
+    	
+    	int probe = ces[index];
+    	wrapperContents[0] = probe;
+        String s = (String)backMap.get(wrapper);
+        
+        outputLen[0] = 1;
        if (s != null) return s;
        
        char primary = collator.getPrimary(probe);
@ -1473,25 +1727,31 @@ public class WriteCollationData implements UCD_Types {
                break;
            }
            probe = collator.makeKey(primary, secondary, tert);
-            s = (String)backMap.get(new Integer(probe));
+            wrapperContents[0] = probe;
+            s = (String)backMap.get(wrapper);
            if (s != null) return s;
                
            probe = collator.makeKey(primary, secondary, collator.NEUTRAL_TERTIARY);
-            s = (String)backMap.get(new Integer(probe));
+            wrapperContents[0] = probe;
+            s = (String)backMap.get(wrapper);
        }
        if (s != null) return s;
        
        if (primary != 0 && secondary != collator.NEUTRAL_SECONDARY) {
-            String first = getFromBackMap(backMap, 
-                collator.makeKey(primary, collator.NEUTRAL_SECONDARY, tertiary));
-            String second = getFromBackMap(backMap, 
-                collator.makeKey(0, secondary, collator.NEUTRAL_TERTIARY));
+        	int[] dummyArray = new int[1];
+        	dummyArray[0] = collator.makeKey(primary, collator.NEUTRAL_SECONDARY, tertiary);
+            String first = getFromBackMap(backMap, dummyArray, 0, outputLen);
+            
+            dummyArray[0] = collator.makeKey(0, secondary, collator.NEUTRAL_TERTIARY);
+            String second = getFromBackMap(backMap, dummyArray, 0, outputLen);
+            
            if (first != null && second != null) {
                s = first + second;
            }
        }
        return s;
    }
+    */
    
    static final String[] RELATION = {
        "<", " << ", "  <<<  ", "    =    ", "    =    ", "    =    ", "  >>>  ", " >> ", ">"
--- a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $ 
-* $Date: 2002/04/23 01:59:16 $ 
-* $Revision: 1.6 $
+* $Date: 2002/05/31 01:41:03 $ 
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -283,7 +283,7 @@ public class WriteHTMLCollation implements UCD_Types {
        }
        log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
        for (char ch = 0; ch < 0xFFFF; ++ch) {
-            if (!nfkd.normalizationDiffers(ch)) continue;
+            if (nfkd.isNormalized(ch)) continue;
            if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
            String sortKey = collator.getSortKey(String.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
            String decompSortKey = collator.getSortKey(nfkd.normalize(ch), UCA.NON_IGNORABLE, decomposition);
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
-* $Date: 2002/04/23 01:59:13 $
-* $Revision: 1.13 $
+* $Date: 2002/05/31 01:41:04 $
+* $Revision: 1.14 $
 *
 *******************************************************************************
 */
@ -281,7 +281,7 @@ public final class DerivedProperty implements UCD_Types {
        }
                
        public String getValue(int cp, byte style) { 
-            if (nfx.normalizationDiffers(cp)) return NO;
+            if (!nfx.isNormalized(cp)) return NO;
            else if (nfx.isTrailing(cp)) return MAYBE;
            else return "";
        }
@ -598,7 +598,7 @@ of characters, the first of which has a non-zero combining class.
            }
            boolean hasValue(int cp) {
                if (hasSoftDot(cp)) return true;
-                if (!Default.nfkd.normalizationDiffers(cp)) return false;
+                if (Default.nfkd.isNormalized(cp)) return false;
                String decomp = Default.nfd.normalize(cp);
                boolean ok = false;
                for (int i = decomp.length()-1; i >= 0; --i) {
@ -700,7 +700,7 @@ of characters, the first of which has a non-zero combining class.
        
       // if (true) throw new IllegalArgumentException("FIX Default.nf[2]");
        
-        if (!Default.nf[NFKD].normalizationDiffers(cp)) return Lo;
+        if (Default.nf[NFKD].isNormalized(cp)) return Lo;

        String norm = Default.nf[NFKD].normalize(cp);
        int cp2;
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
-* $Date: 2002/04/23 01:59:14 $
-* $Revision: 1.9 $
+* $Date: 2002/05/31 01:41:04 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -416,7 +416,7 @@ public class GenerateCaseFolding implements UCD_Types {
    
    static boolean specialNormalizationDiffers(int ch) {
        if (ch == 0x00DF) return true;                  // es-zed
-        return Default.nfkd.normalizationDiffers(ch);
+        return !Default.nfkd.isNormalized(ch);
    }
    
    static String specialNormalization(String s) {
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2002/05/29 02:01:00 $
-* $Revision: 1.18 $
+* $Date: 2002/05/31 01:41:04 $
+* $Revision: 1.19 $
 *
 *******************************************************************************
 */
@ -1232,7 +1232,7 @@ public class GenerateData implements UCD_Types {
            Utility.dot(i);
            if (!Default.ucd.isRepresented(i)) continue;
            
-            if (!Default.nfd.normalizationDiffers(i)) {
+            if (Default.nfd.isNormalized(i)) {
                if (Default.ucd.getScript(i) == LATIN_SCRIPT) {
                    int cp = i;
                    String hex = "u" + Utility.hex(cp, 4);
@ -1358,7 +1358,7 @@ public class GenerateData implements UCD_Types {
        for (int i = 0; i < 0x10FFFF; ++i) {
            if ((i & 0xFFF) == 0) System.out.println("# " + i);
            if (!Default.ucd.isAssigned(i)) continue;
-            if (!Default.nfd.normalizationDiffers(i)) continue;
+            if (Default.nfd.isNormalized(i)) continue;
            String decomp = Default.nfd.normalize(i);
            int cp;
            for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2002/05/29 02:01:00 $
-* $Revision: 1.12 $
+* $Date: 2002/05/31 01:41:04 $
+* $Revision: 1.13 $
 *
 *******************************************************************************
 */
@ -119,10 +119,12 @@ public final class Main implements UCD_Types {

            if (arg.equalsIgnoreCase("All")) {
                // Append all args at end
+                /*
                String[] temp = new String[args.length + ALL_FILES.length];
                System.arraycopy(args, 0, temp, 0, args.length);
                System.arraycopy(ALL_FILES, 0, temp, args.length, ALL_FILES.length);
-                args = temp;
+                */
+                args = Utility.append(args, ALL_FILES);
                expanding = true;

            // EXTRACTED PROPERTIES
--- a/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java
@ -67,7 +67,7 @@ public final class NFSkippable extends UnicodeProperty {
        if (!ucd.isAssigned(cp)) return true;
        
        if (DEBUG) cause = "\t\tnf differs";
-        if (nf.normalizationDiffers(cp)) return false;
+        if (!nf.isNormalized(cp)) return false;
        
        if (DEBUG) cause = "\t\tnon-zero cc";
        if (ucd.getCombiningClass(cp) != 0) return false;
@ -87,7 +87,7 @@ public final class NFSkippable extends UnicodeProperty {
        // "displaced", so we don't have to test further
        
        if (DEBUG) cause = "\t\tno decomp";
-        if (!nfd.normalizationDiffers(cp)) return true;
+        if (nfd.isNormalized(cp)) return true;
        
        // OPTIMIZATION -- careful
        // Hangul syllables are skippable IFF they are isLeadingJamoComposition
@ -265,7 +265,7 @@ public final class NFSkippable extends UnicodeProperty {
                    byte cat = skipper.ucd.getCategory(cp);
                    if (cat == PRIVATE_USE || cat == SURROGATE) continue;
                    if (skipper.ucd.getCombiningClass(cp) != 0) continue;
-                    if (skipper.nf.normalizationDiffers(cp)) continue;
+                    if (!skipper.nf.isNormalized(cp)) continue;
                    if ((cp < 0xAC00 || cp > 0xAE00)
                        && cp != skipper.ucd.mapToRepresentative(cp, false)) continue;
                }
--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
-* $Date: 2002/03/20 00:21:42 $
-* $Revision: 1.8 $
+* $Date: 2002/05/31 01:41:03 $
+* $Revision: 1.9 $
 *
 *******************************************************************************
 */
@ -205,8 +205,8 @@ public final class Normalizer implements UCD_Types {
    * normalizer.
    * @param   ch      the source character
    */
-    public boolean normalizationDiffers(int ch) {
-        return data.normalizationDiffers(ch, composition, compatibility);
+    public boolean isNormalized(int ch) {
+        return !data.normalizationDiffers(ch, composition, compatibility);
    }

    /**
--- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
-* $Date: 2002/05/29 02:01:00 $
-* $Revision: 1.13 $
+* $Date: 2002/05/31 01:41:03 $
+* $Revision: 1.14 $
 *
 *******************************************************************************
 */
@ -273,7 +273,7 @@ public class VerifyUCD implements UCD_Types {
    }
    
    public static boolean checkNormalizer(Normalizer x, int cp) {
-    	boolean result = x.normalizationDiffers(cp);
+    	boolean result = !x.isNormalized(cp);
    	if (false) {
    		String s = x.normalize(cp);
    		boolean sResult = !s.equals(UTF16.valueOf(cp));
@ -291,7 +291,7 @@ public class VerifyUCD implements UCD_Types {
            Utility.dot(cp);
            if (!Default.ucd.isAllocated(cp)) continue;
            
-            if (!Default.nfd.normalizationDiffers(cp)) continue;
+            if (Default.nfd.isNormalized(cp)) continue;
            
            String decomp = Default.nfd.normalize(cp);
            String comp = Default.nfc.normalize(cp);
@ -979,12 +979,12 @@ can help you narrow these down.
            if (cp == 0x3131) {
                System.out.println("Debug: " + idnProhibited
                    + ", " + idnUnassigned
-                    + ", " + Default.nfkd.normalizationDiffers(cp)
+                    + ", " + !Default.nfkd.isNormalized(cp)
                    + ", " + Default.ucd.getCodeAndName(Default.nfkc.normalize(cp))
                    + ", " + Default.ucd.getCodeAndName(Default.nfc.normalize(cp)));
            } 
            
-            if (!idnProhibited && ! idnUnassigned && Default.nfkd.normalizationDiffers(cp)) {
+            if (!idnProhibited && ! idnUnassigned && !Default.nfkd.isNormalized(cp)) {
                String kc = Default.nfkc.normalize(cp);
                String c = Default.nfc.normalize(cp);
                if (kc.equals(c)) continue;
@ -1415,7 +1415,7 @@ E0020-E007F; [TAGGING CHARACTERS]
                Utility.dot(cp);
                if (!Default.ucd.isAssigned(cp)) continue;
                if (Default.ucd.isPUA(cp)) continue;
-                if (!normalizationDiffers(cp, j)) continue;
+                if (isNormalized(cp, j)) continue;

                if (cp == 0xFDFB || cp == 0x0140) {
                    System.out.println("debug point");
@ -1478,9 +1478,9 @@ E0020-E007F; [TAGGING CHARACTERS]
        return Default.ucd.getCase(s, FULL, FOLD);
    }

-    static boolean normalizationDiffers(int cp, int j) {
-        if (j < 4) return Default.nf[j].normalizationDiffers(cp);
-        return true;
+    /**
+     * Tells whether cp is already normalized under normalization form number j.
+     * For j below 4 the answer comes from Default.nf[j]; for any other j the method
+     * answers false, so callers never skip the code point on that basis.
+     * @param cp the code point to test
+     * @param j  index of the form to test against
+     * @return true only if j is below 4 and Default.nf[j] reports cp as normalized
+     */
+    static boolean isNormalized(int cp, int j) {
+        // no negation: this method replaced normalizationDiffers(cp, j) and its caller
+        // dropped the '!', so the sense of the result has to be flipped exactly once
+        if (j < 4) return Default.nf[j].isNormalized(cp);
+        return false;
     }

    private static final String[] NAMES = {"Default.nfd", "NFC", "NFKD", "NFKC", "Fold"};
@ -1489,7 +1489,7 @@ E0020-E007F; [TAGGING CHARACTERS]
        for (int j = 0; j < 4; ++j) {
            Normalizer nfx = Default.nf[j];
            System.out.println();
-            System.out.println("Testing normalizationDiffers for " + NAMES[j]);
+            System.out.println("Testing isNormalized for " + NAMES[j]);
            System.out.println();
            for (int i = 0; i < 0x10FFFF; ++i) {
                Utility.dot(i);
@ -1497,7 +1497,7 @@ E0020-E007F; [TAGGING CHARACTERS]
                if (Default.ucd.isPUA(i)) continue;
                String s = nfx.normalize(i);
                boolean differs = !s.equals(UTF32.valueOf32(i));
-                boolean call = nfx.normalizationDiffers(i);
+                boolean call = !nfx.isNormalized(i);
                if (differs != call) {
                    Utility.fixDot();
                    System.out.println("Problem: differs: " + differs
@ -1597,7 +1597,7 @@ E0020-E007F; [TAGGING CHARACTERS]
    
    static public void verifyNormalizationStability2(String version) {
        
-        Default.nfd.normalizationDiffers(0x10300);
+        // Default.nfd.normalizationDiffers(0x10300);
        
        UCD older = UCD.make(version); // Default.ucd.getPreviousVersion();
        
@ -1640,7 +1640,7 @@ E0020-E007F; [TAGGING CHARACTERS]
            } else {
            	// not in older version. 
            	// (1) If there is a decomp, and it is composed of all OLD characters, then it must NOT compose
-            	if (Default.nfd.normalizationDiffers(i)) {
+            	if (!Default.nfd.isNormalized(i)) {
            		String decomp = Default.nfd.normalize(i);
            		if (noneHaveCategory(decomp, Cn, older)) {
            			String recomp = Default.nfc.normalize(decomp);
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2002/04/24 02:38:52 $
-* $Revision: 1.15 $
+* $Date: 2002/05/31 01:41:04 $
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@ -22,6 +22,13 @@ import com.ibm.text.UCD.*;
 public final class Utility {    // COMMON UTILITIES

    static final boolean UTF8 = true; // TODO -- make argument
+    
+    /**
+     * Concatenates two string arrays into a newly allocated array.
+     * @param array1 the elements that come first
+     * @param array2 the elements that follow
+     * @return a new array holding all of array1 followed by all of array2
+     */
+    public static String[] append(String[] array1, String[] array2) {
+        final int split = array1.length;
+        final int tail = array2.length;
+        String[] joined = new String[split + tail];
+        System.arraycopy(array2, 0, joined, split, tail);
+        System.arraycopy(array1, 0, joined, 0, split);
+        return joined;
+    }

    public static String getName(int i, String[] names) {
        try {