Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
2001-10-31 00:02:54 +00:00 · 2001-10-31 00:02:54 +00:00 · 73ed7bfac5
commit 73ed7bfac5
parent 02f44eee5c
7 changed files with 320 additions and 162 deletions
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ 
-* $Date: 2001/10/26 23:32:03 $ 
-* $Revision: 1.6 $
+* $Date: 2001/10/31 00:01:28 $ 
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -236,7 +236,7 @@ final public class UCA implements Comparator {
            
            // add weights
            char w = getPrimary(ce);
-            if (DEBUG) System.out.println("\tCE: " + hex(ce));
+            if (DEBUG) System.out.println("\tCE: " + Utility.hex(ce));
            if (w != 0) primaries.append(w);
            
            w = getSecondary(ce);
@ -490,7 +490,10 @@ final public class UCA implements Comparator {
    /**
     * Return the type of the CE
     */
-    public byte getCEType(char ch) {
+    public byte getCEType(int ch) {
+        
+        if (ch > 0xFFFF) ch = UTF16.getLeadSurrogate(ch); // supplementary code point: index the table by its lead surrogate
+        
        int ce = collationElements[ch];
        if ((ce & EXCEPTION_CE_MASK) != EXCEPTION_CE_MASK) return NORMAL_CE;
        if (ce == UNSUPPORTED) {
@ -586,7 +589,7 @@ final public class UCA implements Comparator {
                result.append("|");
                needSep = true;
            } else {
-                result.append(hex(ch));
+                result.append(Utility.hex(ch));
                needSep = true;
            }
        }
@ -598,9 +601,9 @@ final public class UCA implements Comparator {
     * Produces a human-readable string for a collation element
     */
    static public String ceToString(int ce) {
-        return "[" + hex(getPrimary(ce)) + "." 
-          + hex(getSecondary(ce)) + "."
-          + hex(getTertiary(ce)) + "]";
+        return "[" + Utility.hex(getPrimary(ce)) + "." 
+          + Utility.hex(getSecondary(ce)) + "."
+          + Utility.hex(getTertiary(ce)) + "]";
    }
    
    /**
@ -631,32 +634,36 @@ final public class UCA implements Comparator {
    /**
     * Supplies a zero-padded hex representation of an integer (without 0x)
     */
+    /*
    static public String hex(int i) {
        String result = Long.toString(i & 0xFFFFFFFFL, 16).toUpperCase();
        return "00000000".substring(result.length(),8) + result;
    }
-    
+    */
    /**
     * Supplies a zero-padded hex representation of a Unicode character (without 0x, \\u)
     */
+    /*
    static public String hex(char i) {
        String result = Integer.toString(i, 16).toUpperCase();
        return "0000".substring(result.length(),4) + result;
    }
-    
+    */
    /**
     * Supplies a zero-padded hex representation of a Unicode character (without 0x, \\u)
     */
+     /*
    static public String hex(byte b) {
        int i = b & 0xFF;
        String result = Integer.toString(i, 16).toUpperCase();
        return "00".substring(result.length(),2) + result;
    }
-    
+    */
    /**
     * Supplies a zero-padded hex representation of a Unicode String (without 0x, \\u)
     *@param sep can be used to give a sequence, e.g. hex("ab", ",") gives "0061,0062"
     */
+     /*
    static public String hex(String s, String sep) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < s.length(); ++i) {
@ -665,11 +672,12 @@ final public class UCA implements Comparator {
        }
        return result.toString();
    }
-    
+    */
    /**
     * Supplies a zero-padded hex representation of a Unicode String (without 0x, \\u)
     *@param sep can be used to give a sequence, e.g. hex("ab", ",") gives "0061,0062"
     */
+     /*
    static public String hex(StringBuffer s, String sep) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < s.length(); ++i) {
@ -678,6 +686,7 @@ final public class UCA implements Comparator {
        }
        return result.toString();
    }
+    */
    
 // =============================================================
 // Privates
@ -1161,6 +1170,7 @@ final public class UCA implements Comparator {
    public class UCAContents {
        int current = -1;
        Normalizer skipDecomps = new Normalizer(Normalizer.NFD);
+        Normalizer nfd = skipDecomps;
        Iterator enum = null;
        byte ceLimit;
        int currentRange = Integer.MAX_VALUE; // set to ZERO to enable
@ -1191,11 +1201,15 @@ final public class UCA implements Comparator {
            String result = null; // null if done
            
            // normal case
-            while (current++ <= 0xFFFF) {
-                char ch = (char)current;
-                if (getCEType(ch) >= ceLimit) continue;
-                if (skipDecomps != null && skipDecomps.hasDecomposition(ch)) continue;
-                result = String.valueOf(ch);
+            while (current++ < 0x10FFFF) {
+                //char ch = (char)current;
+                byte type = getCEType(current);
+                
+                if (!nfd.normalizationDiffers(current) || type == HANGUL_CE) {
+                    if (type >= ceLimit) continue;
+                    if (skipDecomps != null && skipDecomps.hasDecomposition(current)) continue;
+                }
+                result = UTF16.valueOf(current);
                return result;
            }
            
@ -1502,19 +1516,19 @@ final public class UCA implements Comparator {
        
        hangulHackBottom = collationElements[0x1100] & 0xFFFF0000; // remove secondaries & tertiaries
        hangulHackTop = collationElements[0x11F9] | 0xFFFF; // bump up secondaries and tertiaries
-        if (SHOW_STATS) System.out.println("\tHangul Hack: " + hex(hangulHackBottom) + ", " + hex(hangulHackTop));
+        if (SHOW_STATS) System.out.println("\tHangul Hack: " + Utility.hex(hangulHackBottom) + ", " + Utility.hex(hangulHackTop));
        
        // show some statistics
        if (SHOW_STATS) System.out.println("\tcount1: " + count1);
        if (SHOW_STATS) System.out.println("\tcount2: " + max2);
        if (SHOW_STATS) System.out.println("\tcount3: " + max3);
        
-        if (SHOW_STATS) System.out.println("\tMIN1/MAX1: " + hex(MIN1) + "/" + hex(MAX1));
-        if (SHOW_STATS) System.out.println("\tMIN2/MAX2: " + hex(MIN2) + "/" + hex(MAX2));
-        if (SHOW_STATS) System.out.println("\tMIN3/MAX3: " + hex(MIN3) + "/" + hex(MAX3));
+        if (SHOW_STATS) System.out.println("\tMIN1/MAX1: " + Utility.hex(MIN1) + "/" + Utility.hex(MAX1));
+        if (SHOW_STATS) System.out.println("\tMIN2/MAX2: " + Utility.hex(MIN2) + "/" + Utility.hex(MAX2));
+        if (SHOW_STATS) System.out.println("\tMIN3/MAX3: " + Utility.hex(MIN3) + "/" + Utility.hex(MAX3));
        
-        if (SHOW_STATS) System.out.println("\tVar Min/Max: " + hex(variableLow) + "/" + hex(variableHigh));
-        if (SHOW_STATS) System.out.println("\tNon-Var Min: " + hex(nonVariableLow));
+        if (SHOW_STATS) System.out.println("\tVar Min/Max: " + Utility.hex(variableLow) + "/" + Utility.hex(variableHigh));
+        if (SHOW_STATS) System.out.println("\tNon-Var Min: " + Utility.hex(nonVariableLow));
        
        if (SHOW_STATS) System.out.println("\trenumberedVariable: " + renumberedVariable);
    }
@ -1565,7 +1579,7 @@ final public class UCA implements Comparator {
            if (strength > 1) {
                if (weights.get(i)) {
                    count++;
-                    p.println(mf.format(new Object[] {hex((char)i), new Integer(stCounts[strength][i])}));
+                    p.println(mf.format(new Object[] {Utility.hex((char)i), new Integer(stCounts[strength][i])}));
                }
                continue;
            }
@ -1575,8 +1589,8 @@ final public class UCA implements Comparator {
                int last = i-1;
                int diff = last - first + 1;
                count += diff;
-                String lastStr = last == first ? "" : hex((char)last);
-                p.println(mf.format(new Object[] {hex((char)first),lastStr,new Integer(diff), new Integer(count)}));
+                String lastStr = last == first ? "" : Utility.hex((char)last);
+                p.println(mf.format(new Object[] {Utility.hex((char)first),lastStr,new Integer(diff), new Integer(count)}));
                first = -1;
            }
        }
@ -1623,17 +1637,17 @@ final public class UCA implements Comparator {
            variable = false; // FIX DATA FILE
        }
        if (key2 > 0x1FF) {
-            throw new IllegalArgumentException("Weight2 doesn't fit: " + hex(key2) + "," + line);
+            throw new IllegalArgumentException("Weight2 doesn't fit: " + Utility.hex(key2) + "," + line);
        }
        if (key3 > 0x7F) {
-            throw new IllegalArgumentException("Weight3 doesn't fit: " + hex(key3) + "," + line);
+            throw new IllegalArgumentException("Weight3 doesn't fit: " + Utility.hex(key3) + "," + line);
        }
        // adjust variable bounds, if needed
        if (variable) {
            if (key1 > nonVariableLow) {
                if (!haveVariableWarning) {
                    System.out.println("\tBAD DATA: Variable overlap, nonvariable low: "
-                    + hex(nonVariableLow) + ", line: \"" + line + "\"");
+                    + Utility.hex(nonVariableLow) + ", line: \"" + line + "\"");
                    haveVariableWarning = true;
                }
            } else {
@ -1644,7 +1658,7 @@ final public class UCA implements Comparator {
            if (key1 < variableHigh) {
                if (!haveVariableWarning) {
                    System.out.println("\tBAD DATA: Variable overlap, variable high: "
-                    + hex(variableHigh) + ", line: \"" + line + "\"");
+                    + Utility.hex(variableHigh) + ", line: \"" + line + "\"");
                    haveVariableWarning = true;
                }
            } else {
@ -1717,8 +1731,8 @@ final public class UCA implements Comparator {
        Object ceObj = new Long(((long)result << 16) | fourth);
        Object probe = uniqueTable.get(ceObj);
        if (probe != null) {
-            System.out.println("\tCE(" + hex(value) 
-              + ")=CE(" + hex(((Character)probe).charValue()) + "); " + line);
+            System.out.println("\tCE(" + Utility.hex(value) 
+              + ")=CE(" + Utility.hex(((Character)probe).charValue()) + "); " + line);
              
        } else {
            uniqueTable.put(ceObj, new Character(value));
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ 
-* $Date: 2001/10/26 23:32:03 $ 
-* $Revision: 1.6 $
+* $Date: 2001/10/31 00:01:28 $ 
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -14,6 +14,7 @@
 package com.ibm.text.UCA;

 import java.util.*;
+import com.ibm.text.UTF16;

 import java.io.*;
 //import java.text.*;
@ -106,13 +107,13 @@ public class WriteCollationData implements UCD_Types {
        
        Normalizer foo = new Normalizer(Normalizer.NFKD);
        char x = '\u1EE2';
-        System.out.println(UCA.hex(x) + " " + ucd.getName(x));
+        System.out.println(Utility.hex(x) + " " + ucd.getName(x));
        String nx = foo.normalize(x);
        for (int i = 0; i < nx.length(); ++i) {
            char c = nx.charAt(i);
            System.out.println(ucd.getCanonicalClass(c));
        }
-        System.out.println(UCA.hex(nx, " ") + " " + ucd.getName(nx));
+        System.out.println(Utility.hex(nx, " ") + " " + ucd.getName(nx));
        */
        
    }
@ -251,7 +252,7 @@ public class WriteCollationData implements UCD_Types {
        CompactShortArray csa = new CompactShortArray((short)0);
        
        for (char c = 0; c < 0xFFFF; ++c) {
-            if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c));
+            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
            if (0xAC00 <= c && c <= 0xD7A3) continue;
            if (normKD.hasDecomposition(c)) {
                ++count;
@ -260,7 +261,7 @@ public class WriteCollationData implements UCD_Types {
                if (max < decomp.length()) max = decomp.length();
                if (decomp.length() > 7) ++over7;
                csa.setElementAt(c, (short)count);
-                log.println("\t KD[0x" + UCA.hex(c) + "]='\\u" + UCA.hex(decomp,"\\u") + "';");
+                log.println("\t KD[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
            }
        }
        csa.compact();
@ -279,7 +280,7 @@ public class WriteCollationData implements UCD_Types {
        csa = new CompactShortArray((short)0);
        
        for (char c = 0; c < 0xFFFF; ++c) {
-            if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c));
+            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
            if (0xAC00 <= c && c <= 0xD7A3) continue;
            if (normD.hasDecomposition(c)) {
                ++count;
@ -287,7 +288,7 @@ public class WriteCollationData implements UCD_Types {
                datasize += decomp.length();
                if (max < decomp.length()) max = decomp.length();
                csa.setElementAt(c, (short)count);
-                log.println("\t D[0x" + UCA.hex(c) + "]='\\u" + UCA.hex(decomp,"\\u") + "';");
+                log.println("\t D[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
            }
        }
        csa.compact();
@ -304,12 +305,12 @@ public class WriteCollationData implements UCD_Types {
        CompactByteArray cba = new CompactByteArray();
        
        for (char c = 0; c < 0xFFFF; ++c) {
-            if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c));
+            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
            int canClass = normKD.getCanonicalClass(c);
            if (canClass != 0) {
                ++count;
                
-                log.println("\t CC[0x" + UCA.hex(c) + "]=" + canClass + ";");
+                log.println("\t CC[0x" + Utility.hex(c) + "]=" + canClass + ";");
            }
        }
        cba.compact();
@ -332,7 +333,7 @@ public class WriteCollationData implements UCD_Types {
            char val = (char) enum.value();
            if (0xAC00 <= val && val <= 0xD7A3) continue;
            ++count;
-            log.println("\tC[0x" + UCA.hex(key) + "]=0x" + UCA.hex(val) + ";");
+            log.println("\tC[0x" + Utility.hex(key) + "]=0x" + Utility.hex(val) + ";");
        }
        log.println("// " + count + " composition mappings total");
        log.println();
@ -480,7 +481,7 @@ public class WriteCollationData implements UCD_Types {
                decompSortKey = remove(decompSortKey, '\u0020');
            }
            if (!sortKey.equals(decompSortKey)) {
-                log.println("<tr><td>" + UCA.hex(ch)
+                log.println("<tr><td>" + Utility.hex(ch)
                    + "</td><td>" + UCA.toString(sortKey)
                    + "</td><td>" + UCA.toString(decompSortKey)
                    + "</td><td>" + ucd.getName(ch)
@ -649,9 +650,11 @@ public class WriteCollationData implements UCD_Types {
    static final byte getDecompType(int cp) {
        byte result = ucd.getDecompositionType(cp);
        if (result == ucd.CANONICAL) {
-            String d = NFD.normalize((char)cp); // TODO
-            for (int i = 0; i < d.length(); ++i) {
-                byte t = ucd.getDecompositionType(d.charAt(i));
+            String d = NFD.normalize(cp); // TODO
+            int cp1;
+            for (int i = 0; i < d.length(); i += UTF16.getCharCount(cp1)) {
+                cp1 = UTF16.charAt(d, i);
+                byte t = ucd.getDecompositionType(cp1);
                if (t > ucd.CANONICAL) return t;
            }
        }
@ -707,7 +710,7 @@ public class WriteCollationData implements UCD_Types {
    static int[] markCes = new int[50];
    
    static int fixCompatibilityCE(String s, boolean decompose, int[] output, boolean compress) {
-        byte type = getDecompType(s.charAt(0));
+        byte type = getDecompType(UTF16.charAt(s, 0));
        char ch = s.charAt(0);
        
        String decomp = NFKD.normalize(s);
@ -1654,6 +1657,7 @@ public class WriteCollationData implements UCD_Types {
    static final int COMMON = 5;
    
    static int gapForA = 0;
+    static int[] primaryDelta;
    
    static void writeFractionalUCA(String filename) throws IOException {
        
@ -1672,9 +1676,9 @@ public class WriteCollationData implements UCD_Types {
        for (int secondary = 0; secondary < compactSecondary.length; ++secondary) {
            if (secondarySet.get(secondary)) {
                compactSecondary[secondary] = subtotal++;
-                /*System.out.println("compact[" + UCA.hex(secondary)
-                    + "]=" + UCA.hex(compactSecondary[secondary])
-                    + ", " + UCA.hex(fixSecondary(secondary)));*/
+                /*System.out.println("compact[" + Utility.hex(secondary)
+                    + "]=" + Utility.hex(compactSecondary[secondary])
+                    + ", " + Utility.hex(fixSecondary(secondary)));*/
            }
        }
        System.out.println();
@ -1687,7 +1691,9 @@ public class WriteCollationData implements UCD_Types {
        
        System.out.println("Fixing Primaries");
        BitSet primarySet = collator.getWeightUsage(1);        
-        int[] primaryDelta = new int[65536];
+        
+        primaryDelta = new int[65536];
+        
        // start at 1 so zero stays zero.
        for (int primary = 1; primary < 0xFFFF; ++primary) {
            if (primarySet.get(primary)) primaryDelta[primary] = 2;
@ -1749,7 +1755,7 @@ public class WriteCollationData implements UCD_Types {
                
                lastValue = primaryDelta[primary] = CE >>> 8; 
            }
-            //if ((primary & 0xFF) == 0) System.out.println(UCA.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
+            //if ((primary & 0xFF) == 0) System.out.println(Utility.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
        }
        
        
@ -1757,19 +1763,37 @@ public class WriteCollationData implements UCD_Types {
        
        System.out.println("Sorting");
        Map ordered = new TreeMap();
-        
-        for (char ch = 0; ch < 0xFFFF; ++ch) {
-            byte type = collator.getCEType(ch);
-            if (type >= UCA.FIXED_CE) continue;
-            String s = String.valueOf(ch);
+        UCA.UCAContents ucac = collator.getContents(UCA.FIXED_CE, null);
+        int ccounter = 0;
+        while (true) {
+            Utility.dot(ccounter++);
+            String s = ucac.next();
+            if (s == null) break;
            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
        }
+            
+        
+        /*
+        
+        for (int ch = 0; ch < 0x10FFFF; ++ch) {
+            Utility.dot(ch);
+            byte type = collator.getCEType(ch);
+            if (type >= UCA.FIXED_CE && !nfd.hasDecomposition(ch))
+                continue;
+            }
+            String s = com.ibm.text.UTF16.valueOf(ch);
+            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
+        }
+        
        Hashtable multiTable = collator.getContracting();
        Enumeration enum = multiTable.keys();
+        int ecount = 0;
        while (enum.hasMoreElements()) {
+            Utility.dot(ecount++);
            String s = (String)enum.nextElement();
            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
        }
+        */
        // JUST FOR TESTING
        if (false) {
            String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
@ -1779,6 +1803,7 @@ public class WriteCollationData implements UCD_Types {
            }
        }
        
+        Utility.fixDot();
        System.out.println("Writing");
        PrintWriter shortLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + ".txt"), 32*1024));
        PrintWriter longLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_long.txt"), 32*1024));
@ -1821,6 +1846,8 @@ public class WriteCollationData implements UCD_Types {
        String lastChr = "";
        int lastNp = 0;
        boolean doVariable = false;
+        char[] codeUnits = new char[100];
+        
        
        while (it.hasNext()) {
            Object sortKey = it.next();
@ -1846,8 +1873,12 @@ public class WriteCollationData implements UCD_Types {
                wasVariable = isVariable;
            }
            oldStr.setLength(0);
-            log.print(UCA.hex(chr, " ") + "; ");
+            chr.getChars(0, chr.length(), codeUnits, 0);
+            
+            log.print(Utility.hex(codeUnits, 0, chr.length(), " ") + "; ");
            boolean nonePrinted = true;
+            boolean isFirst = true;
+            
            for (int q = 0; q < len; ++q) {
                nonePrinted = false;
                newPrimary.setLength(0);
@ -1856,7 +1887,32 @@ public class WriteCollationData implements UCD_Types {
                
                int pri = UCA.getPrimary(ces[q]);
                int sec = UCA.getSecondary(ces[q]); 
-                int ter = UCA.getTertiary(ces[q]); 
+                int ter = UCA.getTertiary(ces[q]);
+                
+                oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
+                
+                // special hack for unsupported code points: consume the following CE as well and rebuild the code point from the two primaries
+                
+                if (pri >= UCA.UNSUPPORTED_BASE) {
+                    ++q;
+                    oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
+                
+                    int pri2 = UCA.getPrimary(ces[q]);
+                    // get old code point
+                    // pri = UNSUPPORTED_BASE + (bigChar >>> 15)
+                    // pri2 = (bigChar & 0x7FFF) | 0x8000
+                    pri -= UCA.UNSUPPORTED_BASE;
+                    pri <<= 15;
+                    pri2 &= 0x7FFF;
+                    pri += pri2;
+                    System.out.println("Unsupported: "
+                        + Utility.hex(UCA.getPrimary(ces[q-1]))
+                        + ", " + Utility.hex(UCA.getPrimary(ces[q]))
+                        + ", " + Utility.hex(pri)
+                        + ", " + Utility.hex(fixPrimary(pri) & 0xFFFFFFFFL)
+                        );
+                        
+                }
                
                if (sec != 0x20) {
                    boolean changed = secEq.add(new Integer(sec), new Integer(pri));
@ -1866,28 +1922,26 @@ public class WriteCollationData implements UCD_Types {
                }
                if (sampleEq[sec] == null) sampleEq[sec] = chr;
                if (sampleEq[ter] == null) sampleEq[ter] = chr;
-                oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
-                int oldPrimaryValue = UCA.getPrimary(ces[q]);
-                int np = primaryDelta[oldPrimaryValue];
-                if (oldPrimaryValue > 0x3400) {
-                    System.out.println(Utility.hex(oldPrimaryValue) + " => " + Utility.hex(np));
-                }
+                
+                // int oldPrimaryValue = UCA.getPrimary(ces[q]);
+                int np = fixPrimary(pri);
                
                hexBytes(np, newPrimary);
-                hexBytes(fixSecondary(UCA.getSecondary(ces[q])), newSecondary);
-                hexBytes(fixTertiary(UCA.getTertiary(ces[q])), newTertiary);
-                if (q == 0) {
+                hexBytes(fixSecondary(sec), newSecondary);
+                hexBytes(fixTertiary(ter), newTertiary);
+                if (isFirst) {
                    if (!sameTopByte(np, lastNp)) {
-                        summary.println("Last:  " + Utility.hex(lastNp) + " " + ucd.getName(lastChr.charAt(0)));
+                        summary.println("Last:  " + Utility.hex(lastNp & 0xFFFFFFFFL) + " " + ucd.getName(UTF16.charAt(lastChr,0)));
                        summary.println();
                        if (doVariable) {
                            doVariable = false;
                            summary.println("[variable top = " + Utility.hex(primaryDelta[firstPrimary]) + "] # END OF VARIABLE SECTION!!!");
                            summary.println();
                        }
-                        summary.println("First: " + Utility.hex(np) + " " + ucd.getName(chr.charAt(0)));
+                        summary.println("First: " + Utility.hex(np & 0xFFFFFFFFL) + " " + ucd.getName(UTF16.charAt(chr,0)));
                    }
                    lastNp = np;
+                    isFirst = false;
                }
                log.print("[" + newPrimary 
                    + ", " + newSecondary 
@ -1898,17 +1952,17 @@ public class WriteCollationData implements UCD_Types {
                log.print("[,,]");
                oldStr.append(UCA.ceToString(0));
            }
-            longLog.print("    # " + oldStr + " # " + ucd.getName(chr.charAt(0)));
+            longLog.print("    # " + oldStr + " # " + ucd.getName(UTF16.charAt(chr, 0)));
            log.println();
            lastChr = chr;
        }
-        summary.println("Last:  " + Utility.hex(lastNp) + " " + ucd.getName(lastChr.charAt(0)));
+        summary.println("Last:  " + Utility.hex(lastNp) + " " + ucd.getName(UTF16.charAt(lastChr, 0)));
        
        /*
        String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
        for (int i = 0; i < sample.length(); ++i) {
            char ch = sample.charAt(i);
-            log.println(UCA.hex(ch) + " => " + UCA.hex(fixHan(ch))
+            log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch))
                    + "          " + ucd.getName(ch));
        }
        */
@ -1981,8 +2035,24 @@ public class WriteCollationData implements UCD_Types {
    
    
    static boolean isFixedIdeograph(int cp) {
-        return (0x3400 <= cp && cp <= 0x4DB5 || 0x4E00 <= cp && cp <= 0x9FA5 || 0xF900 <= cp && cp <= 0xFA2D);
+        return (0x3400 <= cp && cp <= 0x4DB5 
+            || 0x4E00 <= cp && cp <= 0x9FA5 
+            || 0xF900 <= cp && cp <= 0xFA2D // compat: most of these decompose anyway
+            || 0x20000 <= cp && cp <= 0x2A6D6
+            || 0x2F800 <= cp && cp <= 0x2FA1D // compat: most of these decompose anyway
+            );
    }
+/*
+3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
+4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
+4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
+9FA5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
+20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
+2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
+2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
+...
+2FA1D;CJK COMPATIBILITY IDEOGRAPH-2FA1D;Lo;0;L;2A600;;;;N;;;;;
+*/
    
    static int remapUCA_CompatibilityIdeographToCp(int cp) {
        switch (cp) {    
@ -2175,6 +2245,18 @@ public class WriteCollationData implements UCD_Types {
    
    static final int secondaryDoubleStart = 0xD0;
    
+    static int fixPrimary(int x) {
+        int result = 0;
+        if (x <= 0xFFFF) result = primaryDelta[x];
+        else result = getImplicitPrimary(x);
+        
+        /*if (x > 0x3400) {
+            System.out.println(Utility.hex(x) + " => " + Utility.hex(result));
+        }
+        */
+        return result;
+    }
+    
    static int fixSecondary(int x) {
        x = compactSecondary[x];
        return fixSecondary2(x, compactSecondary[0x153], compactSecondary[0x157]);
@ -2301,7 +2383,7 @@ public class WriteCollationData implements UCD_Types {
            byte b = (byte)(x >>> shift);
            if (b != 0) {
                if (result.length() != 0) result.append(" ");
-                result.append(UCA.hex(b));
+                result.append(Utility.hex(b));
                //if (lastb == 0) System.err.println(" bad zero byte: " + result);
            }
            lastb = b;
@ -2352,7 +2434,7 @@ public class WriteCollationData implements UCD_Types {
                    if (cat <= ucd.OTHER_LETTER && cat != ucd.Lm) {
                        scripts[script] = primary;
                        scriptChar[script] = ch;
-                        if (script == ucd.GREEK_SCRIPT) System.out.println("*" + UCA.hex(primary) + ucd.getName(ch));
+                        if (script == ucd.GREEK_SCRIPT) System.out.println("*" + Utility.hex(primary) + ucd.getName(ch));
                    }
                }
                // get representative char for primary
@ -2469,7 +2551,7 @@ public class WriteCollationData implements UCD_Types {
                source = source.substring(0,source.length()-1);
                if (endMark == MARK1) {
                    log.println("<br>");
-                    log.println("Mismatch: " + UCA.hex(source, " ")
+                    log.println("Mismatch: " + Utility.hex(source, " ")
                        + ", " + ucd.getName(source) + "<br>");
                    log.print("  NFD:");
                } else {
@ -2557,11 +2639,11 @@ public class WriteCollationData implements UCD_Types {
                //if (firstRow) out.print(" width='6%'");
                out.print(">");
                
-                //log.println(UCA.hex(ch2.charAt(0)));
+                //log.println(Utility.hex(ch2.charAt(0)));
                boolean ignorable = col2.charAt(0) == 0;
                out.print(HTMLString(ch2) + "<br><tt>"
                    + (ignorable ? "<u>" : "")
-                    + UCA.hex(ch2, " ")
+                    + Utility.hex(ch2, " ")
                    + (ignorable ? "</u>" : "")
                    );
                if (SHOW_CE) out.print("</tt><br><tt><b>" + UCA.toString(col2) + "</b>");
@ -2633,7 +2715,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        String colNbase = collator.getSortKey(ch, option, false);
        String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
        if (!colNbase.equals(colCbase)) {
-            /*System.out.println(UCA.hex(ch));
+            /*System.out.println(Utility.hex(ch));
            System.out.println(printableKey(colNbase));
            System.out.println(printableKey(colNbase));
            System.out.println(printableKey(colNbase));*/
@ -2747,10 +2829,10 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
            String ch = (String)sortedD.get(col);
            String colN = (String)backN.get(ch);
            if (colN == null || colN.length() < 1) {
-                System.out.println("Missing colN value for " + UCA.hex(ch, " ") + ": " + printableKey(colN));
+                System.out.println("Missing colN value for " + Utility.hex(ch, " ") + ": " + printableKey(colN));
            }
            if (col == null || col.length() < 1) {
-                System.out.println("Missing col value for " + UCA.hex(ch, " ") + ": " + printableKey(col));
+                System.out.println("Missing col value for " + Utility.hex(ch, " ") + ": " + printableKey(col));
            }
            
            if (compareMinusLast(col, lastCol) == compareMinusLast(colN, lastColN)) {
@ -2758,14 +2840,14 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
            } else {
                if (true && count < 200) {
                    System.out.println();
-                    System.out.println(UCA.hex(ch, " ") + ", " + UCA.hex(lastCh, " "));
-                    System.out.println("      col: " + UCA.hex(col, " "));
+                    System.out.println(Utility.hex(ch, " ") + ", " + Utility.hex(lastCh, " "));
+                    System.out.println("      col: " + Utility.hex(col, " "));
                    System.out.println(compareMinusLast(col, lastCol));
-                    System.out.println("  lastCol: " + UCA.hex(lastCol, " "));
+                    System.out.println("  lastCol: " + Utility.hex(lastCol, " "));
                    System.out.println();
-                    System.out.println("     colN: " + UCA.hex(colN, " "));
+                    System.out.println("     colN: " + Utility.hex(colN, " "));
                    System.out.println(compareMinusLast(colN, lastColN));
-                    System.out.println(" lastColN: " + UCA.hex(lastColN, " "));
+                    System.out.println(" lastColN: " + Utility.hex(lastColN, " "));
                }
                if (!showedLast) {
                    log.println("<tr><td colspan='3'></td><tr>");
@ -2791,9 +2873,9 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
    
    static void showLine(int count, String ch, String keyD, String keyN) {
        String decomp = toD.normalize(ch);
-        if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + UCA.hex(decomp, " ") + "> ";
+        if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
        log.println("<tr><td>" + count + "</td><td>" 
-            + UCA.hex(ch, " ")
+            + Utility.hex(ch, " ")
            + " " + ucd.getName(ch)
            + decomp
            + "</td><td>");
@ -2863,12 +2945,12 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        if (showName) {
            if (ch.equals(decomp)) {
                log.println(//title + counter + " "
-                    UCA.hex(ch, " ") 
+                    Utility.hex(ch, " ") 
                    + " " + ucd.getName(ch)
                );
            } else {
                log.println(//title + counter + " "
-                    "<b>" + UCA.hex(ch, " ") 
+                    "<b>" + Utility.hex(ch, " ") 
                    + " " + ucd.getName(ch) + "</b>"
                );
            }
@ -2877,11 +2959,11 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
            String keyN = printableKey(backN.get(chobj));
            if (keyD.equals(keyN)) {
                log.println(//title + counter + " "
-                    UCA.hex(ch, " ") + " " + keyN);
+                    Utility.hex(ch, " ") + " " + keyN);
            } else {
                log.println(//title + counter + " "
-                    "<font color='#009900'>" + UCA.hex(ch, " ") + " " + keyN
-                    + "</font><br><font color='#000099'>" + UCA.hex(decomp, " ") + " " + keyD + "</font>"
+                    "<font color='#009900'>" + Utility.hex(ch, " ") + " " + keyN
+                    + "</font><br><font color='#000099'>" + Utility.hex(decomp, " ") + " " + keyD + "</font>"
                );
            }
        }
--- a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $ 
-* $Date: 2001/10/26 23:32:03 $ 
-* $Revision: 1.3 $
+* $Date: 2001/10/31 00:01:28 $ 
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -285,7 +285,7 @@ public class WriteHTMLCollation implements UCD_Types {
                decompSortKey = remove(decompSortKey, '\u0020');
            }
            if (!sortKey.equals(decompSortKey)) {
-                log.println("<tr><td>" + UCA.hex(ch)
+                log.println("<tr><td>" + Utility.hex(ch)
                    + "</td><td>" + UCA.toString(sortKey)
                    + "</td><td>" + UCA.toString(decompSortKey)
                    + "</td><td>" + ucd.getName(ch)
@ -762,9 +762,9 @@ public class WriteHTMLCollation implements UCD_Types {
        for (int secondary = 0; secondary < compactSecondary.length; ++secondary) {
            if (secondarySet.get(secondary)) {
                compactSecondary[secondary] = subtotal++;
-                /*System.out.println("compact[" + UCA.hex(secondary)
-                    + "]=" + UCA.hex(compactSecondary[secondary])
-                    + ", " + UCA.hex(fixSecondary(secondary)));*/
+                /*System.out.println("compact[" + Utility.hex(secondary)
+                    + "]=" + Utility.hex(compactSecondary[secondary])
+                    + ", " + Utility.hex(fixSecondary(secondary)));*/
            }
        }
        System.out.println();
@ -822,7 +822,7 @@ public class WriteHTMLCollation implements UCD_Types {
                
                primaryDelta[primary] = CE >>> 8; 
            }
-            if ((primary & 0xFF) == 0) System.out.println(UCA.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
+            if ((primary & 0xFF) == 0) System.out.println(Utility.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
        }
        
        
@ -916,7 +916,7 @@ public class WriteHTMLCollation implements UCD_Types {
                wasVariable = isVariable;
            }
            oldStr.setLength(0);
-            log.print(UCA.hex(chr, " ") + "; " + (needsCaseBit(chr) ? '1' : '0') + "; ");
+            log.print(Utility.hex(chr, " ") + "; " + (needsCaseBit(chr) ? '1' : '0') + "; ");
            boolean nonePrinted = true;
            for (int q = 0; q < len; ++q) {
                nonePrinted = false;
@ -972,7 +972,7 @@ public class WriteHTMLCollation implements UCD_Types {
        String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
        for (int i = 0; i < sample.length(); ++i) {
            char ch = sample.charAt(i);
-            log.println(UCA.hex(ch) + " => " + UCA.hex(fixHan(ch))
+            log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch))
                    + "          " + ucd.getName(ch));
        }
        */
@ -1311,7 +1311,7 @@ public class WriteHTMLCollation implements UCD_Types {
            byte b = (byte)(x >>> shift);
            if (b != 0) {
                if (result.length() != 0) result.append(" ");
-                result.append(UCA.hex(b));
+                result.append(Utility.hex(b));
                //if (lastb == 0) System.err.println(" bad zero byte: " + result);
            }
            lastb = b;
@ -1360,7 +1360,7 @@ public class WriteHTMLCollation implements UCD_Types {
                    if (cat <= ucd.OTHER_LETTER && cat != ucd.Lm) {
                        scripts[script] = primary;
                        scriptChar[script] = ch;
-                        if (script == ucd.GREEK_SCRIPT) System.out.println("*" + UCA.hex(primary) + ucd.getName(ch));
+                        if (script == ucd.GREEK_SCRIPT) System.out.println("*" + Utility.hex(primary) + ucd.getName(ch));
                    }
                }
                // get representative char for primary
@ -1478,7 +1478,7 @@ public class WriteHTMLCollation implements UCD_Types {
                source = source.substring(0,source.length()-1);
                if (endMark == MARK1) {
                    log.println("<br>");
-                    log.println("Mismatch: " + UCA.hex(source, " ")
+                    log.println("Mismatch: " + Utility.hex(source, " ")
                        + ", " + ucd.getName(source) + "<br>");
                    log.print("  NFD:");
                } else {
@ -1566,11 +1566,11 @@ public class WriteHTMLCollation implements UCD_Types {
                //if (firstRow) out.print(" width='6%'");
                out.print(">");
                
-                //log.println(UCA.hex(ch2.charAt(0)));
+                //log.println(Utility.hex(ch2.charAt(0)));
                boolean ignorable = col2.charAt(0) == 0;
                out.print(HTMLString(ch2) + "<br><tt>"
                    + (ignorable ? "<u>" : "")
-                    + UCA.hex(ch2, " ")
+                    + Utility.hex(ch2, " ")
                    + (ignorable ? "</u>" : "")
                    );
                if (SHOW_CE) out.print("</tt><br><tt><b>" + UCA.toString(col2) + "</b>");
@ -1632,7 +1632,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        String colNbase = collator.getSortKey(ch, option, false);
        String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
        if (!colNbase.equals(colCbase)) {
-            /*System.out.println(UCA.hex(ch));
+            /*System.out.println(Utility.hex(ch));
            System.out.println(printableKey(colNbase));
            System.out.println(printableKey(colNbase));
            System.out.println(printableKey(colNbase));*/
@ -1746,10 +1746,10 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
            String ch = (String)sortedD.get(col);
            String colN = (String)backN.get(ch);
            if (colN == null || colN.length() < 1) {
-                System.out.println("Missing colN value for " + UCA.hex(ch, " ") + ": " + printableKey(colN));
+                System.out.println("Missing colN value for " + Utility.hex(ch, " ") + ": " + printableKey(colN));
            }
            if (col == null || col.length() < 1) {
-                System.out.println("Missing col value for " + UCA.hex(ch, " ") + ": " + printableKey(col));
+                System.out.println("Missing col value for " + Utility.hex(ch, " ") + ": " + printableKey(col));
            }
            
            if (compareMinusLast(col, lastCol) == compareMinusLast(colN, lastColN)) {
@ -1757,14 +1757,14 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
            } else {
                if (true && count < 200) {
                    System.out.println();
-                    System.out.println(UCA.hex(ch, " ") + ", " + UCA.hex(lastCh, " "));
-                    System.out.println("      col: " + UCA.hex(col, " "));
+                    System.out.println(Utility.hex(ch, " ") + ", " + Utility.hex(lastCh, " "));
+                    System.out.println("      col: " + Utility.hex(col, " "));
                    System.out.println(compareMinusLast(col, lastCol));
-                    System.out.println("  lastCol: " + UCA.hex(lastCol, " "));
+                    System.out.println("  lastCol: " + Utility.hex(lastCol, " "));
                    System.out.println();
-                    System.out.println("     colN: " + UCA.hex(colN, " "));
+                    System.out.println("     colN: " + Utility.hex(colN, " "));
                    System.out.println(compareMinusLast(colN, lastColN));
-                    System.out.println(" lastColN: " + UCA.hex(lastColN, " "));
+                    System.out.println(" lastColN: " + Utility.hex(lastColN, " "));
                }
                if (!showedLast) {
                    log.println("<tr><td colspan='3'></td><tr>");
@ -1790,9 +1790,9 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
    
    static void showLine(int count, String ch, String keyD, String keyN) {
        String decomp = toD.normalize(ch);
-        if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + UCA.hex(decomp, " ") + "> ";
+        if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
        log.println("<tr><td>" + count + "</td><td>" 
-            + UCA.hex(ch, " ")
+            + Utility.hex(ch, " ")
            + " " + ucd.getName(ch)
            + decomp
            + "</td><td>");
@ -1862,12 +1862,12 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
        if (showName) {
            if (ch.equals(decomp)) {
                log.println(//title + counter + " "
-                    UCA.hex(ch, " ") 
+                    Utility.hex(ch, " ") 
                    + " " + ucd.getName(ch)
                );
            } else {
                log.println(//title + counter + " "
-                    "<b>" + UCA.hex(ch, " ") 
+                    "<b>" + Utility.hex(ch, " ") 
                    + " " + ucd.getName(ch) + "</b>"
                );
            }
@ -1876,11 +1876,11 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
            String keyN = printableKey(backN.get(chobj));
            if (keyD.equals(keyN)) {
                log.println(//title + counter + " "
-                    UCA.hex(ch, " ") + " " + keyN);
+                    Utility.hex(ch, " ") + " " + keyN);
            } else {
                log.println(//title + counter + " "
-                    "<font color='#009900'>" + UCA.hex(ch, " ") + " " + keyN
-                    + "</font><br><font color='#000099'>" + UCA.hex(decomp, " ") + " " + keyD + "</font>"
+                    "<font color='#009900'>" + Utility.hex(ch, " ") + " " + keyN
+                    + "</font><br><font color='#000099'>" + Utility.hex(decomp, " ") + " " + keyD + "</font>"
                );
            }
        }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2001/10/26 23:33:07 $
-* $Revision: 1.8 $
+* $Date: 2001/10/31 00:02:27 $
+* $Revision: 1.9 $
 *
 *******************************************************************************
 */
@ -414,15 +414,36 @@ public class GenerateData implements UCD_Types {


    public static void listProperties() throws IOException {
-        String propAbb = "";
        String prop = "";
+        String propAbb = "";
+        String value = "";
+        String valueAbb = "";
        
        Map duplicates = new TreeMap();
        Set sorted = new TreeSet(java.text.Collator.getInstance());
-        Map accumulation = new TreeMap();
+        Set accumulation = new TreeSet(java.text.Collator.getInstance());
        String spacing;
        
-        for(int k = 0; k < UCD_Names.NON_ENUMERATED.length; ++k) {
+        BufferedReader blocks = Utility.openUnicodeFile("Blocks", ucd.getVersion());
+        String[] parts = new String[10];
+        while (true) {
+            String line = blocks.readLine();
+            if (line == null) break;
+            int commentPos = line.indexOf('#');
+            if (commentPos >= 0) line = line.substring(0,commentPos);
+            line = line.trim();
+            if (line.length() == 0) continue;
+            int count = Utility.split(line,';',parts);
+            if (count != 2) System.out.println("Whow!");
+            value = fixGaps(parts[1].trim(), true);
+            valueAbb = "n/a";
+            spacing = Utility.repeat(" ", 10-valueAbb.length());
+            sorted.add("blk; " + valueAbb + spacing + "; " + value);
+            checkDuplicate(duplicates, accumulation, value, "Block=" + value);
+        }
+        blocks.close();
+        
+        for (int k = 0; k < UCD_Names.NON_ENUMERATED.length; ++k) {
            propAbb = fixGaps(UCD_Names.NON_ENUMERATED[k][0], false);
            prop = fixGaps(UCD_Names.NON_ENUMERATED[k][1], true);
            spacing = Utility.repeat(" ", 10-propAbb.length());
@ -430,6 +451,15 @@ public class GenerateData implements UCD_Types {
            checkDuplicate(duplicates, accumulation, propAbb, prop);
            if (!prop.equals(propAbb)) checkDuplicate(duplicates, accumulation, prop, prop);
        }
+ 
+        for (int k = 0; k < UCD_Names.SUPER_CATEGORIES.length; ++k) {
+            valueAbb = fixGaps(UCD_Names.SUPER_CATEGORIES[k][0], false);
+            value = fixGaps(UCD_Names.SUPER_CATEGORIES[k][1], true);
+            spacing = Utility.repeat(" ", 10-valueAbb.length());
+            sorted.add("gc; " + valueAbb + spacing + "; " + value);
+            checkDuplicate(duplicates, accumulation, value, "General_Category=" + value);
+            if (!value.equals(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, "General_Category=" + value);
+        }
        
        sorted.add("xx; T         ; True");
        checkDuplicate(duplicates, accumulation, "T", "xx=True");
@ -460,7 +490,7 @@ public class GenerateData implements UCD_Types {
            if (!ubp.isDefined(i)) continue;
            if (ubp.isTest(i)) continue;
            
-            String value = ubp.getID(i, LONG);
+            value = ubp.getID(i, LONG);
            if (value.length() == 0) value = "none";
            else if (value.equals("<unused>")) continue;
            value = fixGaps(value, true);
@ -469,9 +499,9 @@ public class GenerateData implements UCD_Types {
                value = ucd.getCase(value, FULL, TITLE);
            }
            
-            String abbvalue = ubp.getID(i, SHORT);
-            if (abbvalue.length() == 0) abbvalue = "no";
-            abbvalue = fixGaps(abbvalue, false);
+            valueAbb = ubp.getID(i, SHORT);
+            if (valueAbb.length() == 0) valueAbb = "no";
+            valueAbb = fixGaps(valueAbb, false);

            if (type == COMBINING_CLASS) {
                if (value.startsWith("Fixed_")) { continue; }
@ -480,13 +510,13 @@ public class GenerateData implements UCD_Types {
            /*
            String elide = "";
            if (type == CATEGORY || type == SCRIPT || type == BINARY_PROPERTIES) elide = "\\p{"
-                + abbvalue
+                + valueAbb
                + "}";
            String abb = "";
            if (type != BINARY_PROPERTIES) abb = "\\p{"
                + UCD_Names.ABB_UNIFIED_PROPERTIES[i>>8]
                + "="
-                + abbvalue
+                + valueAbb
                + "}";
            String norm = "";
            if (type != BINARY_PROPERTIES) norm = "\\p{"
@ -497,18 +527,18 @@ public class GenerateData implements UCD_Types {
            System.out.println("<tr><td>" + elide + "</td><td>" + abb + "</td><td>" + norm + "</td></tr>");
            */
            
-            spacing = Utility.repeat(" ", 10-abbvalue.length());
+            spacing = Utility.repeat(" ", 10-valueAbb.length());
            
            if (type == BINARY_PROPERTIES || type == DERIVED) {
-                sorted.add("ZZ; " + abbvalue + spacing + "; " + value);
+                sorted.add("ZZ; " + valueAbb + spacing + "; " + value);
                checkDuplicate(duplicates, accumulation, value, value);
-                if (!value.equalsIgnoreCase(abbvalue)) checkDuplicate(duplicates, accumulation, abbvalue, value);
+                if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, value);
                continue;
            }
            
-            sorted.add(propAbb + "; " + abbvalue + spacing + "; " + value);
+            sorted.add(propAbb + "; " + valueAbb + spacing + "; " + value);
            checkDuplicate(duplicates, accumulation, value, prop + "=" + value);
-            if (!value.equalsIgnoreCase(abbvalue)) checkDuplicate(duplicates, accumulation, abbvalue, prop + "=" + value);
+            if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, prop + "=" + value);
        }
        
        PrintWriter log = Utility.openPrintWriter("PropertyAliases-" + ucd.getVersion() + "dX.txt");
@ -525,7 +555,7 @@ public class GenerateData implements UCD_Types {
        log.println("# Note: no two property names can be the same,");
        log.println("# nor can two property value names for the same property be the same.");
        log.println();
-        Utility.print(log, accumulation.values(), "\r\n", new MyBreaker());
+        Utility.print(log, accumulation, "\r\n", new MyBreaker());
        log.println();
        log.close();
    }
@ -542,7 +572,7 @@ public class GenerateData implements UCD_Types {
        }
    }
    
-    static void checkDuplicate(Map m, Map accumulation, String toCheck, String originalComment) {
+    static void checkDuplicate(Map m, Set accumulation, String toCheck, String originalComment) {
        toCheck = skeleton(toCheck);
        String comment = "{" + originalComment + "}";
        
@ -575,14 +605,15 @@ public class GenerateData implements UCD_Types {
            }
            
            // accumulate differences
+            /*
            String acc = (String)accumulation.get(toCheck);
-            /*if (acc == null) {
+            if (acc == null) {
                acc = "# \"" + toCheck + "\":\t" + originalComment;
            }
            acc += ";\t" + result;
            */
            result.add(comment);
-            accumulation.put(toCheck, "# \"" + toCheck + "\":\t" + result);
+            accumulation.add("# " + result.toString() + ":\t" + toCheck);
        } else {
            result = new TreeSet();
            result.add(comment);
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt
@ -12,8 +12,10 @@
 #
 # FORMAT
 #
-# Each line has three fields. Where the first field is AA, BB, or ZZ, then
-# the line describes a property name.
+# Each line has three fields, separated by semicolons.
+#
+# First Field: If the first field is AA, BB, or ZZ, the line describes a property name:
+#
 # AA - non-enumerated properties
 # BB - enumerated, non-binary properties
 # ZZ - binary properties and quick-check properties
@ -28,10 +30,20 @@
 # xx stands for any binary property
 # qc stands for any quick-check property
 #
-# With loose matching of property names, case distinctions, whitespace,
+# Second Field: The second field is an abbreviated name.
+# If there is no abbreviated name available, the field is marked with "n/a".
+#
+# Third Field: The third field is a long name.
+#
+# With loose matching of property names, differences in case, whitespace,
 # and '_' are ignored.
 #
-# NOTE: the property value names are NOT unique across properties, especially
+# NOTE: Currently there is at most one abbreviated name and one long name for
+# each property and property value. However, in the future additional aliases
+# may be added. In such a case, the first line for the property or property value
+# would have the preferred alias for output.
+#
+# NOTE: The property value names are NOT unique across properties, especially
 # with loose matches. For example,
 # AL means Arabic Letter for the Bidi_Class property, and
 # AL means Alpha_Left for the Combining_Class property, and
@ -41,5 +53,7 @@
 # cc means Combining_Class property, and
 # cc means the General_Category property value Control (cc)
 #
+# Comments at the end of the file show cases of non-unique names.
+#
 # The combination of property value and property name is, however, unique.
 # For more information, see UTR #24: Regular Expression Guidelines
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2001/10/26 23:33:07 $
-* $Revision: 1.5 $
+* $Date: 2001/10/31 00:02:27 $
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -31,7 +31,8 @@ final class UCD_Names implements UCD_Types {
        {"suc", "Simple_Uppercase_Mapping"},
        {"stc", "Simple_Titlecase_Mapping"},
        {"sfc", "Simple_Case_Folding"},
-        {"scc", "Special_Case_Condition"}
+        {"scc", "Special_Case_Condition"},
+        {"blk", "Block"}
    };

    static final String[] UNIFIED_PROPERTIES = {
@ -404,9 +405,19 @@ final class UCD_Names implements UCD_Types {
        "FinalPunctuation" // = Punctuation, Final quote 30 (may behave like Ps or Pe dependingon usage)
    };

+    static final String[][] SUPER_CATEGORIES = { // each row is {short name, long name}; GenerateData.listProperties reads [0] as the abbreviation and [1] as the value
+        {"L", "Letter"},
+        {"M", "Mark"},
+        {"N", "Number"},
+        {"Z", "Separator"},
+        {"C", "Other"},
+        {"S", "Symbol"},
+        {"P", "Punctuation"},
+    };


-    static String[] BC = {
+
+    static final String[] BC = {
        "L", //	Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
        "R", //	Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
        "EN", //	European Number
@ -752,8 +763,6 @@ final class UCD_Names implements UCD_Types {
        "H",    // U+11C2; H; HANGUL JONGSEONG HIEUH
    };

-
-
 /*
    static {
        UNASSIGNED_INFO.code = '\uFFFF';
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2001/10/26 23:33:48 $
-* $Revision: 1.6 $
+* $Date: 2001/10/31 00:02:54 $
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -122,6 +122,14 @@ public final class Utility {    // COMMON UTILITIES
 	    return hex(ch,4);
 	}

+	public static String hex(byte ch) { // hex string for a single byte
+	    return hex(ch & 0xFF,2); // mask to 0..255 so a negative byte is not sign-extended; 2 is presumably the digit width (confirm against hex(int,int))
+	}
+
+	public static String hex(char ch) { // hex string for a single UTF-16 code unit
+	    return hex(ch & 0xFFFF,4); // char is already unsigned; the mask only yields an int in 0..0xFFFF; 4 is presumably the digit width (confirm against hex(int,int))
+	}
+
 	public static String hex(Object s) {
 	    return hex(s, 4, " ");
 	}
@ -149,21 +157,21 @@ public final class Utility {    // COMMON UTILITIES
 	    return result.toString();
 	}

-	public static String hex(byte[] o, int start, int end) {
+	public static String hex(byte[] o, int start, int end, String separator) { // hex of o[start..end), elements joined by separator
 	    StringBuffer result = new StringBuffer();
 	    //int ch;
 	    for (int i = start; i < end; ++i) {
-	        if (i != 0) result.append(' ');
-	        result.append(hex(o[i] & 0xFF, 2));
+	        if (i != start) result.append(separator); // compare with start, not 0: a nonzero start must not produce a leading separator
+	        result.append(hex(o[i])); // hex(byte) overload masks with 0xFF before formatting
 	    }
 	    return result.toString();
 	}

-	public static String hex(char[] o, int start, int end) {
+	public static String hex(char[] o, int start, int end, String separator) { // hex of o[start..end), elements joined by separator
 	    StringBuffer result = new StringBuffer();
 	    for (int i = start; i < end; ++i) {
-	        if (i != 0) result.append(' ');
-	        result.append(hex(o[i], 4));
+	        if (i != start) result.append(separator); // compare with start, not 0: a nonzero start must not produce a leading separator
+	        result.append(hex(o[i])); // resolves to the hex(char) overload
 	    }
 	    return result.toString();
 	}