scuffed-code/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java

/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ 
* $Date: 2003/04/01 02:51:57 $ 
* $Revision: 1.31 $
*
*******************************************************************************
*/

package com.ibm.text.UCA;

import java.util.*;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.CanonicalIterator;
import com.ibm.icu.impl.UCharacterProperty;

import java.io.*;
//import java.text.*;
//import com.ibm.text.unicode.*;

import java.text.RuleBasedCollator;
import java.text.CollationElementIterator;
import java.text.Collator;
import java.text.DateFormat;
import java.text.SimpleDateFormat;

import com.ibm.text.UCD.*;
import com.ibm.text.UCD.UCD_Types;
import com.ibm.text.utility.*;
import com.ibm.text.UCD.Normalizer;

public class WriteCollationData implements UCD_Types, UCA_Types {
	
	static final boolean DEBUG = false;
	static final boolean DEBUG_SHOW_ITERATION = false;
	
	
	
    public static final String copyright = 
      "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
      
    static final boolean EXCLUDE_UNSUPPORTED = true;    
    static final boolean GENERATED_NFC_MISMATCHES = true;    
    static final boolean DO_CHARTS = false;   
    
    
    static final String UNICODE_VERSION = UCD.latestVersion;
    
    static UCA collator;
    static char unique = '\u0000';
    static TreeMap sortedD = new TreeMap();
    static TreeMap sortedN = new TreeMap();
    static HashMap backD = new HashMap();
    static HashMap backN = new HashMap();
    static TreeMap duplicates = new TreeMap();
    static int duplicateCount = 0;
    static PrintWriter log;
    
    static UCD ucd;
    

    
    static public void javatest() throws Exception {
        checkJavaRules("& J , K / B & K , M", new String[] {"JA", "MA", "KA", "KC", "JC", "MC"});
        checkJavaRules("& J , K / B , M", new String[] {"JA", "MA", "KA", "KC", "JC", "MC"});
    }
    
    static public void checkJavaRules(String rules, String[] tests) throws Exception {
        System.out.println();
        System.out.println("Rules: " + rules);
        System.out.println();
        
        // duplicate the effect of ICU 1.8 by grabbing the default rules and appending
        
        RuleBasedCollator defaultCollator = (RuleBasedCollator) Collator.getInstance(Locale.US);
        RuleBasedCollator col = new RuleBasedCollator(defaultCollator.getRules() + rules);
        
        // check to make sure each pair is in order
        
        int i = 1;
        for (; i < tests.length; ++i) {
            System.out.println(tests[i-1] + "\t=> " + showJavaCollationKey(col, tests[i-1]));
            if (col.compare(tests[i-1], tests[i]) > 0) {
                System.out.println("Failure: " + tests[i-1] + " > " + tests[i]);
            }
        }
        System.out.println(tests[i-1] + "\t=> " + showJavaCollationKey(col, tests[i-1]));
    }
    
    static public String showJavaCollationKey(RuleBasedCollator col, String test) {
        CollationElementIterator it = col.getCollationElementIterator(test);
        String result = "[";
        for (int i = 0; ; ++i) {
            int ce = it.next();
            if (ce == CollationElementIterator.NULLORDER) break;
            if (i != 0) result += ", ";
            result += Utility.hex(ce,8);
        }
        return result + "]";
    }
    
    //private static final String DIR = "c:\\Documents and Settings\\Davis\\My Documents\\UnicodeData\\Update 3.0.1\\";
    //private static final String DIR31 = "c:\\Documents and Settings\\Davis\\My Documents\\UnicodeData\\Update 3.1\\";
    
    static public void writeCaseExceptions() {
        System.err.println("Writing Case Exceptions");
        Normalizer NFKC = new Normalizer(Normalizer.NFKC, UNICODE_VERSION);
        for (char a = 0; a < 0xFFFF; ++a) {
            if (!ucd.isRepresented(a)) continue;
            //if (0xA000 <= a && a <= 0xA48F) continue; // skip YI

            String b = Case.fold(a);
            String c = NFKC.normalize(b);
            String d = Case.fold(c);
            String e = NFKC.normalize(d);
            if (!e.equals(c)) {
                System.out.println(Utility.hex(a) + "; " + Utility.hex(d, " ") + " # " + ucd.getName(a));
                /*
                System.out.println(Utility.hex(a) 
                + ", " + Utility.hex(b, " ")
                + ", " + Utility.hex(c, " ")
                + ", " + Utility.hex(d, " ")
                + ", " + Utility.hex(e, " "));
                
                System.out.println(ucd.getName(a) 
                + ", " + ucd.getName(b)
                + ", " + ucd.getName(c)
                + ", " + ucd.getName(d)
                + ", " + ucd.getName(e));
                */
            }
            String f = Case.fold(e);
            String g = NFKC.normalize(f);
            if (!f.equals(d) || !g.equals(e)) System.out.println("!!!!!!SKY IS FALLING!!!!!!");
        }
    }
 
    static public void writeCaseFolding() throws IOException {
        System.err.println("Writing Javascript data");
        BufferedReader in = Utility.openUnicodeFile("CaseFolding", UNICODE_VERSION, true, Utility.LATIN1);
        // new BufferedReader(new FileReader(DIR31 + "CaseFolding-3.d3.alpha.txt"), 64*1024);
        // log = new PrintWriter(new FileOutputStream("CaseFolding_data.js"));
        log = Utility.openPrintWriter("CaseFolding_data.js", Utility.UTF8_WINDOWS);
        log.println("var CF = new Object();");
        int count = 0;
        while (true) {
            String line = in.readLine();
            if (line == null) break;
            int comment = line.indexOf('#');                    // strip comments
            if (comment != -1) line = line.substring(0,comment);
            if (line.length() == 0) continue;
            int semi1 = line.indexOf(';');
            int semi2 = line.indexOf(';', semi1+1);
            int semi3 = line.indexOf(';', semi2+1);
            char type = line.substring(semi1+1,semi2).trim().charAt(0);
            if (type == 'C' || type == 'F' || type == 'T') {
                String code = line.substring(0,semi1).trim();
                String result = " " + line.substring(semi2+1,semi3).trim();
                result = replace(result, ' ', "\\u");
                log.println("\t CF[0x" + code + "]='" + result + "';");
                count++;
            }
        }
        log.println("// " + count + " case foldings total");
        
        in.close();
        log.close();
    }
    
    static public String replace(String source, char toBeReplaced, String toReplace) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < source.length(); ++i) {
            char c = source.charAt(i);
            if (c == toBeReplaced) {
                result.append(toReplace);
            } else {
                result.append(c);
            }
        }
        return result.toString();
    }
    
    static public void writeJavascriptInfo() throws IOException {
        System.err.println("Writing Javascript data");
        Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
        Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        //log = new PrintWriter(new FileOutputStream("Normalization_data.js"));
        log = Utility.openPrintWriter("Normalization_data.js", Utility.LATIN1_WINDOWS);
        
        
        int count = 0;
        int datasize = 0;
        int max = 0;
        int over7 = 0;
        log.println("var KD = new Object(); // NFKD compatibility decomposition mappings");
        log.println("// NOTE: Hangul is done in code!");
        CompactShortArray csa = new CompactShortArray((short)0);
        
        for (char c = 0; c < 0xFFFF; ++c) {
            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
            if (0xAC00 <= c && c <= 0xD7A3) continue;
            if (!normKD.isNormalized(c)) {
                ++count;
                String decomp = normKD.normalize(c);
                datasize += decomp.length();
                if (max < decomp.length()) max = decomp.length();
                if (decomp.length() > 7) ++over7;
                csa.setElementAt(c, (short)count);
                log.println("\t KD[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
            }
        }
        csa.compact();
        log.println("// " + count + " NFKD mappings total");
        log.println("// " + datasize + " total characters of results");
        log.println("// " + max + " string length, maximum");
        log.println("// " + over7 + " result strings with length > 7");
        log.println("// " + csa.storage() + " trie length (doesn't count string size)");
        log.println();

        count = 0;
        datasize = 0;
        max = 0;
        log.println("var D = new Object();  // NFD canonical decomposition mappings");
        log.println("// NOTE: Hangul is done in code!");
        csa = new CompactShortArray((short)0);
        
        for (char c = 0; c < 0xFFFF; ++c) {
            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
            if (0xAC00 <= c && c <= 0xD7A3) continue;
            if (!normD.isNormalized(c)) {
                ++count;
                String decomp = normD.normalize(c);
                datasize += decomp.length();
                if (max < decomp.length()) max = decomp.length();
                csa.setElementAt(c, (short)count);
                log.println("\t D[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
            }
        }
        csa.compact();
        
        log.println("// " + count + " NFD mappings total");
        log.println("// " + datasize + " total characters of results");
        log.println("// " + max + " string length, maximum");
        log.println("// " + csa.storage() + " trie length (doesn't count string size)");
        log.println();

        count = 0;
        datasize = 0;
        log.println("var CC = new Object(); // canonical class mappings");
        CompactByteArray cba = new CompactByteArray();
        
        for (char c = 0; c < 0xFFFF; ++c) {
            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
            int canClass = normKD.getCanonicalClass(c);
            if (canClass != 0) {
                ++count;
                
                log.println("\t CC[0x" + Utility.hex(c) + "]=" + canClass + ";");
            }
        }
        cba.compact();
        log.println("// " + count + " canonical class mappings total");
        log.println("// " + cba.storage() + " trie length");
        log.println();
        
        count = 0;
        datasize = 0;
        log.println("var C = new Object();  // composition mappings");
        log.println("// NOTE: Hangul is done in code!");
        
        System.out.println("WARNING -- COMPOSITIONS UNFINISHED!!");
        
        /*

        IntHashtable.IntEnumeration enum = normKD.getComposition();
        while (enum.hasNext()) {
            int key = enum.next();
            char val = (char) enum.value();
            if (0xAC00 <= val && val <= 0xD7A3) continue;
            ++count;
            log.println("\tC[0x" + Utility.hex(key) + "]=0x" + Utility.hex(val) + ";");
        }
        log.println("// " + count + " composition mappings total");
        log.println();
        */
        
        log.close();
        System.err.println("Done writing Javascript data");
    }
    
    
    static void writeConformance(String filename, byte option, boolean shortPrint)  throws IOException {
        Default.setUCD();
        //UCD ucd30 = UCD.make("3.0.0");
        
/*
U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
 => U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS, U+0304 COMBINING MACRON
*/
        if (DEBUG) {
            String[] testList = {"\u3192", "\u3220", "\u0344", "\u0385", "\uF934", "U", "U\u0308", "\u00DC", "\u00DC\u0304", "U\u0308\u0304"};
            for (int jj = 0; jj < testList.length; ++jj) {
                String t = testList[jj];
                System.out.println(ucd.getCodeAndName(t));
                
                CEList ces = collator.getCEList(t, true);
                System.out.println("CEs:    " + ces);
                
                String test = collator.getSortKey(t, option);
                System.out.println("Decomp: " + collator.toString(test));
                
                test = collator.getSortKey(t, option, false);
                System.out.println("No Dec: " + collator.toString(test));
            }
        }
        
        PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS);
        //if (!shortPrint) log.write('\uFEFF');
        log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
        log.println("# Generated:   " + getNormalDate());
        
        System.out.println("Sorting");
        int counter = 0;
        
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, null);
        cc.enableSamples();
        UnicodeSet found2 = new UnicodeSet();
        
        while (true) {
            String s = cc.next();
            if (s == null) break;
            
            found2.addAll(s);
            
            if (DEBUG_SHOW_ITERATION) {
                int cp = UTF16.charAt(s, 0);
                if (cp == 0x220 || !ucd.isAssigned(cp) || ucd.isCJK_BASE(cp)) {
                    System.out.println(ucd.getCodeAndName(s));
                }
            }
            Utility.dot(counter++);
            addStringX(s, option);
        }
        
        // Add special examples
        /*
        addStringX("\u2024\u2024", option);
        addStringX("\u2024\u2024\u2024", option);
        addStringX("\u2032\u2032", option);
        addStringX("\u2032\u2032\u2032", option);
        addStringX("\u2033\u2033\u2033", option);
        addStringX("\u2034\u2034", option);
        */
        
        
        
        UnicodeSet found = collator.found;
        if (!found2.containsAll(found2)) {
            System.out.println("In both: " + new UnicodeSet(found).retainAll(found2).toPattern(true));
            System.out.println("In UCA but not iteration: " + new UnicodeSet(found).removeAll(found2).toPattern(true));
            System.out.println("In iteration but not UCA: " + new UnicodeSet(found2).removeAll(found).toPattern(true));
            throw new IllegalArgumentException("Inconsistent data");
            
        }
        
        /*
        for (int i = 0; i <= 0x10FFFF; ++i) {
            if (!ucd.isAssigned(i)) continue;
            addStringX(UTF32.valueOf32(i), option);
        }
        
        Hashtable multiTable = collator.getContracting();
        Enumeration enum = multiTable.keys();
        while (enum.hasMoreElements()) {
            Utility.dot(counter++);
            addStringX((String)enum.nextElement(), option);
        }
        
        for (int i = 0; i < extraConformanceTests.length; ++i) { // put in sample non-characters
            Utility.dot(counter++);
            String s = UTF32.valueOf32(extraConformanceTests[i]);
            Utility.fixDot();
            System.out.println("Adding: " + Utility.hex(s));
            addStringX(s, option);
        }
        
        
        
        for (int i = 0; i < extraConformanceRanges.length; ++i) {
            Utility.dot(counter++);
            int start = extraConformanceRanges[i][0];
            int end = extraConformanceRanges[i][1];
            int increment = ((end - start + 1) / 303) + 1;
            //System.out.println("Range: " + start + ", " + end + ", " + increment);
            addStringX(start, option);
            for (int j = start+1; j < end-1; j += increment) {
                addStringX(j, option);
                addStringX(j+1, option);
            }
            addStringX(end-1, option);
            addStringX(end, option);
        }
        */
        
        Utility.fixDot();
        System.out.println("Total: " + sortedD.size());
        Iterator it;
        
        System.out.println("Writing");
        //String version = collator.getVersion();
        
        it = sortedD.keySet().iterator();
        
        String lastKey = "";
        
        while (it.hasNext()) {
            Utility.dot(counter);
            String key = (String) it.next();
            String source = (String) sortedD.get(key);
            int fluff = key.charAt(key.length() - 1);
            key = key.substring(0, key.length()- fluff - 2);
            //String status = key.equals(lastKey) ? "*" : "";
            //lastKey = key;
            //log.println(source);
            char extra = source.charAt(source.length()-1);
            String clipped = source.substring(0, source.length()-1);
            if (clipped.charAt(0) == LOW_ACCENT && extra != LOW_ACCENT) {
                extra = LOW_ACCENT;
                clipped = source.substring(1);
            }
            if (!shortPrint) {
                log.print(Utility.hex(source));
                log.print(
                    ";\t# " + (extra != LOW_ACCENT ? extra : '.') + " " + ucd.getName(clipped, SHORT) + "\t" + UCA.toString(key));
            } else {
                log.print(Utility.hex(source) + ";\t" + Utility.hex(clipped));
            }
            log.println();
        }
        
        log.close();
        sortedD.clear();
        System.out.println("Done");
    }

    static void addStringX(int x, byte option) {
        addStringX(UTF32.valueOf32(x), option);
    }
    
    static final char LOW_ACCENT = '\u0334';
    static final String SUPPLEMENTARY_ACCENT = UTF16.valueOf(0x1D165);
    static final String COMPLETELY_IGNOREABLE = "\u0001";
    static final String COMPLETELY_IGNOREABLE_ACCENT = "\u0591";
    static final String[] CONTRACTION_TEST = {SUPPLEMENTARY_ACCENT, COMPLETELY_IGNOREABLE, COMPLETELY_IGNOREABLE_ACCENT};
    
    static int addCounter = 0;
   
    static void addStringX(String s, byte option) {
        int firstChar = UTF16.charAt(s,0);
        // add characters with different strengths, to verify the order
        addStringY(s + 'a', option);
        addStringY(s + 'b', option);
        addStringY(s + '<27>', option);
        addStringY(s + 'A', option);
        addStringY(s + '!', option);
        if (option == SHIFTED && collator.isVariable(firstChar)) addStringY(s + LOW_ACCENT, option);
        
        // NOW, if the character decomposes, or is a combining mark (non-zero), try combinations
        
        if (Default.ucd.getCombiningClass(firstChar) > 0 
            || !Default.nfd.isNormalized(s) && !Default.ucd.isHangulSyllable(firstChar)) {
        // if it ends with a non-starter, try the decompositions.
            String decomp = Default.nfd.normalize(s);
            if (Default.ucd.getCombiningClass(UTF16.charAt(decomp, decomp.length()-1)) > 0) {
                if (canIt == null) canIt = new CanonicalIterator(".");
                canIt.setSource(s + LOW_ACCENT);
                int limit = 4;
                for (String can = canIt.next(); can != null; can = canIt.next()) {
                    if (s.equals(can)) continue;
                    if (--limit < 0) continue; // just include a sampling
                    addStringY(can, option);
                    // System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(can));
                }
            }
        }
        if (UTF16.countCodePoint(s) > 1) {
            for (int i = 1; i < s.length(); ++i) {
                if (UTF16.isLeadSurrogate(s.charAt(i-1))) continue; // skip if in middle of supplementary
                
                for (int j = 0; j < CONTRACTION_TEST.length; ++j) {
                    String extra = s.substring(0,i) + CONTRACTION_TEST[j] + s.substring(i);
                    addStringY(extra + 'a', option);
                    if (DEBUG) System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(extra));
                }
            }
        }
    }
    
    static CanonicalIterator canIt = null;
    
    static char counter;
    
    static void addStringY(String s, byte option) {
        String cpo = UCA.codePointOrder(s);
        String colDbase = collator.getSortKey(s, option, true) + "\u0000" + cpo + (char)cpo.length();
        sortedD.put(colDbase, s);
    }
    
    static UCD ucd_uca_base = null;
    
    /** 
     * Check that the primaries are the same as the compatibility decomposition.
     */
    static void checkBadDecomps(int strength, boolean decomposition, UnicodeSet alreadySeen) {
        if (ucd_uca_base == null) {
            ucd_uca_base = UCD.make(UCA.UCA_BASE);
        }
        int oldStrength = collator.getStrength();
        collator.setStrength(strength);
        Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
        Normalizer nfc = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
        switch (strength) {
            case 1: log.println("<h2>3. Primaries Incompatible with Decompositions</h2>"); break;
            case 2: log.println("<h2>4. Secondaries Incompatible with Decompositions</h2>"); break;
            case 3: log.println("<h2>5. Tertiaries Incompatible with Decompositions</h2>"); 
                log.println("<p>Note: Tertiary differences are not really errors; these are just warnings</p>"); 
            break;
            default: throw new IllegalArgumentException("bad strength: " + strength);
        }
        log.println("<p>Warning: only checking characters defined in base: " + ucd_uca_base.getVersion() + "</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
        
        int errorCount = 0;
        
        UnicodeSet skipSet = new UnicodeSet();
        
        for (int ch = 0; ch < 0x10FFFF; ++ch) {
        	if (!ucd_uca_base.isAllocated(ch)) continue;
            if (nfkd.isNormalized(ch)) continue;
            if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
            if (alreadySeen.contains(ch)) continue;
            Utility.dot(ch);
            
            String decomp = nfkd.normalize(ch);
            if (ch != ' ' && decomp.charAt(0) == ' ') {
                skipSet.add(ch);
                continue; // skip wierd decomps
            }
            if (ch != '\u0640' && decomp.charAt(0) == '\u0640') {
                skipSet.add(ch);
                continue; // skip wierd decomps
            }
            
            
            String sortKey = collator.getSortKey(UTF16.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
            String decompSortKey = collator.getSortKey(decomp, UCA.NON_IGNORABLE, decomposition);
            if (false && strength == 2) {
                sortKey = remove(sortKey, '\u0020');
                decompSortKey = remove(decompSortKey, '\u0020');
            }
            
            if (sortKey.equals(decompSortKey)) continue; // no problem!
            
            // fix key in the case of strength 3
            
            if (strength == 3) {
                String newSortKey = remapSortKey(ch, decomposition);
                if (!sortKey.equals(newSortKey)) {
                    System.out.println("Fixing: " + ucd.getCodeAndName(ch));
                    System.out.println("  Old:" + collator.toString(decompSortKey));
                    System.out.println("  New: " + collator.toString(newSortKey));
                    System.out.println("  Tgt: " + collator.toString(sortKey));
                }
                decompSortKey = newSortKey;
            }
            
            if (sortKey.equals(decompSortKey)) continue; // no problem!
            
            log.println("<tr><td>" + Utility.hex(ch)
                + "</td><td>" + UCA.toString(sortKey)
                + "</td><td>" + UCA.toString(decompSortKey)
                + "</td><td>" + ucd.getName(ch)
                + "</td></tr>"
                );
            alreadySeen.add(ch);
            errorCount++;
        }
        log.println("</table>");
        log.println("<p>Errors: " + errorCount + "</p>");
        log.println("<p>Space/Tatweel exceptions: " + skipSet.toPattern(true) + "</p>");
    	log.flush();
        collator.setStrength(oldStrength);
        Utility.fixDot();
    }
    
    static String remapSortKey(int cp, boolean decomposition) {
        if (toD.isNormalized(cp)) return remapCanSortKey(cp, decomposition);
        
        // we know that it is not NFKD.
        String canDecomp = toD.normalize(cp);
        String result = "";
        int ch;
        for (int j = 0; j < canDecomp.length(); j += UTF16.getCharCount(ch)) {
            ch = UTF16.charAt(canDecomp, j);
            System.out.println("* " + Default.ucd.getCodeAndName(ch));
            String newSortKey = remapCanSortKey(ch, decomposition);
            System.out.println("* " + UCA.toString(newSortKey));
            result = mergeSortKeys(result, newSortKey);
            System.out.println("= " + UCA.toString(result));
        }
        return result;
    }
    
    static String remapCanSortKey(int ch, boolean decomposition) {
        String compatDecomp = Default.nfkd.normalize(ch);
        String decompSortKey = collator.getSortKey(compatDecomp, UCA.NON_IGNORABLE, decomposition);
                
        byte type = ucd.getDecompositionType(ch);
        int pos = decompSortKey.indexOf(UCA.LEVEL_SEPARATOR) + 1; // after first separator
        pos = decompSortKey.indexOf(UCA.LEVEL_SEPARATOR, pos) + 1; // after second separator
        String newSortKey = decompSortKey.substring(0, pos);
        for (int i = pos; i < decompSortKey.length(); ++i) {
            int weight = decompSortKey.charAt(i);
            int newWeight = CEList.remap(ch, type, weight);
            if (i > pos + 1) newWeight = 0x1F;
            newSortKey += (char)newWeight;
        }
        return newSortKey;
    }
    
    // keys must be of the same strength
    static String mergeSortKeys(String key1, String key2) {
        StringBuffer result = new StringBuffer();
        int end1 = 0, end2 = 0;
        while (true) {
            int pos1 = key1.indexOf(UCA.LEVEL_SEPARATOR, end1);
            int pos2 = key2.indexOf(UCA.LEVEL_SEPARATOR, end2);
            if (pos1 < 0) {
                result.append(key1.substring(end1)).append(key2.substring(end2));
                return result.toString();
            }
            if (pos2 < 0) {
                result.append(key1.substring(end1, pos1)).append(key2.substring(end2)).append(key1.substring(pos1));
                return result.toString();
            }
            result.append(key1.substring(end1, pos1)).append(key2.substring(end2, pos2)).append(UCA.LEVEL_SEPARATOR);
            end1 = pos1 + 1;
            end2 = pos2 + 1;
        }
    }
    
    
    static final String remove (String s, char ch) {
        StringBuffer buf = new StringBuffer();
        for (int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            if (c == ch) continue;
            buf.append(c);
        }
        return buf.toString();
    }
    
        /*
        log = new PrintWriter(new FileOutputStream("Frequencies.html"));
        log.println("<html><body>");
        MessageFormat mf = new MessageFormat("<tr><td><tt>{0}</tt></td><td><tt>{1}</tt></td><td align='right'><tt>{2}</tt></td><td align='right'><tt>{3}</tt></td></tr>");
        MessageFormat mf2 = new MessageFormat("<tr><td><tt>{0}</tt></td><td align='right'><tt>{1}</tt></td></tr>");
        String header = mf.format(new String[] {"Start", "End", "Count", "Subtotal"});
        int count;
        
        log.println("<h2>Writing Used Weights</h2>");
        log.println("<p>Primaries</p><table border='1'>" + mf.format(new String[] {"Start", "End", "Count", "Subtotal"}));
        count = collator.writeUsedWeights(log, 1, mf);
        log.println(MessageFormat.format("<tr><td>Count:</td><td>{0}</td></tr>", new Object[] {new Integer(count)}));
        log.println("</table>");
        
        log.println("<p>Secondaries</p><table border='1'>" + mf2.format(new String[] {"Code", "Frequency"}));
        count = collator.writeUsedWeights(log, 2, mf2);
        log.println(MessageFormat.format("<tr><td>Count:</td><td>{0}</td></tr>", new Object[] {new Integer(count)}));
        log.println("</table>");
        
        log.println("<p>Tertiaries</p><table border='1'>" + mf2.format(new String[] {"Code", "Frequency"}));
        count = collator.writeUsedWeights(log, 3, mf2);
        log.println(MessageFormat.format("<tr><td>Count:</td><td>{0}</td></tr>", new Object[] {new Integer(count)}));
        log.println("</table>");
        log.println("</body></html>");
        log.close();
        */
        
    static int[] compactSecondary;
    
    /*static void checkEquivalents() {
        Normalizer nfkd = new Normalizer(Normalizer.NFKC);
        Normalizer nfd = new Normalizer(Normalizer.NFKD);
        for (char c = 0; c < 0xFFFF; ++c) {
            
    }*/
    
    static void testCompatibilityCharacters() throws IOException {
        log = Utility.openPrintWriter("UCA_CompatComparison.txt", Utility.UTF8_WINDOWS);
        
        int[] kenCes = new int[50];
        int[] markCes = new int[50];
        int[] kenComp = new int[50];
        Map forLater = new TreeMap();
        int count = 0;
        int typeLimit = UCD_Types.CANONICAL;
        boolean decompType = false;
        if (false) {
            typeLimit = UCD_Types.COMPATIBILITY;
            decompType = true;
        }
        
        // first find all the characters that cannot be generated "correctly"
        
        for (int i = 0; i < 0xFFFF; ++i) {
            int type = ucd.getDecompositionType(i);
            if (type < typeLimit) continue;
            int ceType = collator.getCEType((char)i);
            if (ceType >= collator.FIXED_CE) continue;
            // fix type
            type = getDecompType(i);
            
            String s = String.valueOf((char)i);
            int kenLen = collator.getCEs(s, decompType, kenCes); // true
            int markLen = fixCompatibilityCE(s, true, markCes, false);
            
            if (!arraysMatch(kenCes, kenLen, markCes, markLen)) {
                int kenCLen = fixCompatibilityCE(s, true, kenComp, true);
                String comp = CEList.toString(kenComp, kenCLen);
                
                if (arraysMatch(kenCes, kenLen, kenComp, kenCLen)) {
                    forLater.put((char)(COMPRESSED | type) + s, comp);
                    continue;
                }                
                if (type == ucd.CANONICAL && multipleZeroPrimaries(markCes, markLen)) {
                    forLater.put((char)(MULTIPLES | type) + s, comp);
                    continue;
                }
                forLater.put((char)type + s, comp);
            }
        }
        
        Iterator it = forLater.keySet().iterator();
        byte oldType = (byte)0xFF; // anything unique
        int caseCount = 0;
        log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
        log.println("Generated: " + getNormalDate());
        while (it.hasNext()) {
            String key = (String) it.next();
            byte type = (byte)key.charAt(0);
            if (type != oldType) {
                oldType = type;
                log.println("===============================================================");
                log.print("CASE " + (caseCount++) + ": ");
                byte rType = (byte)(type & OTHER_MASK);
                log.println("    Decomposition Type = " + ucd.getDecompositionTypeID_fromIndex(rType));
                if ((type & COMPRESSED) != 0) {
                    log.println("    Successfully Compressed a la Ken");
                    log.println("    [XXXX.0020.YYYY][0000.ZZZZ.0002] => [XXXX.ZZZZ.YYYY]");
                } else if ((type & MULTIPLES) != 0) {
                    log.println("    MULTIPLE ACCENTS");
                }
                log.println("===============================================================");
                log.println();
            }
            String s = key.substring(1);
            String comp = (String)forLater.get(key);
            
            int kenLen = collator.getCEs(s, decompType, kenCes);
            String kenStr = CEList.toString(kenCes, kenLen);
            
            int markLen = fixCompatibilityCE(s, true, markCes, false);
            String markStr = CEList.toString(markCes, markLen);
            
            if ((type & COMPRESSED) != 0) {
                log.println("COMPRESSED #" + (++count) + ": " + ucd.getCodeAndName(s));
                log.println("         : " + comp);
            } else {
                log.println("DIFFERENCE #" + (++count) + ": " + ucd.getCodeAndName(s));
                log.println("generated : " + markStr);
                if (!markStr.equals(comp)) {
                    log.println("compressed: " + comp);
                }
                log.println("Ken's     : " + kenStr);
                String nfkd = NFKD.normalize(s);
                log.println("NFKD      : " + ucd.getCodeAndName(nfkd));
                String nfd = NFD.normalize(s);
                if (!nfd.equals(nfkd)) {
                    log.println("NFD       : " + ucd.getCodeAndName(nfd));
                }
                //kenCLen = collator.getCEs(decomp, true, kenComp);
                //log.println("decomp ce: " + CEList.toString(kenComp, kenCLen));                   
            }
            log.println();
        }
        log.println("===============================================================");
        log.println();
        log.println("Compressible Secondaries");
        for (int i = 0; i < compressSet.size(); ++i) {
            if ((i & 0xF) == 0) log.println();
            if (!compressSet.get(i)) log.print("-  ");
            else log.print(Utility.hex(i, 3) + ", ");
        }
        log.close();
    }
    
    static final byte getDecompType(int cp) {
        byte result = ucd.getDecompositionType(cp);
        if (result == ucd.CANONICAL) {
            String d = NFD.normalize(cp); // TODO
            int cp1;
            for (int i = 0; i < d.length(); i += UTF16.getCharCount(cp1)) {
                cp1 = UTF16.charAt(d, i);
                byte t = ucd.getDecompositionType(cp1);
                if (t > ucd.CANONICAL) return t;
            }
        }
        return result;
    }
    
    static final boolean multipleZeroPrimaries(int[] a, int aLen) {
        int count = 0;
        for (int i = 0; i < aLen; ++i) {
            if (UCA.getPrimary(a[i]) == 0) {
                if (count == 1) return true;
                count++;
            } else {
                count = 0;
            }
        }
        return false;
    }
    
    static final byte MULTIPLES = 0x20, COMPRESSED = 0x40, OTHER_MASK = 0x1F;
    static final BitSet compressSet = new BitSet();
        
    static int kenCompress(int[] markCes, int markLen) {
        if (markLen == 0) return 0;
        int out = 1;
        for (int i = 1; i < markLen; ++i) {
            int next = markCes[i];
            int prev = markCes[out-1];
            if (UCA.getPrimary(next) == 0 
              && UCA.getSecondary(prev) == 0x20
              && UCA.getTertiary(next) == 0x2) {
                markCes[out-1] = UCA.makeKey(
                  UCA.getPrimary(prev), 
                  UCA.getSecondary(next), 
                  UCA.getTertiary(prev));
                compressSet.set(UCA.getSecondary(next));
            } else {
                markCes[out++] = next;
            }
        }
        return out;
    }
    
    
    static boolean arraysMatch(int[] a, int aLen, int[] b, int bLen) {
        if (aLen != bLen) return false;
        for (int i = 0; i < aLen; ++i) {
            if (a[i] != b[i]) return false;
        }
        return true;
    }
    
    static int[] markCes = new int[50];
    
    static int fixCompatibilityCE(String s, boolean decompose, int[] output, boolean compress) {
        byte type = getDecompType(UTF16.charAt(s, 0));
        char ch = s.charAt(0);
        
        String decomp = NFKD.normalize(s);
        int len = 0;
        int markLen = collator.getCEs(decomp, true, markCes);
        if (compress) markLen = kenCompress(markCes, markLen);
        
        //for (int j = 0; j < decomp.length(); ++j) {
            for (int k = 0; k < markLen; ++k) {
                int t = UCA.getTertiary(markCes[k]);
                t = CEList.remap(k, type, t);
                /*
                if (type != CANONICAL) {
                    if (0x3041 <= ch && ch <= 0x3094) t = 0xE; // hiragana
                    else if (0x30A1 <= ch && ch <= 0x30FA) t = 0x11; // katakana
                }
                switch (type) {
                    case COMPATIBILITY: t = (t == 8) ? 0xA : 4; break;
                    case COMPAT_FONT:  t = (t == 8) ? 0xB : 5; break;
                    case COMPAT_NOBREAK: t = 0x1B; break;
                    case COMPAT_INITIAL: t = 0x17; break;
                    case COMPAT_MEDIAL: t = 0x18; break;
                    case COMPAT_FINAL: t = 0x19; break;
                    case COMPAT_ISOLATED: t = 0x1A; break;
                    case COMPAT_CIRCLE: t = (t == 0x11) ? 0x13 : (t == 8) ? 0xC : 6; break;
                    case COMPAT_SUPER: t = 0x14; break;
                    case COMPAT_SUB: t = 0x15; break;
                    case COMPAT_VERTICAL: t = 0x16; break;
                    case COMPAT_WIDE: t= (t == 8) ? 9 : 3; break;
                    case COMPAT_NARROW: t = (0xFF67 <= ch && ch <= 0xFF6F) ? 0x10 : 0x12; break;
                    case COMPAT_SMALL: t = (t == 0xE) ? 0xE : 0xF; break;
                    case COMPAT_SQUARE: t = (t == 8) ? 0x1D : 0x1C; break;
                    case COMPAT_FRACTION: t = 0x1E; break;
                }
                */
                output[len++] = UCA.makeKey(
                    UCA.getPrimary(markCes[k]),
                    UCA.getSecondary(markCes[k]),
                    t);
            //}
        }
        return len;
    }
    
    static int getStrengthDiff(CEList celist) {
        int result = QUARTERNARY_DIFF;
        for (int j = 0; j < celist.length(); ++j) {
            int ce = celist.at(j);
            if (collator.getPrimary(ce) != 0) {
                return PRIMARY_DIFF;
            } else if (collator.getSecondary(ce) != 0) {
                result = SECONDARY_DIFF;
            } else if (collator.getTertiary(ce) != 0) {
                result = TERTIARY_DIFF;
            }
        }
        return result;
    }
    
    static String[] strengthName = {"XYZ", "0YZ", "00Z", "000"};
    
    static void writeCategoryCheck() throws IOException {
        /*PrintWriter diLog = new PrintWriter(
            new BufferedWriter(
                new OutputStreamWriter(
                    new FileOutputStream(GEN_DIR + "UCA_Nonspacing.txt"),
                    "UTF8"),
                32*1024));
                */
    	log.println("<h2>8. Checking against categories</h2>");
    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        
        Set sorted = new TreeSet();
        
        for (int i = 0; i < 0x10FFFF; ++i) {
            Utility.dot(i);
            if (!ucd.isRepresented(i)) continue;
            CEList celist = collator.getCEList(UTF32.valueOf32(i), true);
            int real = getStrengthDiff(celist);
            
            int desired = PRIMARY_DIFF;
            byte cat = ucd.getCategory(i);
            if (cat == Cc || cat == Cs || cat == Cf || ucd.isNoncharacter(i)) desired = QUARTERNARY_DIFF;
            else if (cat == Mn || cat == Me) desired = SECONDARY_DIFF;
            
            String listName = celist.toString();
            if (listName.length() == 0) listName = "<i>ignore</i>";
            if (real != desired) {
                sorted.add("<tr><td>" + strengthName[real]
                    + "</td><td>" + strengthName[desired]
                    + "</td><td>" + ucd.getCategoryID(i)
                    + "</td><td>" + listName
                    + "</td><td>" + ucd.getCodeAndName(i)
                    + "</td></tr>");
            }
        }
        
        Utility.print(log, sorted, "\r\n");
    	log.println("</table>");
    	log.flush();
    }
    
    static void writeDuplicates() {
    	log.println("<h2>9. Checking characters that are not canonical equivalents, but have same CE</h2>");
    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, toD);
        
        Map map = new TreeMap();
        
        while (true) {
            String s = cc.next();
            if (s == null) break;
            if (!toD.isNormalized(s)) continue; // only unnormalized stuff
            if (UTF16.countCodePoint(s) == 1) {
                int cat = ucd.getCategory(UTF16.charAt(s,0));
                if (cat == Cn || cat == Cc || cat == Cs) continue;
            }
            
            CEList celist = collator.getCEList(s, true);
            Utility.addToSet(map, celist, s);
        }
        
        Iterator it = map.keySet().iterator();
        while (it.hasNext()) {
            CEList celist = (CEList) it.next();
            Set s = (Set) map.get(celist);
            String name = celist.toString();
            if (name.length() == 0) name = "<i>ignore</i>";
            if (s.size() > 1) {
                    log.println("<tr><td>" + name 
                        + "</td><td>" + getHTML_NameSet(s, null, true)
                        + "</td></tr>");
            }
        }
    	log.println("</table>");
    	log.flush();
    }
    
    
    static void writeOverlap() {
    	log.println("<h2>10. Checking overlaps</h2>");
    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, toD);
        
        Map map = new TreeMap();
        Map tails = new TreeMap();
        
        int counter = 0;
        System.out.println("Collecting items");
        
        while (true) {
            String s = cc.next();
            if (s == null) break;
            Utility.dot(counter++);
            if (!toD.isNormalized(s)) continue; // only normalized stuff
            CEList celist = collator.getCEList(s, true);
            map.put(celist, s);
        }
        
        Utility.fixDot();
        System.out.println("Collecting tails");
        
        Iterator it = map.keySet().iterator();
        while (it.hasNext()) {
            CEList celist = (CEList) it.next();
            Utility.dot(counter++);
            int len = celist.length();
            if (len < 2) continue;
            
            for (int i = 1; i < len; ++i) {
                CEList tail = celist.sub(i, len);
                Utility.dot(counter++);
                if (map.get(tail) == null) { // skip anything in main
                    Utility.addToSet(tails, tail, celist);
                }
            }
        }
        
        Utility.fixDot();
        System.out.println("Finding overlaps");
        
        // we now have a set of main maps, and a set of tails
        // the main maps to string, the tails map to set of CELists

        it = map.keySet().iterator();
        List first = new ArrayList();
        List second = new ArrayList();
        
        while (it.hasNext()) {
            CEList celist = (CEList) it.next();
            int len = celist.length();
            if (len < 2) continue;
            Utility.dot(counter++);
            first.clear();
            second.clear();
            if (overlaps(map, tails, celist, 0, first, second)) {
                reverse(first);
                reverse(second);
            
                log.println("<tr><td>" + getHTML_NameSet(first, null, false) 
                    + "</td><td>" + getHTML_NameSet(first, map, true)
                    + "</td><td>" + getHTML_NameSet(second, null, false)
                    + "</td><td>" + getHTML_NameSet(second, map, true)
                    + "</td></tr>");
            }
        }
    	log.println("</table>");
    	log.flush();
    }
    
    static void reverse(List ell) {
        int i = 0;
        int j = ell.size() - 1;
        while (i < j) {
            Object temp = ell.get(i);
            ell.set(i, ell.get(j));
            ell.set(j, temp);
            ++i;
            --j;
        }
    }
    
    static final boolean DEBUG_SHOW_OVERLAP = false;
    
    static boolean overlaps(Map map, Map tails, CEList celist, int depth, List me, List other) {
        if (depth == 5) return false;
        boolean gotOne = false;
        if (DEBUG_SHOW_OVERLAP && depth > 0) {
            Object foo = map.get(celist);
            System.out.println(Utility.repeat("**", depth) + "Trying:" + celist + ", "
                + (foo != null ? ucd.getCodeAndName(foo.toString()) : ""));
            gotOne = true;
        }
        int len = celist.length();
        // if the tail of the celist matches something, then ArrayList
        // A. subtract the match and retry
        // B. if that doesn't work, see if the result is the tail of something else.
        for (int i = 1; i < len; ++i) {
            CEList tail = celist.sub(i, len);
            if (map.get(tail) != null) {
 
                if (DEBUG_SHOW_OVERLAP && !gotOne) {
                    Object foo = map.get(celist);
                    System.out.println(Utility.repeat("**", depth) + "Trying:" + celist + ", "
                        + (foo != null ? ucd.getCodeAndName(foo.toString()) : ""));
                    gotOne = true;
                }

                if (DEBUG_SHOW_OVERLAP) System.out.println(Utility.repeat("**", depth) + "  Match tail at " + i + ": " + tail + ", " + ucd.getCodeAndName(map.get(tail).toString()));
                // temporarily add tail
                int oldListSize = me.size();
                me.add(tail);
                
                // the tail matched, try 3 options
                CEList head = celist.sub(0, i);
                
                // see if the head matches exactly!
                
                if (map.get(head) != null) {
                    if (DEBUG_SHOW_OVERLAP) System.out.println(Utility.repeat("**", depth) + "  Match head at "
                        + i + ": " + head + ", " + ucd.getCodeAndName(map.get(head).toString()));
                    me.add(head);
                    other.add(celist);
                    return true;
                }
                
                // see if the end of the head matches something (recursively)
                
               if (DEBUG_SHOW_OVERLAP) System.out.println(Utility.repeat("**", depth) + "  Checking rest");
               if (overlaps(map, tails, head, depth+1, me, other)) return true;
                
                // otherwise we see if the head is some tail
                
                Set possibleFulls = (Set) tails.get(head);
                if (possibleFulls != null) {
                    Iterator it = possibleFulls.iterator();
                    while(it.hasNext()) {
                        CEList full = (CEList)it.next();
                        CEList possibleHead = full.sub(0,full.length() - head.length());
                        if (DEBUG_SHOW_OVERLAP) System.out.println(Utility.repeat("**", depth) + "  Reversing " + full
                            + ", " + ucd.getCodeAndName(map.get(full).toString()) 
                            + ", " + possibleHead);
                        if (overlaps(map, tails, possibleHead, depth+1, other, me)) return true;
                    }
                }
                    
                // didn't work, so retract!
                me.remove(oldListSize);
            }
        }
        return false;
    }
    
    // if m exists, then it is a mapping to strings. Use it.
    // otherwise just print what is in set
    static String getHTML_NameSet(Collection set, Map m, boolean useName) {
        StringBuffer result = new StringBuffer();
        Iterator it = set.iterator();
        while (it.hasNext()) {
            if (result.length() != 0) result.append(";<br>");
            Object item = it.next();
            if (m != null) item = m.get(item);
            if (useName) item = ucd.getCodeAndName(item.toString());
            result.append(item);
        }
        return result.toString();
    }

    static void writeContractions() throws IOException {
        /*PrintWriter diLog = new PrintWriter(
            new BufferedWriter(
                new OutputStreamWriter(
                    new FileOutputStream(GEN_DIR + "UCA_Contractions.txt"),
                    "UTF8"),
                32*1024));
                */
        PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", Utility.UTF8_WINDOWS);
                
        diLog.write('\uFEFF');

        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        
        int[] ces = new int[50];
        
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        int[] lenArray = new int[1];
        
        diLog.println("# Contractions");
        diLog.println("# Generated " + getNormalDate());
        diLog.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
        while (true) {
            String s = cc.next(ces, lenArray);
            if (s == null) break;
            int len = lenArray[0];
            
            if (s.length() > 1) {
                diLog.println(Utility.hex(s, " ")
                    + ";\t #" + CEList.toString(ces, len)
                    + " ( " + s + " )"
                    + " " + ucd.getName(s));
            }
        }
        diLog.close();
    }
    
    static void checkDisjointIgnorables() throws IOException {
    	/*
        PrintWriter diLog = new PrintWriter(
            new BufferedWriter(
                new OutputStreamWriter(
                    new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
                    "UTF8"),
                32*1024));
                */
        PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.UTF8_WINDOWS);
                
        diLog.write('\uFEFF');

        /*
        PrintWriter diLog = new PrintWriter(
            // try new one
            new UTF8StreamWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
            32*1024));
        diLog.write('\uFEFF');
        */
        
        //diLog = new PrintWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"));
        
        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        
        int[] ces = new int[50];
        int[] secondariesZP = new int[400];
        Vector[] secondariesZPsample = new Vector[400];
        int[] remapZP = new int[400];
        
        int[] secondariesNZP = new int[400];
        Vector[] secondariesNZPsample = new Vector[400];
        int[] remapNZP = new int[400];
        
        for (int i = 0; i < secondariesZP.length; ++i) {
            secondariesZPsample[i] = new Vector();
            secondariesNZPsample[i] = new Vector();
        }
        
        int zpCount = 0;
        int nzpCount = 0;
        
        /* for (char ch = 0; ch < 0xFFFF; ++ch) {
            byte type = collator.getCEType(ch);
            if (type >= UCA.FIXED_CE) continue;
            if (SKIP_CANONICAL_DECOMPOSIBLES && nfd.hasDecomposition(ch)) continue;
            String s = String.valueOf(ch);
            int len = collator.getCEs(s, true, ces);
            */
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        int[] lenArray = new int[1];
        
        Set sortedCodes = new TreeSet();
        Set mixedCEs = new TreeSet();
        
        while (true) {
            String s = cc.next(ces, lenArray);
            if (s == null) break;
            
            // process all CEs. Look for controls, and for mixed ignorable/non-ignorables
            
            int ccc;
            for (int kk = 0; kk < s.length(); kk += UTF32.count16(ccc)) {
                ccc = UTF32.char32At(s,kk);
                byte cat = ucd.getCategory(ccc);
                if (cat == Cf || cat == Cc || cat == Zs || cat == Zl || cat == Zp) {
                    sortedCodes.add(CEList.toString(ces, lenArray[0]) + "\t" + ucd.getCodeAndName(s));
                    break;
                }
            }
            
            int len = lenArray[0];
            
            int haveMixture = 0;
            for (int j = 0; j < len; ++j) {
                int ce = ces[j];
                int pri = collator.getPrimary(ce);
                int sec = collator.getSecondary(ce);
                if (pri == 0) {
                    secondariesZPsample[sec].add(secondariesZP[sec], s);
                    secondariesZP[sec]++;
                } else {
                    secondariesNZPsample[sec].add(secondariesNZP[sec], s);
                    secondariesNZP[sec]++;
                }
                if (haveMixture == 3) continue;
                if (collator.isVariable(ce)) haveMixture |= 1;
                else haveMixture |= 2;
                if (haveMixture == 3) {
                    mixedCEs.add(CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
                }
            }
        }
        
        for (int i = 0; i < secondariesZP.length; ++i) {
            if (secondariesZP[i] != 0) {
                remapZP[i] = zpCount;
                zpCount++;
            }
            if (secondariesNZP[i] != 0) {
                remapNZP[i] = nzpCount;
                nzpCount++;
            }
        }
        
        diLog.println();
        diLog.println("# Proposed Remapping (see doc about Japanese characters)");
        diLog.println();
 
        int bothCount = 0;
        for (int i = 0; i < secondariesZP.length; ++i) {
            if ((secondariesZP[i] != 0) || (secondariesNZP[i] != 0)) {
                char sign = ' ';
                if (secondariesZP[i] != 0 && secondariesNZP[i] != 0) {
                    sign = '*';
                    bothCount++;
                }
                if (secondariesZP[i] != 0) {
                    showSampleOverlap(diLog, false, sign + "ZP ", secondariesZPsample[i]); // i, 0x20 + nzpCount + remapZP[i], 
                }
                if (secondariesNZP[i] != 0) {
                    if (i == 0x20) {
                        diLog.println("(omitting " + secondariesNZP[i] + " NZP with values 0020 -- values don't change)");
                    } else {
                        showSampleOverlap(diLog, true, sign + "NZP", secondariesNZPsample[i]); // i, 0x20 + remapNZP[i],
                    }
                }
                diLog.println();
            }
        }
        diLog.println("ZP Count = " + zpCount + ", NZP Count = " + nzpCount + ", Collisions = " + bothCount);
        
        /*
        diLog.println();
        diLog.println("OVERLAPS");
        diLog.println();
        
        for (int i = 0; i < secondariesZP.length; ++i) {
            if (secondariesZP[i] != 0 && secondariesNZP[i] != 0) {
                diLog.println("Overlap at " + Utility.hex(i)
                    + ": " + secondariesZP[i] + " with zero primaries"
                    + ", " + secondariesNZP[i] + " with non-zero primaries"
                );
                
                showSampleOverlap(" ZP:  ", secondariesZPsample[i], ces);
                showSampleOverlap(" NZP: ", secondariesNZPsample[i], ces);
                diLog.println();
            }
        }
        */
        
        diLog.println();
        diLog.println("# BACKGROUND INFORMATION");
        diLog.println();
        diLog.println("# All characters with 'mixed' CEs: variable and non-variable");
        diLog.println("# Note: variables are in " + Utility.hex(collator.getVariableLow() >> 16) + " to "
            + Utility.hex(collator.getVariableHigh() >> 16));
        diLog.println();
        
        Iterator it;
        it = mixedCEs.iterator();
        while (it.hasNext()) {
            Object key = it.next();
            diLog.println(key);
        }
        
        diLog.println();
        diLog.println("# All 'controls': Cc, Cf, Zs, Zp, Zl");
        diLog.println();
        
        it = sortedCodes.iterator();
        while (it.hasNext()) {
            Object key = it.next();
            diLog.println(key);
        }
        
        
        diLog.close();
    }
    
    static void checkCE_overlap() throws IOException {
        /*PrintWriter diLog = new PrintWriter(
            new BufferedWriter(
                new OutputStreamWriter(
                    new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
                    "UTF8"),
                32*1024));
                */
        PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.UTF8_WINDOWS);
                
        diLog.write('\uFEFF');

        //diLog = new PrintWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"));
        
        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        
        int[] ces = new int[50];
        int[] secondariesZP = new int[400];
        Vector[] secondariesZPsample = new Vector[400];
        int[] remapZP = new int[400];
        
        int[] secondariesNZP = new int[400];
        Vector[] secondariesNZPsample = new Vector[400];
        int[] remapNZP = new int[400];
        
        for (int i = 0; i < secondariesZP.length; ++i) {
            secondariesZPsample[i] = new Vector();
            secondariesNZPsample[i] = new Vector();
        }
        
        int zpCount = 0;
        int nzpCount = 0;
        
        /* for (char ch = 0; ch < 0xFFFF; ++ch) {
            byte type = collator.getCEType(ch);
            if (type >= UCA.FIXED_CE) continue;
            if (SKIP_CANONICAL_DECOMPOSIBLES && nfd.hasDecomposition(ch)) continue;
            String s = String.valueOf(ch);
            int len = collator.getCEs(s, true, ces);
            */
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        int[] lenArray = new int[1];
        
        Set sortedCodes = new TreeSet();
        Set mixedCEs = new TreeSet();
        
        while (true) {
            String s = cc.next(ces, lenArray);
            if (s == null) break;
            
            // process all CEs. Look for controls, and for mixed ignorable/non-ignorables
            
            int ccc;
            for (int kk = 0; kk < s.length(); kk += UTF32.count16(ccc)) {
                ccc = UTF32.char32At(s,kk);
                byte cat = ucd.getCategory(ccc);
                if (cat == Cf || cat == Cc || cat == Zs || cat == Zl || cat == Zp) {
                    sortedCodes.add(CEList.toString(ces, lenArray[0]) + "\t" + ucd.getCodeAndName(s));
                    break;
                }
            }
            
            int len = lenArray[0];
            
            int haveMixture = 0;
            for (int j = 0; j < len; ++j) {
                int ce = ces[j];
                int pri = collator.getPrimary(ce);
                int sec = collator.getSecondary(ce);
                if (pri == 0) {
                    secondariesZPsample[sec].add(secondariesZP[sec], s);
                    secondariesZP[sec]++;
                } else {
                    secondariesNZPsample[sec].add(secondariesNZP[sec], s);
                    secondariesNZP[sec]++;
                }
                if (haveMixture == 3) continue;
                if (collator.isVariable(ce)) haveMixture |= 1;
                else haveMixture |= 2;
                if (haveMixture == 3) {
                    mixedCEs.add(CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
                }
            }
        }
        
        for (int i = 0; i < secondariesZP.length; ++i) {
            if (secondariesZP[i] != 0) {
                remapZP[i] = zpCount;
                zpCount++;
            }
            if (secondariesNZP[i] != 0) {
                remapNZP[i] = nzpCount;
                nzpCount++;
            }
        }
        
        diLog.println();
        diLog.println("# Proposed Remapping (see doc about Japanese characters)");
        diLog.println();
 
        int bothCount = 0;
        for (int i = 0; i < secondariesZP.length; ++i) {
            if ((secondariesZP[i] != 0) || (secondariesNZP[i] != 0)) {
                char sign = ' ';
                if (secondariesZP[i] != 0 && secondariesNZP[i] != 0) {
                    sign = '*';
                    bothCount++;
                }
                if (secondariesZP[i] != 0) {
                    showSampleOverlap(diLog, false, sign + "ZP ", secondariesZPsample[i]); // i, 0x20 + nzpCount + remapZP[i], 
                }
                if (secondariesNZP[i] != 0) {
                    if (i == 0x20) {
                        diLog.println("(omitting " + secondariesNZP[i] + " NZP with values 0020 -- values don't change)");
                    } else {
                        showSampleOverlap(diLog, true, sign + "NZP", secondariesNZPsample[i]); // i, 0x20 + remapNZP[i],
                    }
                }
                diLog.println();
            }
        }
        diLog.println("ZP Count = " + zpCount + ", NZP Count = " + nzpCount + ", Collisions = " + bothCount);        
        
        diLog.close();
    }
    
    static void showSampleOverlap(PrintWriter diLog, boolean doNew, String head, Vector v) {
        for (int i = 0; i < v.size(); ++i) {
            showSampleOverlap(diLog, doNew, head, (String)v.get(i));
        }
    }
    
    static void showSampleOverlap(PrintWriter diLog, boolean doNew, String head, String src) {
        int[] ces = new int[30];
        int len = collator.getCEs(src, true, ces);
        int[] newCes = null;
        int newLen = 0;
        if (doNew) {
            newCes = new int[30];
            for (int i = 0; i < len; ++i) {
                int ce = ces[i];
                int p = UCA.getPrimary(ce);
                int s = UCA.getSecondary(ce);
                int t = UCA.getTertiary(ce);
                if (p != 0 && s != 0x20) {
                    newCes[newLen++] = UCA.makeKey(p, 0x20, t);
                    newCes[newLen++] = UCA.makeKey(0, s, 0x1F);
                } else {
                    newCes[newLen++] = ce;
                }
            }
        }
        diLog.println(
            ucd.getCode(src)
            + "\t" + head
            //+ "\t" + Utility.hex(oldWeight)
            //+ " => " + Utility.hex(newWeight)
            + "\t" + CEList.toString(ces, len)
            + (doNew ? " => " + CEList.toString(newCes, newLen) : "")
            + "\t( " + src + " )"
            + "\t" + ucd.getName(src)
            );
    }
    
    static final byte WITHOUT_NAMES = 0, WITH_NAMES = 1, IN_XML = 2;
    
    static final boolean SKIP_CANONICAL_DECOMPOSIBLES = true;
    
    static final int TRANSITIVITY_COUNT = 8000;
    static final int TRANSITIVITY_ITERATIONS = 1000000;
    
    static void testTransitivity() {
        char[] tests = new char[TRANSITIVITY_COUNT];
        String[] keys = new String[TRANSITIVITY_COUNT];
        
        int i = 0;
        System.out.println("Loading");
        for (char ch = 0; i < tests.length; ++ch) {
            byte type = collator.getCEType(ch);
            if (type >= UCA.FIXED_CE) continue;
            Utility.dot(ch);
            tests[i] = ch;
            keys[i] = collator.getSortKey(String.valueOf(ch));
            ++i;
        }
         
        java.util.Comparator cm = new RuleComparator();
        
        i = 0;
        Utility.fixDot();
        System.out.println("Comparing");
        
        while (i++ < TRANSITIVITY_ITERATIONS) {
            Utility.dot(i);
            int a = (int)Math.random()*TRANSITIVITY_COUNT;
            int b = (int)Math.random()*TRANSITIVITY_COUNT;
            int c = (int)Math.random()*TRANSITIVITY_COUNT;
            int ab = cm.compare(keys[a], keys[b]);
            int bc = cm.compare(keys[b], keys[c]);
            int ca = cm.compare(keys[c], keys[a]);
            
            if (ab < 0 && bc < 0 && ca < 0 || ab > 0 && bc > 0 && ca > 0) {
                System.out.println("Transitivity broken for "
                    + Utility.hex(a)
                    + ", " + Utility.hex(b)
                    + ", " + Utility.hex(c));
            }
        }
    }
    
    static Normalizer nfdNew = new Normalizer(Normalizer.NFD, "");
    static Normalizer NFC = new Normalizer(Normalizer.NFC, "");
    static Normalizer nfkdNew = new Normalizer(Normalizer.NFKD, "");
    
    static int getFirstCELen(int[] ces, int len) {
        if (len < 2) return len;
    	int expansionStart = 1;
    	if (UCA.isImplicitLeadCE(ces[0])) {
    		expansionStart = 2; // move up if first is double-ce
    	} 
    	if (len > expansionStart && homelessSecondaries.contains(UCA.getSecondary(ces[expansionStart]))) {
    		++expansionStart; // move up if *second* is homeless ignoreable
    	}
    	return expansionStart;
    }
    
    
    static void writeRules (byte option, boolean shortPrint) throws IOException {
        
        //testTransitivity();
        //if (true) return;
        
        int[] ces = new int[50];
        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
        
        if (false) {
        int len2 = collator.getCEs("\u2474", true, ces);
        System.out.println(CEList.toString(ces, len2));

        String a = collator.getSortKey("a");
        String b = collator.getSortKey("A");
        System.out.println(collator.strengthDifference(a, b));
        }
        
        System.out.println("Sorting");
        Map backMap = new HashMap();
        java.util.Comparator cm = new RuleComparator();
        Map ordered = new TreeMap(cm);
        
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, 
            SKIP_CANONICAL_DECOMPOSIBLES ? nfd : null);
        int[] lenArray = new int[1];
        
        Set alreadyDone = new HashSet();
        PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.UTF8_WINDOWS);

        while (true) {
            String s = cc.next(ces, lenArray);
            if (s == null) break;
            int len = lenArray[0];
            
            if (s.equals("\uD800")) {
            	System.out.println("Check: " + CEList.toString(ces, len));
            }
            
            log2.println(s + "\t" + CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
            
            addToBackMap(backMap, ces, len, s, false);
            
            int ce2 = 0;
            int ce3 = 0;
            int logicalFirstLen = getFirstCELen(ces, len);
            if (logicalFirstLen > 1) {
                ce2 = ces[1];
                if (logicalFirstLen > 2) {
                    ce3 = ces[2];
                }
            }

            String key = String.valueOf(UCA.getPrimary(ces[0])) + String.valueOf(UCA.getPrimary(ce2)) + String.valueOf(UCA.getPrimary(ce3))
                + String.valueOf(UCA.getSecondary(ces[0])) + String.valueOf(UCA.getSecondary(ce2)) + String.valueOf(UCA.getSecondary(ce3))
                + String.valueOf(UCA.getTertiary(ces[0])) + String.valueOf(UCA.getTertiary(ce2)) + String.valueOf(UCA.getTertiary(ce3))
                + collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + UCA.codePointOrder(s);
                
                //String.valueOf((char)(ces[0]>>>16)) + String.valueOf((char)(ces[0] & 0xFFFF))
                //+ String.valueOf((char)(ce2>>>16)) + String.valueOf((char)(ce2 & 0xFFFF))
                
            if (s.equals("\u0660") || s.equals("\u2080")) {
                System.out.println(ucd.getCodeAndName(s) + "\t" + Utility.hex(key));
            }
                
            ordered.put(key, s);
            alreadyDone.add(s);
            
            Object result = ordered.get(key);
            if (result == null) {
                System.out.println("BAD SORT: " + Utility.hex(key) + ", " + Utility.hex(s));
            }
        }
        
        System.out.println("Checking CJK");
        
        // Check for characters that are ARE explicitly mapped in the CJK ranges
        UnicodeSet CJK = new UnicodeSet(0x2E80, 0x2EFF);
        CJK.add(0x2F00, 0x2EFF);
        CJK.add(0x2F00, 0x2FDF);
        CJK.add(0x3400, 0x9FFF);
        CJK.add(0xF900, 0xFAFF);
        CJK.add(0x20000, 0x2A6DF);
        CJK.add(0x2F800, 0x2FA1F);
        CJK.removeAll(new UnicodeSet("[:Cn:]")); // remove unassigned
        
        // make set with canonical decomposibles
        UnicodeSet composites = new UnicodeSet();
        for (int i = 0; i < 0x10FFFF; ++i) {
        	if (!ucd.isAllocated(i)) continue;
        	if (nfd.isNormalized(i)) continue;
        	composites.add(i);
        }
        UnicodeSet CJKcomposites = new UnicodeSet(CJK).retainAll(composites);
        System.out.println("CJK composites " + CJKcomposites.toPattern(true));
        System.out.println("CJK NONcomposites " + new UnicodeSet(CJK).removeAll(composites).toPattern(true));
        
        UnicodeSet mapped = new UnicodeSet();
        Iterator it = alreadyDone.iterator();
        while (it.hasNext()) {
        	String member = (String) it.next();
        	mapped.add(member);
        }
        UnicodeSet CJKmapped = new UnicodeSet(CJK).retainAll(mapped);
        System.out.println("Mapped CJK: " + CJKmapped.toPattern(true));
        System.out.println("UNMapped CJK: " + new UnicodeSet(CJK).removeAll(mapped).toPattern(true));
        System.out.println("Neither Mapped nor Composite CJK: "
        	+ new UnicodeSet(CJK).removeAll(CJKcomposites).removeAll(CJKmapped).toPattern(true));
        
        
        
/*
2E80..2EFF; CJK Radicals Supplement
2F00..2FDF; Kangxi Radicals

3400..4DBF; CJK Unified Ideographs Extension A
4E00..9FFF; CJK Unified Ideographs
F900..FAFF; CJK Compatibility Ideographs

20000..2A6DF; CJK Unified Ideographs Extension B
2F800..2FA1F; CJK Compatibility Ideographs Supplement
*/
        
        
        System.out.println("Adding Kanji");
        for (int i = 0; i < 0x10FFFF; ++i) {
        	if (!ucd.isAllocated(i)) continue;
        	if (nfkd.isNormalized(i)) continue;
        	Utility.dot(i);
        	String decomp = nfkd.normalize(i);
        	int cp;
        	for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
        		cp = UTF16.charAt(decomp, j);
        		String s = UTF16.valueOf(cp);
        		if (alreadyDone.contains(s)) continue;

        		alreadyDone.add(s);
        		int len = collator.getCEs(s, true, ces);
        		
            	log2.println(s+ "\t" + CEList.toString(ces, len)
            		+ "\t" + ucd.getCodeAndName(s) + " from " + ucd.getCodeAndName(i));
            		
            	addToBackMap(backMap, ces, len, s, false);
        	}
        }
        
        System.out.println("Writing");
        
        String filename = "UCA_Rules";
        if (shortPrint) filename += "_SHORT";
        if (option == IN_XML) filename += ".xml"; else filename += ".txt";
        
        log = Utility.openPrintWriter(filename, Utility.UTF8_WINDOWS);
        
        String[] commentText = {
        	"UCA Rules",
        	"This file contains the UCA tables for the given version, but transformed into rule syntax.",
            "Generated:   " + getNormalDate(),
        	"NOTE: Since UCA handles canonical equivalents, no composites are necessary",
        	"(except in extensions).",
        	"For syntax description, see: http://oss.software.ibm.com/icu/userguide/Collate_Intro.html"
        };
        
        if (option == IN_XML) {
        	log.println("<collation>");
        	log.println("<!--");
        	for (int i = 0; i < commentText.length; ++i) {
        		log.println(commentText[i]);
        	}
        	log.println("-->");
        	log.println("<base uca='" + collator.getDataVersion() + "/" + collator.getUCDVersion() + "'/>");
        	log.println("<rules>");
        } else {
        	log.write('\uFEFF'); // BOM
        	for (int i = 0; i < commentText.length; ++i) {
        		log.println("#\t" + commentText[i]);
        	}
        	log.println("# VERSION: UCA=" + collator.getDataVersion() + ", UCD=" + collator.getUCDVersion());
        }
        
        it = ordered.keySet().iterator();
        int oldFirstPrimary = UCA.getPrimary(UCA.TERMINATOR);
        boolean wasVariable = false;
        
        //String lastSortKey = collator.getSortKey("\u0000");;
        // 12161004
        int lastCE = 0;
        int ce = 0;
        int nextCE = 0;
        int lastCJKPrimary = 0;
        
        boolean firstTime = true;
        
        boolean done = false;
        
        String chr = "";
        int len = -1;
        
        String nextChr = "";
        int nextLen = -1; // -1 signals that we need to skip!!
        int[] nextCes = new int[50];
        
        String lastChr = "";
        int lastLen = -1;
        int[] lastCes = new int[50];
        int lastExpansionStart = 0;
        int expansionStart = 0;
        
        long variableTop = collator.getVariableHigh() & INT_MASK;
        
        // for debugging ordering
        String lastSortKey = "";
        boolean showNext = false;
        
        for (int loopCounter = 0; !done; loopCounter++) {
            Utility.dot(loopCounter);
            
            lastCE = ce;
            lastLen = len;
            lastChr = chr;
            lastExpansionStart = expansionStart;
            if (len > 0) {
                System.arraycopy(ces, 0, lastCes, 0, lastLen);
            }
            
            // copy the current from Next
            
            ce = nextCE;
            len = nextLen;
            chr = nextChr;
            if (nextLen > 0) {
                System.arraycopy(nextCes, 0, ces, 0, nextLen);
            }
            
            // We need to look ahead one, to be able to reset properly
            
            if (it.hasNext()) {
                String nextSortKey = (String) it.next();
                nextChr = (String)ordered.get(nextSortKey);
                int result = cm.compare(nextSortKey, lastSortKey);
                if (result < 0) {
                    System.out.println();
                    System.out.println("DANGER: Sort Key Unordered!");
                        System.out.println((loopCounter-1) + " " + Utility.hex(lastSortKey)
                            + ", " + ucd.getCodeAndName(lastSortKey.charAt(lastSortKey.length()-1)));
                    System.out.println(loopCounter + " " + Utility.hex(nextSortKey)
                         + ", " + ucd.getCodeAndName(nextSortKey.charAt(nextSortKey.length()-1)));
                }                  
                if (nextChr == null) {
                    Utility.fixDot();
                    if (!showNext) {
                        System.out.println();
                        System.out.println((loopCounter-1) + "   Last = " + Utility.hex(lastSortKey)
                            + ", " + ucd.getCodeAndName(lastSortKey.charAt(lastSortKey.length()-1)));
                    }
                    System.out.println(cm.compare(lastSortKey, nextSortKey)
                        + ", " + cm.compare(nextSortKey, lastSortKey));
                    System.out.println(loopCounter + " NULL AT  " + Utility.hex(nextSortKey)
                         + ", " + ucd.getCodeAndName(nextSortKey.charAt(nextSortKey.length()-1)));
                    nextChr = "??";
                    showNext = true;
                } else if (showNext) {
                    showNext = false;
                    System.out.println(cm.compare(lastSortKey, nextSortKey)
                        + ", " + cm.compare(nextSortKey, lastSortKey));
                    System.out.println(loopCounter + "   Next = " + Utility.hex(nextSortKey)
                         + ", " + ucd.getCodeAndName(nextChr));
                }
                lastSortKey = nextSortKey;
            } else {
                nextChr = "??";
                done = true; // make one more pass!!!
            }
                
            nextLen = collator.getCEs(nextChr, true, nextCes);
            nextCE = nextCes[0];
            
            // skip first (fake) element
            
            if (len == -1) continue;
            
            
            // for debugging
           
            
            if (loopCounter < 5) {
                System.out.println(loopCounter);
                System.out.println(CEList.toString(lastCes, lastLen) + ", " + ucd.getCodeAndName(lastChr));
                System.out.println(CEList.toString(ces, len) + ", " + ucd.getCodeAndName(chr));
                System.out.println(CEList.toString(nextCes, nextLen) + ", " + ucd.getCodeAndName(nextChr));
            }
            
            // get relation
            
            
            /*if (chr.charAt(0) == 0xFFFB) {
                System.out.println("DEBUG");
            }*/
            
            
    		expansionStart = getFirstCELen(ces, len);
            
            // int relation = getStrengthDifference(ces, len, lastCes, lastLen);
            int relation = getStrengthDifference(ces, expansionStart, lastCes, lastExpansionStart);

            if (relation == QUARTERNARY_DIFF) {
                int relation2 = getStrengthDifference(ces, len, lastCes, lastLen);
                if (relation2 != QUARTERNARY_DIFF) relation = TERTIARY_DIFF;
            }

            // RESETs: do special case for relations to fixed items
            
            String reset = "";
            String resetComment = "";
            int xmlReset = 0;
            boolean insertVariableTop = false;
            boolean resetToParameter = false;
            
            int ceLayout = getCELayout(ce);
            if (ceLayout == IMPLICIT) {
                if (relation == PRIMARY_DIFF) {
                    int primary = UCA.getPrimary(ce);
                    int resetCp = UCA.ImplicitToCodePoint(primary, UCA.getPrimary(ces[1]));
                    	
                    int[] ces2 = new int[50];
                    int len2 = collator.getCEs(UTF16.valueOf(resetCp), true, ces2);
                    relation = getStrengthDifference(ces, len, ces2, len2);
                    	
                    reset = quoteOperand(UTF16.valueOf(resetCp));
                    resetComment = ucd.getCodeAndName(resetCp);
                    // lastCE = UCA.makeKey(primary, UCA.NEUTRAL_SECONDARY, UCA.NEUTRAL_TERTIARY);
                    xmlReset = 2;
                }
                // lastCJKPrimary = primary;
            } else if (ceLayout != getCELayout(lastCE) || firstTime) {
                resetToParameter = true;
                switch (ceLayout) {
                    case T_IGNORE: reset = "last tertiary ignorable"; break;
                    case S_IGNORE: reset = "last secondary ignorable"; break;
                    case P_IGNORE: reset = "last primary ignorable"; break;
                    case VARIABLE: reset = "last regular"; break;
                    case NON_IGNORE: /*reset = "top"; */ insertVariableTop = true; break;
                    case TRAILING: reset = "last trailing"; break;
                }
            }
            
            if (chr.equals("\u2F00")) {
                System.out.println(CEList.toString(ces, len));
            }
            
            // There are double-CEs, so we have to know what the length of the first bit is.
            
            
            // check expansions
            
            String expansion = "";
            if (len > expansionStart) {
                //int tert0 = ces[0] & 0xFF;
                //boolean isCompat = tert0 != 2 && tert0 != 8;
                log2.println("Exp: " + ucd.getCodeAndName(chr) + ", " + CEList.toString(ces, len) + ", start: " + expansionStart);
                int[] rel = {relation};
                expansion = getFromBackMap(backMap, ces, expansionStart, len, chr, rel);
                // relation = rel[0];
                
                // The relation needs to be fixed differently. Since it is an expansion, it should be compared to
                // the first CE
                // ONLY reset if the sort keys are not equal
                if (false && (relation == PRIMARY_DIFF || relation == SECONDARY_DIFF)) {
                    int relation2 = getStrengthDifference(ces, expansionStart, lastCes, lastExpansionStart);
                    if (relation2 != relation) {
                        System.out.println ();
                        System.out.println ("Resetting: " + RELATION_NAMES[relation] + " to " + RELATION_NAMES[relation2]);
                        System.out.println ("LCes: " + CEList.toString(lastCes, lastLen) + ", " + lastExpansionStart 
                            + ", " + ucd.getCodeAndName(lastChr));
                        System.out.println ("Ces:  " + CEList.toString(ces, len) + ", " + expansionStart 
                            + ", " + ucd.getCodeAndName(chr));
                        relation = relation2;
                    }
                }
                
            }
            
            // print results
            
            if (option == IN_XML) {
                if (insertVariableTop) log.println(XML_RELATION_NAMES[0] + "<variableTop/>");
                
                /*log.print("  <!--" + ucd.getCodeAndName(chr));
                if (len > 1) log.print(" / " + Utility.hex(expansion));
                log.println("-->");
                */
                
                if (reset.length() != 0) {
                    log.println("<reset/>"
                    + (resetToParameter ? "<position at=\"" + reset + "\"/>" : Utility.quoteXML(reset))
                	+ (resetComment.length() != 0 ? "<!-- " + resetComment + "-->": ""));
                }
                if (!firstTime) {
                    log.print("  <" + XML_RELATION_NAMES[relation] + "/>");
                    log.print(Utility.quoteXML(chr));
                    //log.print("</" + XML_RELATION_NAMES[relation] + ">");
                }
                if (expansion.length() > 0) {
                    log.print("<x/>" + Utility.quoteXML(expansion));
                }
                if (!shortPrint) {
                    log.print("\t<!--" 
                        + CEList.toString(ces, len) + " " 
                        + ucd.getCodeAndName(chr));
                    if (expansion.length() > 0) log.print(" / " + Utility.hex(expansion));
                    log.print("-->");
                }
                log.println();
            } else {
                if (insertVariableTop) log.println(RELATION_NAMES[0] + " [variable top]");
                if (reset.length() != 0) log.println("& " 
                    + (resetToParameter ? "[" : "") + reset + (resetToParameter ? "]" : "")
                	+ (resetComment.length() != 0 ? "\t\t# " + resetComment : ""));
                if (!firstTime) log.print(RELATION_NAMES[relation] + " " + quoteOperand(chr));
                if (expansion.length() > 0) log.print(" / " + quoteOperand(expansion));
                if (!shortPrint) {
                    log.print("\t# " 
                        + CEList.toString(ces, len) + " " 
                        + ucd.getCodeAndName(chr));
                    if (expansion.length() > 0) log.print(" / " + Utility.hex(expansion));
                }
                log.println();
            }
            firstTime = false;
        }
        // log.println("& [top]"); // RESET
        if (option == IN_XML) log.println("</rules></collation>");
        log2.close();
        log.close();
        Utility.fixDot();
    }
    
    static final int NONE = 0, T_IGNORE = 1, S_IGNORE = 2, P_IGNORE = 3, VARIABLE = 4, NON_IGNORE = 5, IMPLICIT = 6, TRAILING = 7;
    
    static int getCELayout(int ce) {
        int primary = collator.getPrimary(ce);
        int secondary = collator.getSecondary(ce);
        int tertiary = collator.getSecondary(ce);
        if (primary == 0) {
            if (secondary == 0) {
                if (tertiary == 0) return T_IGNORE;
                return S_IGNORE;
            }
            return P_IGNORE;
        }
        if (collator.isVariable(ce)) return VARIABLE;
        if (primary < UNSUPPORTED_BASE) return NON_IGNORE;
        if (primary < UNSUPPORTED_LIMIT) return IMPLICIT;
        return TRAILING;
    }
        
    static long getPrimary(int[] ces, int len) {
        for (int i = 0; i < len; ++i) {
            int result = UCA.getPrimary(ces[i]);
            if (result == 0) continue;
    	    if (UCA.isImplicitLeadPrimary(result)) {
    		    return (result << 16) + UCA.getPrimary(ces[i+1]);
    	    } else {
    		    return result;
    	    }
    	}
    	return 0;
    }
    
    static long getSecondary(int[] ces, int len) {
        for (int i = 0; i < len; ++i) {
            int result = UCA.getSecondary(ces[i]);
            if (result == 0) continue;
            return result;
        }
    	return 0;
    }
    
    static long getTertiary(int[] ces, int len) {
        for (int i = 0; i < len; ++i) {
    		int result = UCA.getTertiary(ces[i]);
            if (result == 0) continue;
            return result;
    	}
    	return 0;
    }
    
    static final int 
    	PRIMARY_DIFF = 0,
    	SECONDARY_DIFF = 1,
    	TERTIARY_DIFF = 2,
    	QUARTERNARY_DIFF = 3,
    	DONE = -1;
    	
    static class CE_Iterator {
        int[] ces;
        int len;
        int current;
        int level;
        
        void reset(int[] ces, int len) {
            this.ces = ces;
            this.len = len;
            current = 0;
            level = PRIMARY_DIFF;
        }
        void setLevel(int level) {
            current = 0;
            this.level = level;
        }
        int next() {
            int val = DONE;
            while (current < len) {
                int ce = ces[current++];
                switch (level) {
                    case PRIMARY_DIFF: val = UCA.getPrimary(ce); break;
                    case SECONDARY_DIFF: val = UCA.getSecondary(ce); break;
                    case TERTIARY_DIFF: val = UCA.getTertiary(ce); break;
                }
                if (val != 0) return val;
            }
            return DONE;
        }
    }                    
   
    static CE_Iterator ceit1 = new CE_Iterator();
    static CE_Iterator ceit2 = new CE_Iterator();
    
    // WARNING, Never Recursive!
    
	static int getStrengthDifference(int[] ces, int len, int[] lastCes, int lastLen) {
	    if (false && lastLen > 0 && lastCes[0] > 0) {
	        System.out.println("DeBug");
	    }
	    ceit1.reset(ces, len);
	    ceit2.reset(lastCes, lastLen);
	    
	    for (int level = PRIMARY_DIFF; level <= TERTIARY_DIFF; ++level) {
	        ceit1.setLevel(level);
	        ceit2.setLevel(level);
	        while (true) {
	            int weight1 = ceit1.next();
	            int weight2 = ceit2.next();
	            if (weight1 != weight2) return level;
	            if (weight1 == DONE) break;
	        }
	    }
	    return QUARTERNARY_DIFF;
	    
	    /*
		
        int relation = QUARTERNARY_DIFF;
        if (getPrimary(ces, len) != getPrimary(lastCes, lastLen)) {
            relation = PRIMARY_DIFF;
        } else if (getSecondary(ces, len) != getSecondary(lastCes, lastLen)) {
            relation = SECONDARY_DIFF;
        } else if (getTertiary(ces, len) != getTertiary(lastCes, lastLen)) {
            relation = TERTIARY_DIFF;
        } else if (len > lastLen) {
            relation = TERTIARY_DIFF; // HACK
        } else {
            int minLen = len < lastLen ? len : lastLen;
			int start = UCA.isImplicitLeadCE(ces[0]) ? 2 : 1;
            for (int kk = start; kk < minLen; ++kk) {
                int lc = lastCes[kk];
                int c = ces[kk];
                if (collator.getPrimary(c) != collator.getPrimary(lc)
                    || collator.getSecondary(c) != collator.getSecondary(lc)) {
                    relation = QUARTERNARY_DIFF;   // reset relation on FIRST char, since differ anyway
                    break;
                    } else if (collator.getTertiary(c) > collator.getTertiary(lc)) {
                    relation = TERTIARY_DIFF;   // reset to tertiary (but later ce's might override!)
                }
            }
        }
        return relation;
        */
    }
    
    
    // static final String[] RELATION_NAMES = {" <", "   <<", "     <<<", "         ="};
    static final String[] RELATION_NAMES = {" <\t", "  <<\t", "   <<<\t", "    =\t"};
    static final String[] XML_RELATION_NAMES = {"p", "s", "t", "eq"};
    
    static class ArrayWrapper {
    	int[] array;
    	int start;
    	int limit;
    	
    	/*public ArrayWrapper(int[] contents) {
    		set(contents, 0, contents.length);
    	}
    	*/
    	
    	public ArrayWrapper(int[] contents, int start, int limit) {
    		set(contents, start, limit);
    	}
    	
    	private void set(int[] contents, int start, int limit) {
    		array = contents;
    		this.start = start;
    		this.limit = limit;
		}
    	
    	public boolean equals(Object other) {
    		ArrayWrapper that = (ArrayWrapper) other;
    		if (that.limit - that.start != limit - start) return false;
    		for (int i = start; i < limit; ++i) {
    			if (array[i] != that.array[i - start + that.start]) return false;
    		}
    		return true;
    	}
    	
    	public int hashCode() {
    		int result = limit - start;
    		for (int i = start; i < limit; ++i) {
    			result = result * 37 + array[i];
    		}
    		return result;
    	}
    }
    
    static int testCase[] = {
    	//collator.makeKey(0xFF40, 0x0020, 0x0002),
    	collator.makeKey(0x0255, 0x0020, 0x000E),
    };
    
    static String testString = "\u33C2\u002E";
    
    static boolean contains(int[] array, int start, int limit, int key) {
    	for (int i = start; i < limit; ++i) {
    		if (array[i] == key) return true;
    	}
    	return false;
    }
    
    static final void addToBackMap(Map backMap, int[] ces, int len, String s, boolean show) {
    	if (show || contains(testCase, 0, testCase.length, ces[0]) || testString.indexOf(s) > 0) {
    		System.out.println("Test case: " + Utility.hex(s) + ", " + CEList.toString(ces, len));
    	}
		backMap.put(new ArrayWrapper((int[])(ces.clone()), 0, len), s);
		/*
		// HACK until Ken fixes
		for (int i = 0; i < len; ++i) {
		    int ce = ces[i];
		    if (collator.isImplicitLeadCE(ce)) {
		        ++i;
		        ce = ces[i];
		        if (DEBUG
		            && (UCA.getPrimary(ce) == 0 || UCA.getSecondary(ce) != 0 || UCA.getTertiary(ce) != 0)) {
		            System.out.println("WEIRD 2nd IMPLICIT: " 
		                + CEList.toString(ces, len)
		                + ", " + ucd.getCodeAndName(s));
		        }
		        ces[i] = UCA.makeKey(UCA.getPrimary(ce), NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
		    }
		}
		backMap.put(new ArrayWrapper((int[])(ces.clone()), 0, len), s);
		*/
    }
    
    
    static UnicodeSet homelessSecondaries = new UnicodeSet(0x0153,0x0170);
    
    /*static int[] ignorableList = new int[homelessSecondaries.size()];
    
    static {
        UnicodeSetIterator ui = new UnicodeSetIterator(homelessSecondaries);
        int counter = 0;
        while (ui.next()) {
            ignorableList. UCA.makeKey(0x0000, 0x0153, 0x0002),
    	UCA.makeKey(0x0000, 0x0154, 0x0002),
    	UCA.makeKey(0x0000, 0x0155, 0x0002),
    	UCA.makeKey(0x0000, 0x0156, 0x0002),
    	UCA.makeKey(0x0000, 0x0157, 0x0002),
    	UCA.makeKey(0x0000, 0x0158, 0x0002),
    	UCA.makeKey(0x0000, 0x0159, 0x0002),
    	UCA.makeKey(0x0000, 0x015A, 0x0002),
    	UCA.makeKey(0x0000, 0x015B, 0x0002),
    	UCA.makeKey(0x0000, 0x015C, 0x0002),
    	UCA.makeKey(0x0000, 0x015D, 0x0002),
    	UCA.makeKey(0x0000, 0x015E, 0x0002),
    	UCA.makeKey(0x0000, 0x015F, 0x0002),
    	UCA.makeKey(0x0000, 0x0160, 0x0002),
    	UCA.makeKey(0x0000, 0x0161, 0x0002),
    	UCA.makeKey(0x0000, 0x0162, 0x0002),
    	UCA.makeKey(0x0000, 0x0163, 0x0002),
    	UCA.makeKey(0x0000, 0x0164, 0x0002),
    	UCA.makeKey(0x0000, 0x0165, 0x0002),
    	UCA.makeKey(0x0000, 0x0166, 0x0002),
    	UCA.makeKey(0x0000, 0x0167, 0x0002),
    	UCA.makeKey(0x0000, 0x0168, 0x0002),
    	UCA.makeKey(0x0000, 0x0169, 0x0002),
    	UCA.makeKey(0x0000, 0x016A, 0x0002),
    	UCA.makeKey(0x0000, 0x016B, 0x0002),
    	UCA.makeKey(0x0000, 0x016C, 0x0002),
    	UCA.makeKey(0x0000, 0x016D, 0x0002),
    	UCA.makeKey(0x0000, 0x016E, 0x0002),
    	UCA.makeKey(0x0000, 0x016F, 0x0002),
    	UCA.makeKey(0x0000, 0x0170, 0x0002),
    };
    */
    
    
    static final String getFromBackMap(Map backMap, int[] originalces, int expansionStart, int len, String chr, int[] rel) {
    	int[] ces = (int[])(originalces.clone());
    	
    	String expansion = "";
    	
    	// process ces to neutralize tertiary
    	
    	for (int i = expansionStart; i < len; ++i) {
    		int probe = ces[i];
        	char primary = collator.getPrimary(probe);
        	char secondary = collator.getSecondary(probe);
        	char tertiary = collator.getTertiary(probe);
    		
            int tert = tertiary;
            switch (tert) {
            case 8: case 9: case 0xA: case 0xB: case 0xC: case 0x1D:
                tert = 8;
                break;
            case 0xD: case 0x10: case 0x11: case 0x12: case 0x13: case 0x1C:
                tert = 0xE;
                break;
            default:
                tert = 2;
                break;
            }
            ces[i] = collator.makeKey(primary, secondary, tert);
    	}
    	
        for (int i = expansionStart; i < len;) {
        	int limit;
        	String s = null;
        	for (limit = len; limit > i; --limit) {
        		ArrayWrapper wrapper = new ArrayWrapper(ces, i, limit);
        		s = (String)backMap.get(wrapper);
            	if (s != null) break;
            }
            if (s == null) {
            	do {
            		if (homelessSecondaries.contains(UCA.getSecondary(ces[i]))) {
            			s = "";
            			if (rel[0] > 1) rel[0] = 1; // HACK
            			break;
            		}
            		
            		// Try stomping the value to different tertiaries
            		
    				int probe = ces[i];
    				if (UCA.isImplicitLeadCE(probe)) {
                        s = UTF16.valueOf(UCA.ImplicitToCodePoint(UCA.getPrimary(probe), UCA.getPrimary(ces[i+1])));
                        ++i; // skip over next item!!
                        break;
    				}
    				
        			char primary = collator.getPrimary(probe);
        			char secondary = collator.getSecondary(probe);
	        		
            		ces[i] = collator.makeKey(primary, secondary, 2);
        			ArrayWrapper wrapper = new ArrayWrapper(ces, i, i+1);
        			s = (String)backMap.get(wrapper);
        			if (s != null) break;
            
            		ces[i] = collator.makeKey(primary, secondary,0xE);
        			wrapper = new ArrayWrapper(ces, i, i+1);
        			s = (String)backMap.get(wrapper);
        			if (s != null) break;

					/*
                	int meHack = UCA.makeKey(0x1795,0x0020,0x0004);
                	if (ces[i] == meHack) {
                    	s = "\u3081";
                    	break;
                    }
                    */
                    
                    // we failed completely. Print error message, and bail
                    
                    System.out.println("No back map for " + CEList.toString(ces[i])
                        + " from " + CEList.toString(ces, len));
                    System.out.println("\t" + ucd.getCodeAndName(chr)
                        + " => " + ucd.getCodeAndName(nfkdNew.normalize(chr))
                    );
                    s = "[" + Utility.hex(ces[i]) + "]";
        	    } while (false); // exactly one time, just for breaking
            	limit = i + 1;
            }
            expansion += s;
            i = limit;
        }
        return expansion;
    }
    
    /*

    static final String getFromBackMap(Map backMap, int[] ces, int index, int limit) {
    	ArrayWrapper wrapper = new ArrayWrapper(ces, index, limit);
    	
    	int probe = ces[index];
    	wrapperContents[0] = probe;
        String s = (String)backMap.get(wrapper);
        
        outputLen[0] = 1;
        if (s != null) return s;
        
        char primary = collator.getPrimary(probe);
        char secondary = collator.getSecondary(probe);
        char tertiary = collator.getTertiary(probe);
        
        if (isFixedIdeograph(remapUCA_CompatibilityIdeographToCp(primary))) {
            return String.valueOf(primary);
        } else {
            int tert = tertiary;
            switch (tert) {
            case 8: case 9: case 0xA: case 0xB: case 0xC: case 0x1D:
                tert = 8;
                break;
            case 0xD: case 0x10: case 0x11: case 0x12: case 0x13: case 0x1C:
                tert = 0xE;
                break;
            default:
                tert = 2;
                break;
            }
            probe = collator.makeKey(primary, secondary, tert);
            wrapperContents[0] = probe;
            s = (String)backMap.get(wrapper);
            if (s != null) return s;
                
            probe = collator.makeKey(primary, secondary, collator.NEUTRAL_TERTIARY);
            wrapperContents[0] = probe;
            s = (String)backMap.get(wrapper);
        }
        if (s != null) return s;
        
        if (primary != 0 && secondary != collator.NEUTRAL_SECONDARY) {
        	int[] dummyArray = new int[1];
        	dummyArray[0] = collator.makeKey(primary, collator.NEUTRAL_SECONDARY, tertiary);
            String first = getFromBackMap(backMap, dummyArray, 0, outputLen);
            
            dummyArray[0] = collator.makeKey(0, secondary, collator.NEUTRAL_TERTIARY);
            String second = getFromBackMap(backMap, dummyArray, 0, outputLen);
            
            if (first != null && second != null) {
                s = first + second;
            }
        }
        return s;
    }
    */
    
    static final String[] RELATION = {
        "<", " << ", "  <<<  ", "    =    ", "    =    ", "    =    ", "  >>>  ", " >> ", ">"
    };
    
    static StringBuffer quoteOperandBuffer = new StringBuffer(); // faster
    
    static UnicodeSet needsQuoting = null;
    
    static final String quoteOperand(String s) {
        if (needsQuoting == null) {
            /*
            c >= 'a' && c <= 'z' 
              || c >= 'A' && c <= 'Z' 
              || c >= '0' && c <= '9'
              || (c >= 0xA0 && !UCharacterProperty.isRuleWhiteSpace(c))
              */
            needsQuoting = new UnicodeSet("[a-zA-Z0-9\\u00A0-\\U00010FFF]");
            needsQuoting.remove();
        }
    	s = NFC.normalize(s);
        quoteOperandBuffer.setLength(0);
        boolean noQuotes = true;
        boolean inQuote = false;
        for (int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            if (!needsQuoting.contains(c)) {
                if (inQuote) {
                    quoteOperandBuffer.append('\'');
                    inQuote = false;
                }
                quoteOperandBuffer.append(c);
            } else {
                noQuotes = false;
                if (c == '\'') {
                    quoteOperandBuffer.append("''");
                } else {
                    if (!inQuote) {
                        quoteOperandBuffer.append('\'');
                        inQuote = true;
                    }
                    if (c <= 0x20 || c > 0x7E) quoteOperandBuffer.append("\\u").append(Utility.hex(c));
                    else quoteOperandBuffer.append(c);
                }
            }
            /*
            switch (c) {
              case '<':  case '>':  case '#': case '=': case '&': case '/':
                quoteOperandBuffer.append('\'').append(c).append('\'');
                break;
              case '\'':
                quoteOperandBuffer.append("''");
                break;
              default:
                if (0 <= c && c < 0x20 || 0x7F <= c && c < 0xA0) {
                    quoteOperandBuffer.append("\\u").append(Utility.hex(c));
                    break;
                }
                quoteOperandBuffer.append(c);
                break;
            }
            */
        }
        if (inQuote) {
            quoteOperandBuffer.append('\'');
        }
        if (noQuotes) return s; // faster
        return quoteOperandBuffer.toString();
    }

    
/*
1112; H   # HANGUL CHOSEONG HIEUH
1161; A   # HANGUL JUNGSEONG A
1175; I   # HANGUL JUNGSEONG I
11A8; G   # HANGUL JONGSEONG KIYEOK
11C2; H   # HANGUL JONGSEONG HIEUH
11F9;HANGUL JONGSEONG YEORINHIEUH;Lo;0;L;;;;;N;;;;;
*/
    static boolean gotInfo = false;
    static int oldJamo1, oldJamo2, oldJamo3, oldJamo4, oldJamo5, oldJamo6;
    
    static boolean isOldJamo(int primary) {
        if (!gotInfo) {
            int[] temp = new int[20];
            collator.getCEs("\u1112", true, temp);
            oldJamo1 = temp[0] >> 16;
            collator.getCEs("\u1161", true, temp);
            oldJamo2 = temp[0] >> 16;
            collator.getCEs("\u1175", true, temp);
            oldJamo3 = temp[0] >> 16;
            collator.getCEs("\u11A8", true, temp);
            oldJamo4 = temp[0] >> 16;
            collator.getCEs("\u11C2", true, temp);
            oldJamo5 = temp[0] >> 16;
            collator.getCEs("\u11F9", true, temp);
            oldJamo6 = temp[0] >> 16;
            gotInfo = true;
        }
        return primary > oldJamo1 && primary < oldJamo2
            || primary > oldJamo3 && primary < oldJamo4
            || primary > oldJamo5 && primary <= oldJamo6;
    }
    
    static Normalizer NFKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
    static Normalizer NFD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
    
    static int variableHigh = 0;
    static final int COMMON = 5;
    
    static int gapForA = 0;
    static int[] primaryDelta;
    
    static void writeFractionalUCA(String filename) throws IOException {
        Default.setUCD();
        
        checkImplicit();
        checkFixes();
        
        variableHigh = collator.getVariableHigh() >> 16;
        BitSet secondarySet = collator.getWeightUsage(2);
        
        // HACK for CJK
        secondarySet.set(0x0040);
        
        int subtotal = 0;
        System.out.println("Fixing Secondaries");
        compactSecondary = new int[secondarySet.size()];
        for (int secondary = 0; secondary < compactSecondary.length; ++secondary) {
            if (secondarySet.get(secondary)) {
                compactSecondary[secondary] = subtotal++;
                /*System.out.println("compact[" + Utility.hex(secondary)
                    + "]=" + Utility.hex(compactSecondary[secondary])
                    + ", " + Utility.hex(fixSecondary(secondary)));*/
            }
        }
        System.out.println();

        //TO DO: find secondaries that don't overlap, and reassign
        
        System.out.println("Finding Bumps");        
        char[] representatives = new char[65536];
        findBumps(representatives);
        
        System.out.println("Fixing Primaries");
        BitSet primarySet = collator.getWeightUsage(1);        
        
        primaryDelta = new int[65536];
        
        // start at 1 so zero stays zero.
        for (int primary = 1; primary < 0xFFFF; ++primary) {
            if (primarySet.get(primary)) primaryDelta[primary] = 2;
            else if (primary == 0x1299) {
                System.out.println("WHOOPS! Missing weight");
            }
        }
        
        int bumpNextToo = 0;
        
        subtotal = (COMMON << 8) + COMMON; // skip forbidden bytes, leave gap
        int lastValue = 0;
        
        // start at 1 so zero stays zero.
        for (int primary = 1; primary < 0xFFFF; ++primary) {
            if (primaryDelta[primary] != 0) {
                
                // special handling for Jamo 3-byte forms
                
                if (isOldJamo(primary)) {
                    if (DEBUG) System.out.print("JAMO: " + Utility.hex(lastValue));
                    if ((lastValue & 0xFF0000) == 0) { // lastValue was 2-byte form
                        subtotal += primaryDelta[primary];  // we convert from relative to absolute
                        lastValue = primaryDelta[primary] = (subtotal << 8) + 0x10; // make 3 byte, leave gap
                    } else { // lastValue was 3-byte form
                        lastValue = primaryDelta[primary] = lastValue + 3;
                    }
                    if (DEBUG) System.out.println(" => " + Utility.hex(lastValue));
                    continue;
                }
                
                subtotal += primaryDelta[primary];  // we convert from relative to absolute
                
                if (singles.get(primary)) { 
                    subtotal = (subtotal & 0xFF00) + 0x100;
                    if (primary == gapForA) subtotal += 0x200;
                    if (bumpNextToo == 0x40) subtotal += 0x100; // make sure of gap between singles!!!
                    bumpNextToo = 0x40;
                } else if (primary > variableHigh) {
                    variableHigh = 0xFFFF; // never do again!
                    subtotal = (subtotal & 0xFF00) + 0x320 + bumpNextToo;
                    bumpNextToo = 0;
                } else if (bumpNextToo > 0 || bumps.get(primary)) {
                    subtotal = ((subtotal + 0x20) & 0xFF00) + 0x120 + bumpNextToo;
                    bumpNextToo = 0;
                } else {
                    int lastByte = subtotal & 0xFF;
                    // skip all values of FF, 00, 01, 02,
                    if (0 <= lastByte && lastByte < COMMON || lastByte == 0xFF) {
                        subtotal = ((subtotal + 1) & 0xFFFFFF00) + COMMON; // skip
                    }
                }
                lastValue = primaryDelta[primary] = subtotal;
            }
            // fixup for Kanji
            /*
            
            // WE DROP THIS: we are skipping all CJK values above, and will fix them separately
            
            int fixedCompat = remapUCA_CompatibilityIdeographToCp(primary);
            if (isFixedIdeograph(fixedCompat)) {
                int CE = getImplicitPrimary(fixedCompat);
                
                lastValue = primaryDelta[primary] = CE >>> 8; 
            }
            */
            //if ((primary & 0xFF) == 0) System.out.println(Utility.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
        }
        
        
        // now translate!!
        String highCompat = UTF16.valueOf(0x2F805);
        
        System.out.println("Sorting");
        Map ordered = new TreeMap();
        Set contentsForCanonicalIteration = new TreeSet();
        UCA.UCAContents ucac = collator.getContents(UCA.FIXED_CE, null);
        int ccounter = 0;
        while (true) {
            Utility.dot(ccounter++);
            String s = ucac.next();
            if (s == null) break;
            if (s.equals("\uFA36") || s.equals("\uF900") || s.equals("\u2ADC") || s.equals(highCompat)) {
                System.out.println(" * " + ucd.getCodeAndName(s));
            }
            contentsForCanonicalIteration.add(s);
            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
        }
        
        // Add canonically equivalent characters!!
        System.out.println("Start Adding canonical Equivalents2");
        int canCount = 0;
        
        System.out.println("Add missing decomposibles and non-characters");
        for (int i = 0; i < 0x10FFFF; ++i) {
            if (!ucd.isNoncharacter(i)) {
                if (!ucd.isAllocated(i)) continue;
                if (NFD.isNormalized(i)) continue;
                if (ucd.isHangulSyllable(i)) continue;
                //if (collator.getCEType(i) >= UCA.FIXED_CE) continue;
            }
            String s = UTF16.valueOf(i);
            if (contentsForCanonicalIteration.contains(s)) continue; // skip if already present
            contentsForCanonicalIteration.add(s);
            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
            System.out.println(" + " + ucd.getCodeAndName(s));
            canCount++;
        }
        
        Set additionalSet = new HashSet();
        System.out.println("Loading canonical iterator");
        if (canIt == null) canIt = new CanonicalIterator(".");
        Iterator it2 = contentsForCanonicalIteration.iterator();
        System.out.println("Adding any FCD equivalents that have different sort keys");
        while (it2.hasNext()) {
            String key = (String)it2.next();
            if (key == null) {
                System.out.println("Null Key");
                continue;
            }
            canIt.setSource(key);
            
            boolean first = true;
            while (true) {
                String s = canIt.next();
                if (s == null) break;
                if (s.equals(key)) continue;
                if (contentsForCanonicalIteration.contains(s)) continue;
                if (additionalSet.contains(s)) continue;
                
                
                // Skip anything that is not FCD.
                if (!NFD.isFCD(s)) continue;
                
                // We ONLY add if the sort key would be different
                // Than what we would get if we didn't decompose!!
                String sortKey = collator.getSortKey(s, UCA.NON_IGNORABLE);
                String nonDecompSortKey = collator.getSortKey(s, UCA.NON_IGNORABLE, false);
                if (sortKey.equals(nonDecompSortKey)) continue;
                
                if (first) {
                    System.out.println(" " + ucd.getCodeAndName(key));
                    first = false;
                }
                System.out.println(" => " + ucd.getCodeAndName(s));
                System.out.println("    old: " + collator.toString(nonDecompSortKey));
                System.out.println("    new: " + collator.toString(sortKey));
                canCount++;
                additionalSet.add(s);
                ordered.put(sortKey + '\u0000' + s, s);
            }
        }
        System.out.println("Done Adding canonical Equivalents -- added " + canCount);
        /*
        
        for (int ch = 0; ch < 0x10FFFF; ++ch) {
            Utility.dot(ch);
            byte type = collator.getCEType(ch);
            if (type >= UCA.FIXED_CE && !nfd.hasDecomposition(ch))
                continue;
            }
            String s = com.ibm.text.UTF16.valueOf(ch);
            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
        }
        
        Hashtable multiTable = collator.getContracting();
        Enumeration enum = multiTable.keys();
        int ecount = 0;
        while (enum.hasMoreElements()) {
            Utility.dot(ecount++);
            String s = (String)enum.nextElement();
            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
        }
        */
        // JUST FOR TESTING
        if (false) {
            String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
            for (int i = 0; i < sample.length(); ++i) {
                String s = sample.substring(i, i+1);
                ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
            }
        }
        
        Utility.fixDot();
        System.out.println("Writing");
        PrintWriter shortLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_SHORT.txt"), 32*1024));
        PrintWriter longLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + ".txt"), 32*1024));
        log = new PrintWriter(new DualWriter(shortLog, longLog));
        
        PrintWriter summary = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_summary.txt"), 32*1024));
        //log.println("[Variable Low = " + UCA.toString(collator.getVariableLow()) + "]");
        //log.println("[Variable High = " + UCA.toString(collator.getVariableHigh()) + "]");
        
        int[] ces = new int[100];
        
        StringBuffer newPrimary = new StringBuffer();
        StringBuffer newSecondary = new StringBuffer();
        StringBuffer newTertiary = new StringBuffer();
        StringBuffer oldStr = new StringBuffer();

        EquivalenceClass secEq = new EquivalenceClass("\r\n#", 2, true);
        EquivalenceClass terEq = new EquivalenceClass("\r\n#", 2, true);
        String[] sampleEq = new String[500];
        int[] sampleLen = new int[500];
        
        Iterator it = ordered.keySet().iterator();
        int oldFirstPrimary = UCA.getPrimary(UCA.TERMINATOR);
        boolean wasVariable = false;
        
        log.println("# Fractional UCA Table, generated from standard UCA");
        log.println("# " + getNormalDate());
        log.println("# VERSION: UCA=" + collator.getDataVersion() + ", UCD=" + collator.getUCDVersion());
        log.println();
        log.println("# Generated processed version, as described in ICU design document.");
        log.println("# NOTES");
        log.println("#  - Bugs in UCA data are NOT FIXED, except for the following problems:");
        log.println("#    - canonical equivalents are decomposed directly (some beta UCA are wrong).");
        log.println("#    - overlapping variable ranges are fixed.");
        log.println("#  - Format is as follows:");
        log.println("#      <codepoint> (' ' <codepoint>)* ';' ('L' | 'S') ';' <fractionalCE>+ ' # ' <UCA_CE> '# ' <name> ");
        log.println("#    - zero weights are not printed");
        log.println("#    - S: contains at least one lowercase or SMALL kana");
        log.println("#    - L: otherwise");
        log.println("#    - Different primaries are separated by a blank line.");
        log.println("# WARNING");
        log.println("#  - Differs from previous version in that MAX value was introduced at 1F.");
        log.println("#    All tertiary values are shifted down by 1, filling the gap at 7!");
        
        
        String lastChr = "";
        int lastNp = 0;
        boolean doVariable = false;
        char[] codeUnits = new char[100];
        
        FCE firstSecondaryIgnorable = new FCE(false);
        FCE lastSecondaryIgnorable = new FCE(true);

        FCE firstPrimaryIgnorable = new FCE(false);
        FCE lastPrimaryIgnorable = new FCE(true);
                    
        FCE firstVariable = new FCE(false);
        FCE lastVariable = new FCE(true);
                    
        FCE firstNonIgnorable = new FCE(false);
        FCE lastNonIgnorable = new FCE(true);
                    
        FCE firstTrailing = new FCE(false);
        FCE lastTrailing = new FCE(true);
        
        Map backMap = new TreeMap();
                            
        while (it.hasNext()) {
            Object sortKey = it.next();
            String chr = (String)ordered.get(sortKey);            

            // get CEs and fix
            int len = collator.getCEs(chr, true, ces);
            int firstPrimary = UCA.getPrimary(ces[0]);
            if (firstPrimary != oldFirstPrimary) {
                log.println();
                boolean isVariable = collator.isVariable(ces[0]);
                if (isVariable != wasVariable) {
                    if (isVariable) {
                        log.println("# START OF VARIABLE SECTION!!!");
                        summary.println("# START OF VARIABLE SECTION!!!");
                    } else {
                        log.println("[variable top = " + Utility.hex(primaryDelta[oldFirstPrimary]) + "] # END OF VARIABLE SECTION!!!");
                        doVariable = true;
                    }
                    log.println();
                }
                wasVariable = isVariable;
                oldFirstPrimary = firstPrimary;
            }
            oldStr.setLength(0);
            chr.getChars(0, chr.length(), codeUnits, 0);
            
            log.print(Utility.hex(codeUnits, 0, chr.length(), " ") + "; ");
            boolean nonePrinted = true;
            boolean isFirst = true;
            
            for (int q = 0; q < len; ++q) {
                nonePrinted = false;
                newPrimary.setLength(0);
                newSecondary.setLength(0);
                newTertiary.setLength(0);
                
                int pri = UCA.getPrimary(ces[q]);
                int sec = UCA.getSecondary(ces[q]); 
                int ter = UCA.getTertiary(ces[q]);
                
                oldStr.append(CEList.toString(ces[q]));// + "," + Integer.toString(ces[q],16);
                
                // special treatment for unsupported!
                
                if (UCA.isImplicitLeadPrimary(pri)) {
                    if (DEBUG) System.out.println("DEBUG: " + CEList.toString(ces, len) 
                        + ", Current: " + q + ", " + ucd.getCodeAndName(chr));
                    ++q;
                    oldStr.append(CEList.toString(ces[q]));// + "," + Integer.toString(ces[q],16);
                
                    int pri2 = UCA.getPrimary(ces[q]);
                    // get old code point
                    
                    int cp = UCA.ImplicitToCodePoint(pri, pri2);
                    
                    // double check results!
                    
                    int[] testImplicit = new int[2];
                    collator.CodepointToImplicit(cp, testImplicit);
                    boolean gotError = pri != testImplicit[0] || pri2 != testImplicit[1];
                    if (gotError) {
                    	System.out.println("ERROR");
                    }
                    if (DEBUG || gotError) {
                    	System.out.println("Computing Unsupported CP as: "
                        	+ Utility.hex(pri)
                        	+ ", " + Utility.hex(pri2)
                        	+ " => " + Utility.hex(cp)
                        	+ " => " + Utility.hex(testImplicit[0])
                        	+ ", " + Utility.hex(testImplicit[1])
                        	// + ", " + Utility.hex(fixPrimary(pri) & INT_MASK)
                        );
                    }
                    
                    pri = cp | MARK_CODE_POINT;
                }
                
                if (sec != 0x20) {
                    boolean changed = secEq.add(new Integer(sec), new Integer(pri));
                }
                if (ter != 0x2) {
                    boolean changed = terEq.add(new Integer(ter), new Integer((pri << 16) | sec));
                }
                
                if (sampleEq[sec] == null || sampleLen[sec] > len) {
                    sampleEq[sec] = chr;
                    sampleLen[sec] = len;
                }
                if (sampleEq[ter] == null || sampleLen[sec] > len) {
                    sampleEq[ter] = chr;
                    sampleLen[sec] = len;
                }
                
                if ((pri & MARK_CODE_POINT) == 0 && pri == 0) {
                    Integer key = new Integer(ces[q]);
                    Pair value = (Pair) backMap.get(key);
                    if (value == null
                    || (len < ((Integer)(value.first)).intValue())) {
                        backMap.put(key, new Pair(new Integer(len), chr));
                    }
                }
                
                // int oldPrimaryValue = UCA.getPrimary(ces[q]);
                int np = fixPrimary(pri);
                int ns = fixSecondary(sec);
                int nt = fixTertiary(ter);
                
                try {
                	hexBytes(np, newPrimary);
                	hexBytes(ns, newSecondary);
                	hexBytes(nt, newTertiary);
                } catch (Exception e) {
                	throw new ChainException("Character is {0}", new String[] {Utility.hex(chr)}, e);
                }
                if (isFirst) {
                    if (!sameTopByte(np, lastNp)) {
                        summary.println("Last:  " + Utility.hex(lastNp & INT_MASK) + " " + ucd.getName(UTF16.charAt(lastChr,0)));
                        summary.println();
                        if (doVariable) {
                            doVariable = false;
                            summary.println("[variable top = " + Utility.hex(primaryDelta[firstPrimary]) + "] # END OF VARIABLE SECTION!!!");
                            summary.println();
                        }
                        summary.println("First: " + Utility.hex(np & INT_MASK) + ", " + ucd.getCodeAndName(UTF16.charAt(chr,0)));
                    }
                    lastNp = np;
                    isFirst = false;
                }
                log.print("[" + newPrimary 
                    + ", " + newSecondary 
                    + ", " + newTertiary 
                    + "]");
                    
                // RECORD STATS
                // but ONLY if we are not part of an implicit
                
                if ((pri & MARK_CODE_POINT) == 0) {
                    if (np == 0 && ns == 0) {
                        firstSecondaryIgnorable.setValue(np, ns, nt);
                        lastSecondaryIgnorable.setValue(np, ns, nt); 
                    } else if (np == 0) {
                        firstPrimaryIgnorable.setValue(np, ns, nt);
                        lastPrimaryIgnorable.setValue(np, ns, nt); 
                    } else if (collator.isVariable(ces[q])) {
                        firstVariable.setValue(np, ns, nt);
                        lastVariable.setValue(np, ns, nt); 
                    } else if (UCA.getPrimary(ces[q]) > UNSUPPORTED_LIMIT) {        // Trailing (none currently)
                        System.out.println("Trailing: " 
                            + ucd.getCodeAndName(chr) + ", "
                            + CEList.toString(ces[q]) + ", " 
                            + Utility.hex(pri) + ", " 
                            + Utility.hex(UNSUPPORTED_LIMIT));
                        firstTrailing.setValue(np, ns, nt);
                        lastTrailing.setValue(np, ns, nt); 
                    } else {
                        firstNonIgnorable.setValue(np, ns, nt);
                        lastNonIgnorable.setValue(np, ns, nt); 
                    }
                }
            }
            if (nonePrinted) {
                log.print("[,,]");
                oldStr.append(CEList.toString(0));
            }
            longLog.print("\t# " + oldStr + "\t* " + ucd.getName(UTF16.charAt(chr, 0)));
            log.println();
            lastChr = chr;
        }
        
        // ADD HOMELESS COLLATION ELEMENTS
        log.println();
        log.println("# HOMELESS COLLATION ELEMENTS");
        char fakeTrail = 'a';
        Iterator it3 = backMap.keySet().iterator();
        while (it3.hasNext()) {
            Integer key = (Integer) it3.next();
            Pair pair = (Pair) backMap.get(key);
            if (((Integer)pair.first).intValue() < 2) continue;
            String sample = (String)pair.second;
            
            int ce = key.intValue();
            
            int np = fixPrimary(UCA.getPrimary(ce));
            int ns = fixSecondary(UCA.getSecondary(ce));
            int nt = fixTertiary(UCA.getTertiary(ce));
                    
            newPrimary.setLength(0);
            newSecondary.setLength(0);
            newTertiary.setLength(0);
            
            hexBytes(np, newPrimary);
            hexBytes(ns, newSecondary);
            hexBytes(nt, newTertiary);
            
            log.print(Utility.hex('\uFDD0' + "" + (char)(fakeTrail++)) + "; " 
                + "[, " + newSecondary + ", " + newTertiary + "]");
            longLog.print("\t# " + collator.getCEList(sample, true) + "\t* " + ucd.getCodeAndName(sample));
            log.println();
        }

        int firstImplicit = getImplicitPrimary(CJK_BASE);
        int lastImplicit = getImplicitPrimary(0x10FFFF);
        
        log.println();
        log.println("# VALUES BASED ON UCA");
        
        log.println("[first tertiary ignorable " + new FCE(false,0,0, 0).formatFCE() + "]");
        log.println("[last tertiary ignorable " + new FCE(true,0,0, 0).formatFCE() + "]");
        
        // Since the UCA doesn't have secondary ignorables, fake them.
        
        if (firstSecondaryIgnorable.isUnset()) {
            System.out.println("No first/last secondary ignorable: resetting");
            firstSecondaryIgnorable = new FCE(false, 0, 0, COMMON<<24);
            lastSecondaryIgnorable = new FCE(true, 0, 0, COMMON<<24);
            System.out.println(firstSecondaryIgnorable.formatFCE());
        }
        
        log.println("[first secondary ignorable " + firstSecondaryIgnorable.formatFCE() + "]");
        log.println("[last secondary ignorable " + lastSecondaryIgnorable.formatFCE() + "]");
        
        log.println("[first primary ignorable " + firstPrimaryIgnorable.formatFCE() + "]");
        log.println("[last primary ignorable " + lastPrimaryIgnorable.formatFCE() + "]");
        
        log.println("[first variable " + firstVariable.formatFCE() + "]");
        log.println("[last variable " + lastVariable.formatFCE() + "]");
        
        log.println("[first regular " + firstNonIgnorable.formatFCE() + "]");
        log.println("[last regular " + lastNonIgnorable.formatFCE() + "]");
        
        
        log.println("[first implicit " + (new FCE(false,firstImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]");
        log.println("[last implicit " + (new FCE(false,lastImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]");
        
        if (firstTrailing.isUnset()) {
            System.out.println("No first/last trailing: resetting");
            firstTrailing = new FCE(false, (IMPLICIT_LIMIT_BYTE+1)<<24, COMMON<<24, COMMON<<24);
            lastTrailing = new FCE(true, (IMPLICIT_LIMIT_BYTE+1)<<24, COMMON<<24, COMMON<<24);
            System.out.println(firstTrailing.formatFCE());        
        }
        
        log.println("[first trailing " + firstTrailing.formatFCE() + "]");
        log.println("[last trailing " + lastTrailing.formatFCE() + "]");
        
        log.println();
        log.println("# FIXED VALUES");
        
        log.println("# superceded! [top "  + lastNonIgnorable.formatFCE() + "]");
        log.println("[fixed first implicit byte " + Utility.hex(IMPLICIT_BASE_BYTE,2) + "]");
        log.println("[fixed last implicit byte " + Utility.hex(IMPLICIT_LIMIT_BYTE,2) + "]");
        log.println("[fixed first trail byte " + Utility.hex(IMPLICIT_LIMIT_BYTE+1,2) + "]");
        log.println("[fixed last trail byte " + Utility.hex(SPECIAL_BASE-1,2) + "]");
        log.println("[fixed first special byte " + Utility.hex(SPECIAL_BASE,2) + "]");
        log.println("[fixed last special byte " + Utility.hex(0xFF,2) + "]");
        
        
        summary.println("Last:  " + Utility.hex(lastNp) + ", " + ucd.getCodeAndName(UTF16.charAt(lastChr, 0)));
        
        /*
        String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
        for (int i = 0; i < sample.length(); ++i) {
            char ch = sample.charAt(i);
            log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch))
                    + "          " + ucd.getName(ch));
        }
        */
        summary.println();
        summary.println("# First Implicit: " + Utility.hex(INT_MASK & getImplicitPrimary(0)));
        summary.println("# Last Implicit: " + Utility.hex(INT_MASK & getImplicitPrimary(0x10FFFF)));
        summary.println("# First CJK: " + Utility.hex(INT_MASK & getImplicitPrimary(0x4E00)));
        summary.println("# Last CJK: " + Utility.hex(INT_MASK & getImplicitPrimary(0xFA2F)));
        summary.println("# First CJK_A: " + Utility.hex(INT_MASK & getImplicitPrimary(0x3400)));
        summary.println("# Last CJK_A: " + Utility.hex(INT_MASK & getImplicitPrimary(0x4DBF)));
        
        boolean lastOne = false;
        for (int i = 0; i < 0x10FFFF; ++i) {
            boolean thisOne = ucd.isCJK_BASE(i) || ucd.isCJK_AB(i);
            if (thisOne != lastOne) {
                summary.println("# Implicit Cusp: CJK=" + lastOne + ": " + Utility.hex(i-1) + " => " + Utility.hex(INT_MASK & getImplicitPrimary(i-1)));
                summary.println("# Implicit Cusp: CJK=" + thisOne + ": " + Utility.hex(i) + " => " + Utility.hex(INT_MASK & getImplicitPrimary(i)));
                lastOne = thisOne;
            }
        }
                           
        summary.println("Compact Secondary 153: " + compactSecondary[0x153]);
        summary.println("Compact Secondary 157: " + compactSecondary[0x157]);
        
        
        summary.println();
        summary.println("# Disjoint classes for Secondaries");
        summary.println("#" + secEq.toString());
        
        summary.println();
        summary.println("# Disjoint classes for Tertiaries");
        summary.println("#" + terEq.toString());
        
        summary.println();
        summary.println("# Example characters for each TERTIARY value");
        summary.println();
        summary.println("# UCA : (FRAC) CODE [    UCA CE    ] Name");
        summary.println();
        
        for (int i = 0; i < sampleEq.length; ++i) {
            if (sampleEq[i] == null) continue;
            if (i == 0x20) {
                summary.println();
                summary.println("# Example characters for each SECONDARY value");
                summary.println();
                summary.println("# UCA : (FRAC) CODE [    UCA CE    ] Name");
                summary.println();
            }
            int len = collator.getCEs(sampleEq[i], true, ces);
            int newval = i < 0x20 ? fixTertiary(i) : fixSecondary(i);
            summary.print("# " + Utility.hex(i) + ": (" + Utility.hex(newval) + ") "
                + Utility.hex(sampleEq[i]) + " ");
            for (int q = 0; q < len; ++q) {
                summary.print(CEList.toString(ces[q]));
            }
            summary.println(" " + ucd.getName(sampleEq[i]));
            
        }
        log.close();
        summary.close();
    }
    
    static final long INT_MASK = 0xFFFFFFFFL;
    
    static class FCE {
        static final long UNDEFINED_MAX = Long.MAX_VALUE;
        static final long UNDEFINED_MIN = Long.MIN_VALUE;
        long[] key;
        boolean max;
        boolean debugShow = false;
        
        FCE (boolean max) {
            this.max = max;
            if (max) key = new long[] {UNDEFINED_MIN, UNDEFINED_MIN, UNDEFINED_MIN};    // make small!
            else key = new long[] {UNDEFINED_MAX, UNDEFINED_MAX, UNDEFINED_MAX};
        }
        
        FCE (boolean max, int primary, int secondary, int tertiary) {
            this(max);
            key[0] = primary & INT_MASK;
            key[1] = secondary & INT_MASK;
            key[2] = tertiary & INT_MASK;
        }
        
        FCE (boolean max, int primary) {
            this(max);
            key[0] = primary & INT_MASK;
        }
        
        boolean isUnset() {
            return key[0] == UNDEFINED_MIN || key[0] == UNDEFINED_MAX;
        }
        
        String formatFCE() {
            String b0 = getBuffer(key[0], false);
            boolean key0Defined = key[0] != UNDEFINED_MIN && key[0] != UNDEFINED_MAX;
            
            String b1 = getBuffer(key[1], key0Defined);
            boolean key1Defined = key[1] != UNDEFINED_MIN && key[1] != UNDEFINED_MAX;
            if (b1.length() != 0) b1 = " " + b1;

            String b2 = getBuffer(key[2], key0Defined || key1Defined);
            if (b2.length() != 0) b2 = " " + b2;
            return "[" + b0 + "," + b1  + "," + b2 + "]";
        }
        
        String getBuffer(long val, boolean haveHigher) {
            if (val == UNDEFINED_MIN) return "?"; 
            if (val == UNDEFINED_MAX) if (haveHigher) val = COMMON << 24; else return "?";
            StringBuffer result = new StringBuffer();
            hexBytes(val, result);
            return result.toString();
        }
        
        void setValue(int npInt, int nsInt, int ntInt) {
            if (debugShow) System.out.println("Setting FCE: " 
                + Utility.hex(npInt) + ", "  + Utility.hex(nsInt) + ", "  + Utility.hex(ntInt));
            // to get the sign right!
            long np = npInt & INT_MASK;
            long ns = nsInt & INT_MASK;
            long nt = ntInt & INT_MASK;
            if (max) {
                if (np < key[0]) return;
                if (np > key[0]) {
                    key[0] = np;
                    key[1] = ns;
                    key[2] = nt;
                    return;
                }
                if (ns < key[1]) return;
                if (ns > key[1]) {
                    key[1] = ns;
                    key[2] = nt;
                    return;
                }
                if (nt > key[2]) {
                    key[2] = nt;
                }
            } else {
                if (np > key[0]) return;
                if (np < key[0]) {
                    key[0] = np;
                    key[1] = ns;
                    key[2] = nt;
                    return;
                }
                if (ns > key[1]) return;
                if (ns < key[1]) {
                    key[1] = ns;
                    key[2] = nt;
                    return;
                }
                if (nt > key[2]) {
                    key[2] = nt;
                }
            }
        }
    }
       
    
    /*
    static boolean isFixedIdeograph(int cp) {
        return (0x3400 <= cp && cp <= 0x4DB5 
            || 0x4E00 <= cp && cp <= 0x9FA5 
            || 0xF900 <= cp && cp <= 0xFA2D // compat: most of these decompose anyway
            || 0x20000 <= cp && cp <= 0x2A6D6
            || 0x2F800 <= cp && cp <= 0x2FA1D // compat: most of these decompose anyway
            );
    }
    */
/*
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
9FA5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
...
2FA1D;CJK COMPATIBILITY IDEOGRAPH-2FA1D;Lo;0;L;2A600;;;;N;;;;;
*/
    
    /*
    static int remapUCA_CompatibilityIdeographToCp(int cp) {
        switch (cp) {    
            case 0x9FA6: return 0xFA0E; // FA0E ; [.9FA6.0020.0002.FA0E] # CJK COMPATIBILITY IDEOGRAPH-FA0E
            case 0x9FA7: return 0xFA0F; // FA0F ; [.9FA7.0020.0002.FA0F] # CJK COMPATIBILITY IDEOGRAPH-FA0F
            case 0x9FA8: return 0xFA11; // FA11 ; [.9FA8.0020.0002.FA11] # CJK COMPATIBILITY IDEOGRAPH-FA11
            case 0x9FA9: return 0xFA13; // FA13 ; [.9FA9.0020.0002.FA13] # CJK COMPATIBILITY IDEOGRAPH-FA13
            case 0x9FAA: return 0xFA14; // FA14 ; [.9FAA.0020.0002.FA14] # CJK COMPATIBILITY IDEOGRAPH-FA14
            case 0x9FAB: return 0xFA1F; // FA1F ; [.9FAB.0020.0002.FA1F] # CJK COMPATIBILITY IDEOGRAPH-FA1F
            case 0x9FAC: return 0xFA21; // FA21 ; [.9FAC.0020.0002.FA21] # CJK COMPATIBILITY IDEOGRAPH-FA21
            case 0x9FAD: return 0xFA23; // FA23 ; [.9FAD.0020.0002.FA23] # CJK COMPATIBILITY IDEOGRAPH-FA23
            case 0x9FAE: return 0xFA24; // FA24 ; [.9FAE.0020.0002.FA24] # CJK COMPATIBILITY IDEOGRAPH-FA24
            case 0x9FAF: return 0xFA27; // FA27 ; [.9FAF.0020.0002.FA27] # CJK COMPATIBILITY IDEOGRAPH-FA27
            case 0x9FB0: return 0xFA28; // FA28 ; [.9FB0.0020.0002.FA28] # CJK COMPATIBILITY IDEOGRAPH-FA28
            case 0x9FB1: return 0xFA29; // FA29 ; [.9FB1.0020.0002.FA29] # CJK COMPATIBILITY IDEOGRAPH-FA29
        }
        return cp;
    }
    */
    
/**
    * Function used to: 
    * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
    * b) bump any non-CJK characters by 10FFFF.
    * The relevant blocks are:
    * A:	4E00..9FFF; CJK Unified Ideographs
	*		F900..FAFF; CJK Compatibility Ideographs
	* B:	3400..4DBF; CJK Unified Ideographs Extension A
	*		20000..XX;	CJK Unified Ideographs Extension B (and others later on)
	* As long as
	*	no new B characters are allocated between 4E00 and FAFF, and
	*	no new A characters are outside of this range,
	* (very high probability) this simple code will work.
	* The reordered blocks are:
	* Block1 is CJK
	* Block2 is CJK_COMPAT_USED
	* Block3 is CJK_A
	* Any other CJK gets its normal code point
	* Any non-CJK gets +10FFFF
	* When we reorder Block1, we make sure that it is at the very start,
	* so that it will use a 3-byte form.
	*/
static int swapCJK(int i) {
    	
	if (i >= CJK_BASE) {
		if (i < CJK_LIMIT)				return i - CJK_BASE;
			
		if (i < CJK_COMPAT_USED_BASE)	return i + NON_CJK_OFFSET;
    		
		if (i < CJK_COMPAT_USED_LIMIT)	return i - CJK_COMPAT_USED_BASE
												+ (CJK_LIMIT - CJK_BASE);
		if (i < CJK_B_BASE)				return i + NON_CJK_OFFSET;
    		
		if (i < CJK_B_LIMIT)			return i; // non-BMP-CJK
    		
		return i + NON_CJK_OFFSET;	// non-CJK
	}
	if (i < CJK_A_BASE)					return i + NON_CJK_OFFSET;
		
	if (i < CJK_A_LIMIT)				return i - CJK_A_BASE
												+ (CJK_LIMIT - CJK_BASE) 
												+ (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
    return i + NON_CJK_OFFSET; // non-CJK
}
    
    // Fractional UCA Generation Constants
    
    static final int
        TOP = 0xA0,
        SPECIAL_BASE = 0xF0,
    
    	NON_CJK_OFFSET = 0x110000,
        BYTES_TO_AVOID = 3,
        OTHER_COUNT = 256 - BYTES_TO_AVOID,
        LAST_COUNT = OTHER_COUNT / 2,
        LAST_COUNT2 = OTHER_COUNT / 21, // room for intervening, without expanding to 5 bytes
        IMPLICIT_3BYTE_COUNT = 1,
        IMPLICIT_BASE_BYTE = 0xE0,
        
        IMPLICIT_LIMIT_BYTE = IMPLICIT_BASE_BYTE + 4, // leave room for 1 3-byte and 2 4-byte forms
        
        IMPLICIT_4BYTE_BOUNDARY = IMPLICIT_3BYTE_COUNT * OTHER_COUNT * LAST_COUNT,
        LAST_MULTIPLIER = OTHER_COUNT / LAST_COUNT,
        LAST2_MULTIPLIER = OTHER_COUNT / LAST_COUNT2,
        IMPLICIT_BASE_3BYTE = (IMPLICIT_BASE_BYTE << 24) + 0x030300,
        IMPLICIT_BASE_4BYTE = ((IMPLICIT_BASE_BYTE + IMPLICIT_3BYTE_COUNT) << 24) + 0x030303
        ;
    
    // GET IMPLICIT PRIMARY WEIGHTS
    // Return value is left justified primary key
    
    static int getImplicitPrimary(int cp) {

        if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
        
        cp = swapCJK(cp);
        
        if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
        
        // we now have a range of numbers from 0 to 21FFFF.
        
        return getImplicitPrimaryFromSwapped(cp);
    }
        
	static int getImplicitPrimaryFromSwapped(int cp) {
        
        // we must skip all 00, 01, 02 bytes, so most bytes have 253 values
        // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
        // we shift so that HAN all has the same first primary, for compression.
        // for the 4 byte case, we make the gap as large as we can fit.
        // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
        // Four byte forms (most supplementaries) are EF xx xx xx (with a gap of LAST2_MULTIPLIER == 14)
        
        int last0 = cp - IMPLICIT_4BYTE_BOUNDARY;
        if (last0 < 0) {
            int last1 = cp / LAST_COUNT;
            last0 = cp % LAST_COUNT;
            
            int last2 = last1 / OTHER_COUNT;
            last1 %= OTHER_COUNT;
            
            if (DEBUG || last2 > 0xFF-BYTES_TO_AVOID) System.out.println("3B: " + Utility.hex(cp) + " => "
            	+ Utility.hex(last2) + ", "
            	+ Utility.hex(last1) + ", "
            	+ Utility.hex(last0) + ", "
        	);
            
            return IMPLICIT_BASE_3BYTE + (last2 << 24) + (last1 << 16) + ((last0*LAST_MULTIPLIER) << 8);
        } else {
            int last1 = last0 / LAST_COUNT2;
            last0 %= LAST_COUNT2;
            
            int last2 = last1 / OTHER_COUNT;
            last1 %= OTHER_COUNT;
            
            int last3 = last2 / OTHER_COUNT;
            last2 %= OTHER_COUNT;
            
            if (DEBUG || last3 > 0xFF-BYTES_TO_AVOID) System.out.println("4B: " + Utility.hex(cp) + " => "
            	+ Utility.hex(last3) + ", "
            	+ Utility.hex(last2) + ", "
            	+ Utility.hex(last1) + ", "
            	+ Utility.hex(last0 * LAST2_MULTIPLIER) + ", "
        	);

           return IMPLICIT_BASE_4BYTE + (last3 << 24) + (last2 << 16) + (last1 << 8) + (last0 * LAST2_MULTIPLIER);
        }
    }
    
    
    static void showImplicit(String title, int cp) {
    	if (DEBUG) showImplicit2(title + "-1", cp-1);
    	
    	showImplicit2(title + "00", cp);
    	
    	if (DEBUG) showImplicit2(title + "+1", cp+1);
    }
    
    static void showImplicit2(String title, int cp) {
        System.out.println(title + ":\t" + Utility.hex(cp)
        	+ " => " + Utility.hex(swapCJK(cp))
        	+ " => " + Utility.hex(INT_MASK & getImplicitPrimary(cp)));
    }
    
    static void showImplicit3(String title, int cp) {
        System.out.println("*" + title + ":\t" + Utility.hex(cp)
        	+ " => " + Utility.hex(INT_MASK & getImplicitPrimaryFromSwapped(cp)));
    }
    
    // TEST PROGRAM
    
    static void checkImplicit() {
        System.out.println("Starting Implicit Check");
        
        long oldPrimary = 0;
        int oldChar = -1;
        int oldSwap = -1;
    	
    	// test monotonically increasing
    	
    	for (int i = 0; i < 0x21FFFF; ++i) {
    		long newPrimary = INT_MASK & getImplicitPrimaryFromSwapped(i);
            if (newPrimary < oldPrimary) {
                throw new IllegalArgumentException(Utility.hex(i) + ": overlap: "
                	+ Utility.hex(oldChar) + " (" + Utility.hex(oldPrimary) + ")"
                	+ " > " + Utility.hex(i) + "(" + Utility.hex(newPrimary) + ")");
            }
            oldPrimary = newPrimary;
    	}
    	
        showImplicit("# First CJK", CJK_BASE);
        showImplicit("# Last CJK", CJK_LIMIT-1);
        showImplicit("# First CJK-compat", CJK_COMPAT_USED_BASE);
        showImplicit("# Last CJK-compat", CJK_COMPAT_USED_LIMIT-1);
        showImplicit("# First CJK_A", CJK_A_BASE);
        showImplicit("# Last CJK_A", CJK_A_LIMIT-1);
        showImplicit("# First CJK_B", CJK_B_BASE);
        showImplicit("# Last CJK_B", CJK_B_LIMIT-1);
        showImplicit("# First Other Implicit", 0);
        showImplicit("# Last Other Implicit", 0x10FFFF);
        
        showImplicit3("# FIRST", 0);
        showImplicit3("# Boundary-1", IMPLICIT_4BYTE_BOUNDARY-1);
        showImplicit3("# Boundary00", IMPLICIT_4BYTE_BOUNDARY);
        showImplicit3("# Boundary+1", IMPLICIT_4BYTE_BOUNDARY+1);
        showImplicit3("# LAST", 0x21FFFF);
        
    	oldPrimary = 0;
        oldChar = -1;
        
        for (int batch = 0; batch < 3; ++batch) {
        	for (int i = 0; i <= 0x10FFFF; ++i) {
        		
        		// separate the three groups
        		
        		if (ucd.isCJK_BASE(i) || CJK_COMPAT_USED_BASE <= i && i < CJK_COMPAT_USED_LIMIT) {
        			if (batch != 0) continue;
        		} else if (ucd.isCJK_AB(i)) {
        			if (batch != 1) continue;
        		} else if (batch != 2) continue;
        		
        		
        		// test swapping
        		
        		int currSwap = swapCJK(i);
        		if (currSwap < oldSwap) {
                	throw new IllegalArgumentException(Utility.hex(i) + ": overlap: "
                		+ Utility.hex(oldChar) + " (" + Utility.hex(oldSwap) + ")"
                		+ " > " + Utility.hex(i) + "(" + Utility.hex(currSwap) + ")");
        		}
        	
        		
            	long newPrimary = INT_MASK & getImplicitPrimary(i);
	            
            	// test correct values
	            
	            
            	if (newPrimary < oldPrimary) {
                	throw new IllegalArgumentException(Utility.hex(i) + ": overlap: "
                		+ Utility.hex(oldChar) + " (" + Utility.hex(oldPrimary) + ")"
                		+ " > " + Utility.hex(i) + "(" + Utility.hex(newPrimary) + ")");
            	}
	            
	            
            	long b0 = (newPrimary >> 24) & 0xFF;
            	long b1 = (newPrimary >> 16) & 0xFF;
            	long b2 = (newPrimary >> 8) & 0xFF;
            	long b3 = newPrimary & 0xFF;
	            
            	if (b0 < IMPLICIT_BASE_BYTE || b0 >= IMPLICIT_LIMIT_BYTE  || b1 < 3 || b2 < 3 || b3 == 1 || b3 == 2) {
                	throw new IllegalArgumentException(Utility.hex(i) + ": illegal byte value: " + Utility.hex(newPrimary)
                    	+ ", " + Utility.hex(b1) + ", " + Utility.hex(b2) + ", " + Utility.hex(b3));
            	}
	            
            	// print range to look at
	            
            	if (false) {
                	int b = i & 0xFF;
                	if (b == 255 || b == 0 || b == 1) {
                    	System.out.println(Utility.hex(i) + " => " + Utility.hex(newPrimary));
                	}
            	}
            	oldPrimary = newPrimary;
            	oldChar = i;
        	}
        }
        System.out.println("Successful Implicit Check!!");
    }
    
    static boolean sameTopByte(int x, int y) {
        int x1 = x & 0xFF0000;
        int y1 = y & 0xFF0000;
        if (x1 != 0 || y1 != 0) return x1 == y1;
        x1 = x & 0xFF00;
        y1 = y & 0xFF00;
        return x1 == y1;
    }
    
        // return true if either:
        // a. toLower(NFKD(x)) != x (using FULL case mappings), OR
        // b. toSmallKana(NFKD(x)) != x.

    static final boolean needsCaseBit(String x) {
        String s = NFKD.normalize(x);
        if (!ucd.getCase(s, FULL, LOWER).equals(s)) return true;
        if (!toSmallKana(s).equals(s)) return true;
        return false;
    }
    
    static final StringBuffer toSmallKanaBuffer = new StringBuffer();
    
    static final String toSmallKana(String s) {
        // note: don't need to do surrogates; none exist
        boolean gotOne = false;
        toSmallKanaBuffer.setLength(0);
        for (int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            if ('\u3042' <= c && c <= '\u30EF') {
                switch(c - 0x3000) {
                  case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: case 0x84: case 0x86: case 0x8F:
                  case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: case 0xE4: case 0xE6: case 0xEF:
                    --c; // maps to previous char
                    gotOne = true;
                    break;
                  case 0xAB:
                    c = '\u30F5'; 
                    gotOne = true;
                    break;
                  case 0xB1:
                    c = '\u30F6'; 
                    gotOne = true;
                    break;
                }
            }
            toSmallKanaBuffer.append(c);
        }
        if (gotOne) return toSmallKanaBuffer.toString();
        return s;
    }
        
    /*
30F5;KATAKANA LETTER SMALL KA;Lo;0;L;;;;;N;;;;;
30AB;KATAKANA LETTER KA;Lo;0;L;;;;;N;;;;;
30F6;KATAKANA LETTER SMALL KE;Lo;0;L;;;;;N;;;;;
30B1;KATAKANA LETTER KE;Lo;0;L;;;;;N;;;;;

30A1;KATAKANA LETTER SMALL A;Lo;0;L;;;;;N;;;;;
30A2;KATAKANA LETTER A;Lo;0;L;;;;;N;;;;;
30A3;KATAKANA LETTER SMALL I;Lo;0;L;;;;;N;;;;;
30A4;KATAKANA LETTER I;Lo;0;L;;;;;N;;;;;
30A5;KATAKANA LETTER SMALL U;Lo;0;L;;;;;N;;;;;
30A6;KATAKANA LETTER U;Lo;0;L;;;;;N;;;;;
30A7;KATAKANA LETTER SMALL E;Lo;0;L;;;;;N;;;;;
30A8;KATAKANA LETTER E;Lo;0;L;;;;;N;;;;;
30A9;KATAKANA LETTER SMALL O;Lo;0;L;;;;;N;;;;;
30AA;KATAKANA LETTER O;Lo;0;L;;;;;N;;;;;
30C3;KATAKANA LETTER SMALL TU;Lo;0;L;;;;;N;;;;;
30C4;KATAKANA LETTER TU;Lo;0;L;;;;;N;;;;;
30E3;KATAKANA LETTER SMALL YA;Lo;0;L;;;;;N;;;;;
30E4;KATAKANA LETTER YA;Lo;0;L;;;;;N;;;;;
30E5;KATAKANA LETTER SMALL YU;Lo;0;L;;;;;N;;;;;
30E6;KATAKANA LETTER YU;Lo;0;L;;;;;N;;;;;
30E7;KATAKANA LETTER SMALL YO;Lo;0;L;;;;;N;;;;;
30E8;KATAKANA LETTER YO;Lo;0;L;;;;;N;;;;;
30EE;KATAKANA LETTER SMALL WA;Lo;0;L;;;;;N;;;;;
30EF;KATAKANA LETTER WA;Lo;0;L;;;;;N;;;;;

3041;HIRAGANA LETTER SMALL A;Lo;0;L;;;;;N;;;;;
3042;HIRAGANA LETTER A;Lo;0;L;;;;;N;;;;;
3043;HIRAGANA LETTER SMALL I;Lo;0;L;;;;;N;;;;;
3044;HIRAGANA LETTER I;Lo;0;L;;;;;N;;;;;
3045;HIRAGANA LETTER SMALL U;Lo;0;L;;;;;N;;;;;
3046;HIRAGANA LETTER U;Lo;0;L;;;;;N;;;;;

3047;HIRAGANA LETTER SMALL E;Lo;0;L;;;;;N;;;;;
3048;HIRAGANA LETTER E;Lo;0;L;;;;;N;;;;;
3049;HIRAGANA LETTER SMALL O;Lo;0;L;;;;;N;;;;;
304A;HIRAGANA LETTER O;Lo;0;L;;;;;N;;;;;
3063;HIRAGANA LETTER SMALL TU;Lo;0;L;;;;;N;;;;;
3064;HIRAGANA LETTER TU;Lo;0;L;;;;;N;;;;;
3083;HIRAGANA LETTER SMALL YA;Lo;0;L;;;;;N;;;;;
3084;HIRAGANA LETTER YA;Lo;0;L;;;;;N;;;;;
3085;HIRAGANA LETTER SMALL YU;Lo;0;L;;;;;N;;;;;
3086;HIRAGANA LETTER YU;Lo;0;L;;;;;N;;;;;
3087;HIRAGANA LETTER SMALL YO;Lo;0;L;;;;;N;;;;;
3088;HIRAGANA LETTER YO;Lo;0;L;;;;;N;;;;;
308E;HIRAGANA LETTER SMALL WA;Lo;0;L;;;;;N;;;;;
308F;HIRAGANA LETTER WA;Lo;0;L;;;;;N;;;;;

*/
        
    
    static final int secondaryDoubleStart = 0xD0;
    static final int MARK_CODE_POINT = 0x40000000;
    
    static int fixPrimary(int x) {
        int result = 0;
        if ((x & MARK_CODE_POINT) != 0) result = getImplicitPrimary(x & ~MARK_CODE_POINT);
        else result = primaryDelta[x];
        return result;
    }
    
    static int fixSecondary(int x) {
        x = compactSecondary[x];
        return fixSecondary2(x, compactSecondary[0x153], compactSecondary[0x157]);
    }
        
    static int fixSecondary2(int x, int gap1, int gap2) {
        int top = x;
        int bottom = 0;
        if (top == 0) {
            // ok, zero
        } else if (top == 1) {
            top = COMMON;
        } else {
            top *= 2; // create gap between elements. top is now 4 or more
            top += 0x80 + COMMON - 2; // insert gap to make top at least 87
            
            // lowest values are singletons. Others are 2 bytes
            if (top > secondaryDoubleStart) {
                top -= secondaryDoubleStart;
                top *= 4; // leave bigger gap just in case
                if (x > gap1) {
                    top += 256; // leave gap after COMBINING ENCLOSING KEYCAP (see below)
                }
                if (x > gap2) {
                    top += 64; // leave gap after RUNIC LETTER SHORT-TWIG-AR A (see below)
                }
                
                bottom = (top % LAST_COUNT) * 2 + COMMON;
                top = (top / LAST_COUNT) + secondaryDoubleStart;
            }
        }
        return (top << 8) | bottom;
    }
    
/*
# 0153: (EE3D) 20E3 [0000.0153.0002] COMBINING ENCLOSING KEYCAP
# 0154: (EE41) 0153 [0997.0154.0004][08B1.0020.0004] LATIN SMALL LIGATURE OE
# 0155: (EE45) 017F [09F3.0155.0004] LATIN SMALL LETTER LONG S
# 0157: (EE49) 16C6 [1656.0157.0004] RUNIC LETTER SHORT-TWIG-AR A
# 0158: (EE4D) 2776 [0858.0158.0006] DINGBAT NEGATIVE CIRCLED DIGIT ONE
*/
    
    static int fixTertiary(int x) {
        if (x == 0) return x;
        if (x == 1 || x == 7) throw new IllegalArgumentException("Tertiary illegal: " + x);
        // 2 => COMMON, 1 is unused
        int y = x < 7 ? x : x - 1; // we now use 1F = MAX. Causes a problem so we shift everything to fill a gap at 7 (unused).
        
        int result = 2 * (y - 2) + COMMON;
        
        if (result >= 0x3E) throw new IllegalArgumentException("Tertiary too large: "
        	+ Utility.hex(x) + " => " + Utility.hex(result));
 
        // get case bits. 00 is low, 01 is mixed (never happens), 10 is high
        if (isUpperTertiary[x]) result |= 0x80; 
        return result;
    }
    
    static final boolean[] isUpperTertiary = new boolean[32];
    static {
        isUpperTertiary[0x8] = true;
        isUpperTertiary[0x9] = true;
        isUpperTertiary[0xa] = true;
        isUpperTertiary[0xb] = true;
        isUpperTertiary[0xc] = true;
        isUpperTertiary[0xe] = true;
        isUpperTertiary[0x11] = true;
        isUpperTertiary[0x12] = true;
        isUpperTertiary[0x1D] = true;
    }
    
    static void checkFixes() {
        System.out.println("Checking Secondary/Tertiary Fixes");
        int lastVal = -1;
        for (int i = 0; i <= 0x16E; ++i) {
            if (i == 0x153) {
                System.out.println("debug");
            }
            int val = fixSecondary2(i, 999, 999); // HACK for UCA
            if (val <= lastVal) throw new IllegalArgumentException(
                "Unordered: " + Utility.hex(val) + " => " + Utility.hex(lastVal));
            int top = val >>> 8;
            int bottom = val & 0xFF;
            if (top != 0 && (top < COMMON || top > 0xEF)
                || (top > COMMON && top < 0x87)
                || (bottom != 0 && (isEven(bottom) || bottom < COMMON || bottom > 0xFD))
                || (bottom == 0 && top != 0 && isEven(top))) {
                throw new IllegalArgumentException("Secondary out of range: " + Utility.hex(i) + " => " 
                    + Utility.hex(top) + ", " + Utility.hex(bottom));
            }
        }
        
        lastVal = -1;
        for (int i = 0; i <= 0x1E; ++i) {
            if (i == 1 || i == 7) continue; // never occurs
            int val = fixTertiary(i);
            val &= 0x7F; // mask off case bits
            if (val <= lastVal) throw new IllegalArgumentException(
                "Unordered: " + Utility.hex(val) + " => " + Utility.hex(lastVal));
            if (val != 0 && (isEven(val) || val < COMMON || val > 0x3D)) {
                throw new IllegalArgumentException("Tertiary out of range: " + Utility.hex(i) + " => " 
                    + Utility.hex(val));
            }
        }
        System.out.println("END Checking Secondary/Tertiary Fixes");
    }
    
    static boolean isEven(int x) {
        return (x & 1) == 0;
    }
    
   /* static String ceToString(int primary, int secondary, int tertiary) {
        return "[" + hexBytes(primary) + ", "
            + hexBytes(secondary) + ", "
            + hexBytes(tertiary) + "]";
    }
    */
    
    static String hexBytes(long x) {
        StringBuffer temp = new StringBuffer();
        hexBytes(x, temp);
        return temp.toString();
    }
    
    static void hexBytes(long x, StringBuffer result) {
        byte lastb = 1;
        for (int shift = 24; shift >= 0; shift -= 8) {
            byte b = (byte)(x >>> shift);
            if (b != 0) {
                if (result.length() != 0) result.append(" ");
                result.append(Utility.hex(b));
                //if (lastb == 0) System.err.println(" bad zero byte: " + result);
            }
            lastb = b;
        }
    }
    
    static int fixHan(char ch) { // BUMP HANGUL, HAN
        if (ch < 0x3400 || ch > 0xD7A3) return -1;
        
        char ch2 = ch;
        if (ch >= 0xAC00) ch2 -= (0xAC00 - 0x9FA5 - 1);
        if (ch >= 0x4E00) ch2 -= (0x4E00 - 0x4DB5 - 1);
        
        return 0x6000 + (ch2-0x3400); // room to interleave
    }
    
    static BitSet bumps = new BitSet();
    static BitSet singles = new BitSet();
    
    static void findBumps(char[] representatives) {
        int[] ces = new int[100];
        int[] scripts = new int[100];
        char[] scriptChar = new char[100];
        
        // find representatives
         
        for (char ch = 0; ch < 0xFFFF; ++ch) {
            byte type = collator.getCEType(ch);
            if (type < FIXED_CE) {
                int len = collator.getCEs(String.valueOf(ch), true, ces);
                int primary = UCA.getPrimary(ces[0]);
                if (primary < variableHigh) continue;
                /*
                if (ch == 0x1160 || ch == 0x11A8) { // set bumps within Hangul L, V, T
                    bumps.set(primary);
                    continue;
                }
                */
                byte script = ucd.getScript(ch);
                // HACK
                if (ch == 0x0F7E || ch == 0x0F7F) script = TIBETAN_SCRIPT;
                //if (script == ucd.GREEK_SCRIPT) System.out.println(ucd.getName(ch));
                // get least primary for script
                if (scripts[script] == 0 || scripts[script] > primary) {
                    byte cat = ucd.getCategory(ch);
                    // HACK
                    if (ch == 0x0F7E || ch == 0x0F7F) cat = ucd.OTHER_LETTER;
                    if (cat <= ucd.OTHER_LETTER && cat != ucd.Lm) {
                        scripts[script] = primary;
                        scriptChar[script] = ch;
                        if (script == ucd.GREEK_SCRIPT) System.out.println("*" + Utility.hex(primary) + ucd.getName(ch));
                    }
                }
                // get representative char for primary
                if (representatives[primary] == 0 || representatives[primary] > ch) {
                    representatives[primary] = ch;
                }
            }
        }
 
        // set bumps
        for (int i = 0; i < scripts.length; ++i) {
            if (scripts[i] > 0) {
                bumps.set(scripts[i]);
                System.out.println(Utility.hex(scripts[i]) + " " + UCD.getScriptID_fromIndex((byte)i)
                 + " " + Utility.hex(scriptChar[i]) + " " + ucd.getName(scriptChar[i]));
            }
        }
 
        char[][] singlePairs = {{'a','z'}, {' ', ' '}}; // , {'\u3041', '\u30F3'}
        for (int j = 0; j < singlePairs.length; ++j) {
            for (char k = singlePairs[j][0]; k <= singlePairs[j][1]; ++k) {
                setSingle(k, ces);
            }
        }
        /*setSingle('\u0300', ces);
        setSingle('\u0301', ces);
        setSingle('\u0302', ces);
        setSingle('\u0303', ces);
        setSingle('\u0308', ces);
        setSingle('\u030C', ces);
        */
        
        bumps.set(0x089A); // lowest non-variable
        bumps.set(0x4E00); // lowest Kangxi
        
    }
    
    static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd','HH:mm:ss' GMT'");
    
    static String getNormalDate() {
        return myDateFormat.format(new Date()) + " [MD]";
    }
    
    
    static void setSingle(char ch, int[] ces) {
        collator.getCEs(String.valueOf(ch), true, ces);
        singles.set(UCA.getPrimary(ces[0]));
        if (ch == 'a') gapForA = UCA.getPrimary(ces[0]);
    }
    
    
    static void copyFile(PrintWriter log, String fileName) throws IOException {
        BufferedReader input = new BufferedReader(new FileReader(fileName));
        while (true) {
           String line = input.readLine();
           if (line == null) break;
           log.println(line);
        }
        input.close();
    }
    
    static UnicodeSet compatibilityExceptions = new UnicodeSet("[\u0CCB\u0DDD\u017F\u1E9B\uFB05]");
    
    static void writeCollationValidityLog() throws IOException {
        Default.setUCD();
    	
        //log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html"));
        log = Utility.openPrintWriter("CheckCollationValidity.html", Utility.UTF8_WINDOWS);
        
        log.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
        log.println("<title>UCA Validity Log</title>");
        log.println("<style>.bottom { border-bottom-style: solid; border-bottom-color: #0000FF }</style>");
        log.println("</head><body bgcolor='#FFFFFF'>");

        
        //collator = new UCA(null);
        if (false){
            String key = collator.getSortKey("\u0308\u0301", UCA.SHIFTED, false);
            String look = printableKey(key);
            System.out.println(look);
            
        }
        System.out.println("Sorting");
        /*
        for (int i = 0; i <= 0x10FFFF; ++i) {
            if (EXCLUDE_UNSUPPORTED && !collator.found.contains(i)) continue;
            if (0xD800 <= i && i <= 0xF8FF) continue; // skip surrogates and private use
            //if (0xA000 <= c && c <= 0xA48F) continue; // skip YI
            addString(UTF32.valueOf32(i), option);
        }
        */

        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, null);
        cc.enableSamples();
        
        while (true) {
            String s = cc.next();
            if (s == null) break;
            addString(s, option);
        }
                
        System.out.println("Total: " + sortedD.size());
        Iterator it;
        
        //ucd.init();
        
        if (false) {
            System.out.println("Listing Mismatches");
            it = duplicates.keySet().iterator();
            //String lastSortKey = "";
            //String lastSource = "";
            while (it.hasNext()) {
                String source = (String)it.next();
                String sortKey = (String)duplicates.get(source);
                char endMark = source.charAt(source.length()-1);
                source = source.substring(0,source.length()-1);
                if (endMark == MARK1) {
                    log.println("<br>");
                    log.println("Mismatch: " + Utility.hex(source, " ")
                        + ", " + ucd.getName(source) + "<br>");
                    log.print("  NFD:");
                } else {
                    log.print("  NFC:");
                }
                log.println(UCA.toString(sortKey) + "<br>");
                
                /*if (source.equals(lastSource)) {
                    it.remove();
                    --duplicateCount;
                }
                //lastSortKey = sortKey;
                lastSource = lastSource;
                */
            }
            System.out.println("Total: " + sortedD.size());
        }
        
        System.out.println("Writing");
        String version = collator.getDataVersion();
        
        log.println("<h1>Collation Validity Checks</h1>");
        log.println("<table><tr><td>Generated: </td><td>" + getNormalDate() + "</td></tr>");
        log.println("<tr><td>File Version: </td><td>" + collator.getDataVersion() + "/" + collator.getUCDVersion() + "</td></tr></table>");
        
        
        if (GENERATED_NFC_MISMATCHES) showMismatches();
        removeAdjacentDuplicates2();
        
        
        UnicodeSet alreadySeen = new UnicodeSet(compatibilityExceptions);
        
        checkBadDecomps(1, false, alreadySeen); // if decomposition is off, all primaries should be identical
        checkBadDecomps(2, false, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
        checkBadDecomps(3, false, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
        //checkBadDecomps(2, true, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
        
        log.println("<p>Note: characters with decompositions to space + X, and tatweel + X are excluded,"
        	+ " as are a few special characters: " + compatibilityExceptions.toPattern(true) + "</p>");
        
        checkWellformedTable();
        addClosure();
        writeDuplicates();
        writeOverlap();

        log.println("</body></html>");
        log.close();
        sortedD.clear();
        System.out.println("Done");
    }
    
    
    static void addClosure() {
        int canCount = 0;
        System.out.println("Add missing decomposibles");
    	log.println("<h2>7. Comparing Other Equivalents</h2>");
    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
    	log.println("<p>Each of the three strings is canonically equivalent, but has different sort keys</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        log.println("<tr><th>Count</th><th>Name</th><th>Code</th><th>Sort Keys</th></tr>");
    	
        
        Set contentsForCanonicalIteration = new TreeSet();
        UCA.UCAContents ucac = collator.getContents(UCA.FIXED_CE, null); // NFD
        int ccounter = 0;
        while (true) {
            Utility.dot(ccounter++);
            String s = ucac.next();
            if (s == null) break;
            contentsForCanonicalIteration.add(s);
        }
        
        Set additionalSet = new HashSet();
        
        System.out.println("Loading canonical iterator");
        if (canIt == null) canIt = new CanonicalIterator(".");
        Iterator it2 = contentsForCanonicalIteration.iterator();
        System.out.println("Adding any FCD equivalents that have different sort keys");
        while (it2.hasNext()) {
            String key = (String)it2.next();
            if (key == null) {
                System.out.println("Null Key");
                continue;
            }
            canIt.setSource(key);
            String nfdKey = toD.normalize(key);
            
            boolean first = true;
            while (true) {
                String s = canIt.next();
                if (s == null) break;
                if (s.equals(key)) continue;
                if (contentsForCanonicalIteration.contains(s)) continue;
                if (additionalSet.contains(s)) continue;
                
                
                // Skip anything that is not FCD.
                if (!NFD.isFCD(s)) continue;
                
                // We ONLY add if the sort key would be different
                // Than what we would get if we didn't decompose!!
                String sortKey = collator.getSortKey(s, UCA.NON_IGNORABLE);
                String nonDecompSortKey = collator.getSortKey(s, UCA.NON_IGNORABLE, false);
                if (sortKey.equals(nonDecompSortKey)) continue;
                
                if (DEBUG && first) {
                    System.out.println(" " + ucd.getCodeAndName(key));
                    first = false;
                }
                log.println("<tr><td rowspan='3'>" + (++canCount) + "</td><td>" + Utility.replace(ucd.getName(key), ", ", ",<br>") + "</td>");
                log.println("<td>" + Utility.hex(key) + "</td>");
                log.println("<td>" + collator.toString(sortKey) + "</td></tr>");
                log.println("<tr><td>" + Utility.replace(ucd.getName(nfdKey), ", ", ",<br>") + "</td>");
                log.println("<td>" + Utility.hex(nfdKey) + "</td>");
                log.println("<td>" + collator.toString(sortKey) + "</td></tr>");
                log.println("<tr><td class='bottom'>" + Utility.replace(ucd.getName(s), ", ", ",<br>") + "</td>");
                log.println("<td class='bottom'>" + Utility.hex(s) + "</td>");
                log.println("<td class='bottom'>" + collator.toString(nonDecompSortKey) + "</td></tr>");
                additionalSet.add(s);
            }
        }
    	log.println("</table>");
    	log.println("<p>Items: " + canCount + "</p>");
    	log.flush();
    }
    
	static void checkWellformedTable() throws IOException {
    	System.out.println("Checking for well-formedness");
    	
    	log.println("<h2>6. Checking for well-formedness</h2>");
    	
        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
        
        int[] ces = new int[50];
        
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        int[] lenArray = new int[1];
        
        int minps = Integer.MAX_VALUE;
        int minpst = Integer.MAX_VALUE;
        String minpsSample = "", minpstSample = "";
        int errorCount = 0;
        
        while (true) {
            String str = cc.next(ces, lenArray);
            if (str == null) break;
            int len = lenArray[0];
            
            for (int i = 0; i < len; ++i) {
            	int ce = ces[i];
            	int p = UCA.getPrimary(ce);
            	int s = UCA.getSecondary(ce);
            	int t = UCA.getTertiary(ce);
            	
            	// Gather data for WF#2 check

            	if (p == 0) {
            		if (s > 0) {
            			if (s < minps) {
            				minps = s;
            				minpsSample = str;
            			}
            		} else {
            			if (t > 0 && t < minpst) {
            				minpst = t;
            				minpstSample = str;
            			}
                    }            			
            	}
            }
        }
        
        cc = collator.getContents(UCA.FIXED_CE, nfd);
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        int lastPrimary = 0;
        
        while (true) {
            String str = cc.next(ces, lenArray);
            if (str == null) break;
            int len = lenArray[0];
            
            for (int i = 0; i < len; ++i) {
            	int ce = ces[i];
            	int p = UCA.getPrimary(ce);
            	int s = UCA.getSecondary(ce);
            	int t = UCA.getTertiary(ce);
            	
            	// IF we are at the start of an implicit, then just check that the implicit is in range
            	// CHECK implicit
            	if (collator.isImplicitLeadPrimary(lastPrimary)) {
            	    try {
            	        if (s != 0 || t != 0) throw new Exception("Second implicit must be [X,0,0]");
            	        collator.ImplicitToCodePoint(lastPrimary, p);   // throws exception if bad
            	    } catch (Exception e) {
            		    log.println("<tr><td>" + (++errorCount) + ". BAD IMPLICIT: " + e.getMessage()
            			    + "</td><td>" + CEList.toString(ces, len) 
            			    + "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
            	    }
            	    // zap the primary, since we worry about the last REAL primary:
            	    lastPrimary = 0;
            	    continue;
            	}
            	
            	// IF we are in the trailing range, something is wrong.
            	if (p >= UCA_Types.UNSUPPORTED_LIMIT) {
                    log.println("<tr><td>" + (++errorCount) + ". > " + Utility.hex(UCA_Types.UNSUPPORTED_LIMIT,4)
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
            	    lastPrimary = p;
            	    continue;
            	}
            	
            	// Check WF#1
            	
            	if (p != 0 && s == 0) {
            		log.println("<tr><td>" + (++errorCount) + ". WF1.1"
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
            	}
            	if (s != 0 && t == 0) {
            		log.println("<tr><td>" + (++errorCount) + ". WF1.2"
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
            	}
            	
            	// Check WF#2

            	if (p != 0) {
            		if (s > minps) {
            		log.println("<tr><td>" + (++errorCount) + ". WF2.2"
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
            		}
            	}
            	if (s != 0) {
            		if (t > minpst) {
            		log.println("<tr><td>" + (++errorCount) + ". WF2.3"
            			+ "</td><td>" + CEList.toString(ces, len) 
            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
            		}
            	} else {
            	}
            	
            	lastPrimary = p;
            	    
            }
        }
        log.println("</table>");
        log.println("<p>Minimum Secondary in Primary Ignorable = " + Utility.hex(minps)
        	+ " from \t" + collator.getCEList(minpsSample, true) 
        	+ "\t" + ucd.getCodeAndName(minpsSample) + "</p>");
        if (minpst < Integer.MAX_VALUE) {
        	log.println("<p>Minimum Tertiary in Secondary Ignorable =" + Utility.hex(minpst)
        		+ " from \t" + collator.getCEList(minpstSample, true) 
        		+ "\t" + ucd.getCodeAndName(minpstSample) + "</p>");
        }
                
        
        log.println("<p>Errors: " + errorCount + "</p>");
    	log.flush();
    }
    
    
/*            
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
9FA5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;;
A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;;
A4C4;YI RADICAL ZZIET;So;0;ON;;;;;N;;;;;
A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
*/

    static final int[][] extraConformanceRanges = {
        {0x3400, 0x4DB5}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0xA000, 0xA48C}, {0xE000, 0xF8FF},
        {0xFDD0, 0xFDEF},
        {0x20000, 0x2A6D6},
        {0x2F800, 0x2FA1D},
        };
            
    static final int[] extraConformanceTests = {
            //0xD800, 0xDBFF, 0xDC00, 0xDFFF,
            0xFDD0, 0xFDEF, 0xFFF8,
            0xFFFE, 0xFFFF,
            0x10000, 0x1FFFD, 0x1FFFE, 0x1FFFF,
            0x20000, 0x2FFFD, 0x2FFFE, 0x2FFFF,
            0xE0000, 0xEFFFD, 0xEFFFE, 0xEFFFF,
            0xF0000, 0xFFFFD, 0xFFFFE, 0xFFFFF,
            0x100000, 0x10FFFD, 0x10FFFE, 0x10FFFF,
            IMPLICIT_4BYTE_BOUNDARY, IMPLICIT_4BYTE_BOUNDARY-1, IMPLICIT_4BYTE_BOUNDARY+1,
        };
        
    static final int MARK = 1;
    static final char MARK1 = '\u0001';
    static final char MARK2 = '\u0002';
    //Normalizer normalizer = new Normalizer(Normalizer.NFC, true);
    
    static Normalizer toC = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
    static Normalizer toD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
    static TreeMap MismatchedC = new TreeMap();
    static TreeMap MismatchedN = new TreeMap();
    static TreeMap MismatchedD = new TreeMap();
    
    static final byte option = UCA.NON_IGNORABLE; // SHIFTED
    
    static void addString(int ch, byte option) {
        addString(UTF32.valueOf32(ch), option);
    }
        
    static void addString(String ch, byte option) {
        String colDbase = collator.getSortKey(ch, option, true);
        String colNbase = collator.getSortKey(ch, option, false);
        String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
        if (!colNbase.equals(colCbase) || !colNbase.equals(colDbase) ) {
            /*System.out.println(Utility.hex(ch));
            System.out.println(printableKey(colNbase));
            System.out.println(printableKey(colNbase));
            System.out.println(printableKey(colNbase));*/
            MismatchedN.put(ch,colNbase);
            MismatchedC.put(ch,colCbase);
            MismatchedD.put(ch,colDbase);
        }
        String colD = colDbase + "\u0000" + ch; // UCA.NON_IGNORABLE
        String colN = colNbase + "\u0000" + ch;
        String colC = colCbase + "\u0000" + ch;
        sortedD.put(colD, ch);
        backD.put(ch, colD);
        sortedN.put(colN, ch);
        backN.put(ch, colN);
        /*
        if (strength > 4) {
            duplicateCount++;
            duplicates.put(ch+MARK1, col);
            duplicates.put(ch+MARK2, col2);
        } else if (strength != 0) {
            sorted.put(col2 + MARK2, ch);
        }
        unique += 2;
        */
    }
    
   static void removeAdjacentDuplicates() {
        String lastChar = "";
        int countRem = 0;
        int countDups = 0;
        int errorCount = 0;
        Iterator it1 = sortedD.keySet().iterator();
        Iterator it2 = sortedN.keySet().iterator();
        Differ differ = new Differ(250,3);
        log.println("<h2>2. Differences in Ordering</h2>");
        log.println("<p>Codes and names are in the white rows: bold means that the NO-NFD sort key differs from UCA key.</p>");
        log.println("<p>Keys are in the light blue rows: green is the bad key, blue is UCA, black is where they equal.</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        log.println("<tr><th>File Order</th><th>Code and Decomp</th><th>Key and Decomp-Key</th></tr>");
        
        while (true) {
            boolean gotOne = false;
            if (it1.hasNext()) {
                String col1 = (String)it1.next();
                String ch1 = (String)sortedD.get(col1);
                differ.addA(ch1);
                gotOne = true;
            }

            if (it2.hasNext()) {
                String col2 = (String)it2.next();
                String ch2 = (String)sortedN.get(col2);
                differ.addB(ch2);
                gotOne = true;
            }
            
            differ.checkMatch(!gotOne);
            
            if (differ.getACount() != 0 || differ.getBCount() != 0) {
                for (int q = 0; q < 2; ++q) {
                    String cell = "<td valign='top'" + (q!=0 ? "bgcolor='#C0C0C0'" : "") + ">" + (q!=0 ? "<tt>" : "");
                    
                    log.print("<tr>" + cell);
                    for (int i = -1; i < differ.getACount()+1; ++i) {
                        showDiff(q==0, true, differ.getALine(i), differ.getA(i));
                        log.println("<br>");
                        ++countDups;
                    }
                    countDups -= 2; // to make up for extra line above and below
                    if (false) {
                        log.print("</td>" + cell);
                        
                        for (int i = -1; i < differ.getBCount()+1; ++i) {
                            showDiff(q==0, false, differ.getBLine(i), differ.getB(i));
                            log.println("<br>");
                        }
                    }
                    log.println("</td></tr>");
                }
                errorCount++;
            }
            //differ.flush();
            
            if (!gotOne) break;
        }
        
        log.println("</table>");
        
        log.println("<p>Errors: " + errorCount + "</p>");
        
        //log.println("Removed " + countRem + " adjacent duplicates.<br>");
        System.out.println("Left " + countDups + " conflicts.<br>");
        log.println("Left " + countDups + " conflicts.<br>");
    	log.flush();
   }

   static void removeAdjacentDuplicates2() {
        String lastChar = "";
        int countRem = 0;
        int countDups = 0;
        int errorCount = 0;
        Iterator it = sortedD.keySet().iterator();
        log.println("<h1>2. Differences in Ordering</h1>");
        log.println("<p>Codes and names are in the white rows: bold means that the NO-NFD sort key differs from UCA key.</p>");
        log.println("<p>Keys are in the light blue rows: green is the bad key, blue is UCA, black is where they equal.</p>");
        log.println("<p>Note: so black lines are generally ok.</p>");
        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
        log.println("<tr><th>File Order</th><th>Code and Decomp</th><th>Key and Decomp-Key</th></tr>");
        
        String lastCol = "a";
        String lastColN = "a";
        String lastCh = "";
        boolean showedLast = true;
        int count = 0;
        while (it.hasNext()) {
            count++;
            String col = (String)it.next();
            String ch = (String)sortedD.get(col);
            String colN = (String)backN.get(ch);
            if (colN == null || colN.length() < 1) {
                System.out.println("Missing colN value for " + Utility.hex(ch, " ") + ": " + printableKey(colN));
            }
            if (col == null || col.length() < 1) {
                System.out.println("Missing col value for " + Utility.hex(ch, " ") + ": " + printableKey(col));
            }
            
            if (compareMinusLast(col, lastCol) == compareMinusLast(colN, lastColN)) {
                showedLast = false;
            } else {
                if (true && count < 200) {
                    System.out.println();
                    System.out.println(Utility.hex(ch, " ") + ", " + Utility.hex(lastCh, " "));
                    System.out.println("      col: " + Utility.hex(col, " "));
                    System.out.println(compareMinusLast(col, lastCol));
                    System.out.println("  lastCol: " + Utility.hex(lastCol, " "));
                    System.out.println();
                    System.out.println("     colN: " + Utility.hex(colN, " "));
                    System.out.println(compareMinusLast(colN, lastColN));
                    System.out.println(" lastColN: " + Utility.hex(lastColN, " "));
                }
                if (!showedLast) {
                    log.println("<tr><td colspan='3'></td><tr>");
                    showLine(count-1, lastCh, lastCol, lastColN);
                    errorCount++;
                }
                showedLast = true;
                showLine(count,ch, col, colN);
                errorCount++;
            }
            lastCol = col;
            lastColN = colN;
            lastCh = ch;
        }
        
        log.println("</table>");
        log.println("<p>Errors: " + errorCount + "</p>");
    	log.flush();
   }
   
    static int compareMinusLast(String a, String b) {
        String am = a.substring(0,a.length()-1);
        String bm = b.substring(0,b.length()-1);
        int result = am.compareTo(b);
        return (result < 0 ? -1 : result > 0 ? 1 : 0);
    }
    
    static void showLine(int count, String ch, String keyD, String keyN) {
        String decomp = toD.normalize(ch);
        if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
        log.println("<tr><td>" + count + "</td><td>" 
            + Utility.hex(ch, " ")
            + " " + ucd.getName(ch)
            + decomp
            + "</td><td>");
                
        if (keyD.equals(keyN)) {
            log.println(printableKey(keyN));
        } else {
            log.println("<font color='#009900'>" + printableKey(keyN)
                + "</font><br><font color='#000099'>" + printableKey(keyD) + "</font>"
            );
        }
        log.println("</td></tr>");
    }
    
    TreeSet foo;
    
    static final String[] alternateName = {"SHIFTED", "ZEROED", "NON_IGNORABLE", "SHIFTED_TRIMMED"};
   
    static void showMismatches() {
        log.println("<h2>1. Mismatches when NFD is OFF</h2>");
        log.println("<p>Alternate Handling = " + alternateName[option] + "</p>");
        log.println("<p>NOTE: NFD form is used by UCA,"
            + "so if other forms are different there are <i>ignored</i>. This may indicate a problem, e.g. missing contraction.</p>");
        log.println("<table border='1'>");
        log.println("<tr><th>Name</th><th>Type</th><th>Unicode</th><th>Key</th></tr>");
        Iterator it = MismatchedC.keySet().iterator();
        int errorCount = 0;
        while (it.hasNext()) {
            String ch = (String)it.next();
            String MN = (String)MismatchedN.get(ch);
            String MC = (String)MismatchedC.get(ch);
            String MD = (String)MismatchedD.get(ch);
            String chInC = toC.normalize(ch);
            String chInD = toD.normalize(ch);
            
            log.println("<tr><td rowSpan='3' class='bottom'>" + Utility.replace(ucd.getName(ch), ", ", ",<br>")
                + "</td><td>NFD</td><td>" + Utility.hex(chInD) 
                + "</td><td>" + printableKey(MD) + "</td></tr>");

            log.println("<tr><td>NFC</td><td>" + Utility.hex(chInC) 
                + "</td><td>" + printableKey(MC) + "</td></tr>");
            
            log.println("<tr><td class='bottom'>Plain</td><td class='bottom'>" + Utility.hex(ch) 
                + "</td><td class='bottom'>" + printableKey(MN) + "</td></tr>");
            
            errorCount++;
        }
        log.println("</table>");
        log.println("<p>Errors: " + errorCount + "</p>");
        log.println("<br>");
    	log.flush();
    }
    
    static boolean containsCombining(String s) {
        for (int i = 0; i < s.length(); ++i) {
            if ((ucd.getCategoryMask(s.charAt(i)) & ucd.MARK_MASK) != 0) return true;
        }
        return false;
    }
   
   
    static void showDiff(boolean showName, boolean firstColumn, int line, Object chobj) {
        String ch = chobj.toString();
        String decomp = toD.normalize(ch);
        if (showName) {
            if (ch.equals(decomp)) {
                log.println(//title + counter + " "
                    Utility.hex(ch, " ") 
                    + " " + ucd.getName(ch)
                );
            } else {
                log.println(//title + counter + " "
                    "<b>" + Utility.hex(ch, " ") 
                    + " " + ucd.getName(ch) + "</b>"
                );
            }
        } else {
            String keyD = printableKey(backD.get(chobj));
            String keyN = printableKey(backN.get(chobj));
            if (keyD.equals(keyN)) {
                log.println(//title + counter + " "
                    Utility.hex(ch, " ") + " " + keyN);
            } else {
                log.println(//title + counter + " "
                    "<font color='#009900'>" + Utility.hex(ch, " ") + " " + keyN
                    + "</font><br><font color='#000099'>" + Utility.hex(decomp, " ") + " " + keyD + "</font>"
                );
            }
        }
    }
    
    static String printableKey(Object keyobj) {
        String sortKey;
        if (keyobj == null) {
            sortKey = "NULL!!";
        } else {
            sortKey = keyobj.toString();
            sortKey = sortKey.substring(0,sortKey.length()-1);
            sortKey = UCA.toString(sortKey);
        }
        return sortKey;
    }
    
    /*
      LINKS</td></tr><tr><td><blockquote>
     CONTENTS     
    */
    
   
    static void writeTail(PrintWriter out, int counter, String title, String other, boolean show) throws IOException {
        copyFile(out, "HTML-Part2.txt");
        /*
        out.println("</tr></table></center></div>");
        out.println("</body></html>");
        */
        out.close();
    }
    
    static String pad (int number) {
        String num = Integer.toString(number);
        if (num.length() < 2) num = "0" + number;
        return num;
    }

    static PrintWriter writeHead(int counter, int end, String title, String other, String version, boolean show) throws IOException {

        PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html", Utility.UTF8_WINDOWS);
        
        copyFile(out, "HTML-Part1.txt");
        /*
        out.println("<html><head>");
        out.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
        out.println("<title>" + HTMLString(title) + "</title>");
        out.println("<style>");
        out.println("<!--");
        //out.println("td           { font-size: 18pt; font-family: Bitstream Cyberbit, Arial Unicode MS; text-align: Center}");
        out.println("td           { font-size: 18pt; text-align: Center}");
        out.println("td.right           { font-size: 12pt; text-align: Right}");
        out.println("td.title           { font-size: 18pt; text-align: Center}");
        out.println("td.left           { font-size: 12pt; text-align: Left}");
        //out.println("th         { background-color: #C0C0C0; font-size: 18pt; font-family: Arial Unicode MS, Bitstream Cyberbit; text-align: Center }");
        out.println("tt         { font-size: 8pt; }");
        //out.println("code           { font-size: 8pt; }");
        out.println("-->");
        out.println("</style></head><body bgcolor='#FFFFFF'>");
        
        // header
        out.print("<table width='100%'><tr>");
        out.println("<td><p align='left'><font size='3'><a href='index.html'>Instructions</a></font></td>");
        out.println("<td>" + HTMLString(title) + " Version" + version + "</td>");
        out.println("<td><p align='right'><font size='3'><a href='" + other + pad(counter) + ".html'>"
            + (show ? "Hide" : "Show") + " Key</a></td>");
        out.println("</tr></table>");
        /*
        <table width="100%">
  <tr>
    <td.left><a href="Collation.html">
      <font size="3">Instructions</font></a>
    <td>
      <td.title>Collation Version-2.1.9d7
    <td>
      <p align="right"><a href="CollationKey24.html"><font size="3">Show Key</font></a>
  </tr>
*/

        
        // index
        out.print("<table width='100%'><tr>");
        out.println("<td><p align='left'><font size='3'><a href='index.html'>Instructions</a></font></td>");
        out.println("<td>" + HTMLString(title) + " Version" + version + "</td>");
        out.println("<td><p align='right'><font size='3'><a href='" + other + pad(counter) + ".html'>"
            + (show ? "Hide" : "Show") + " Key</a></td>");
        out.println("</tr></table>");
        
        out.print("<table width='100%'><tr>");
        out.print("<td width='1%'><p align='left'>");
        if (counter > 0) {
            out.print("<a href='" + title + pad(counter-1) + ".html'>&lt;&lt;</a>");
        } else {
            out.print("<font color='#999999'>&lt;&lt;</font>");
        }
        out.println("</td>");
        out.println("<td><p align='center'>");
        boolean lastFar = false;
        for (int i = 0; i <= end; ++i) {
            boolean far = (i < counter-2 || i > counter+2);
            if (far && ((i % 5) != 0) && (i != end)) continue;
            if (i != 0 && lastFar != far) out.print(" - ");
            lastFar = far;
            if (i != counter) {
                out.print("<a href='" + title + pad(i) + ".html'>" + i + "</a>");
            } else {
                out.print("<font color='#FF0000'>" + i + "</font>");
            }
            out.println();
        }
        out.println("</td>");
        out.println("<td width='1%'><p align='right'>");
        if (counter < end) {
            out.print("<a href='" + title + pad(counter+1) + ".html'>&gt;&gt;</a>");
        } else {
            out.print("<font color='#999999'>&gt;&gt;</font>");
        }
        out.println("</td></tr></table>");
        // standard template!!!
        out.println("</td></tr><tr><td><blockquote>");
        //out.println("<p><div align='center'><center><table border='1'><tr>");
        return out;
    }
    
    static int getStrengthDifference(String old, String newStr) {
        int result = 5;
        int min = old.length();
        if (newStr.length() < min) min = newStr.length();
        for (int i = 0; i < min; ++i) {
            char ch1 = old.charAt(i);
            char ch2 = newStr.charAt(i);
            if (ch1 != ch2) return result;
            // see if we get difference before we get 0000.
            if (ch1 == 0) --result;
        }
        if (newStr.length() != old.length()) return 1;
        return 0;
    }
        
     
    static final boolean needsXMLQuote(String source, boolean quoteApos) {
        for (int i = 0; i < source.length(); ++i) {
            char ch = source.charAt(i);
            if (ch < ' ' || ch == '<' || ch == '&' || ch == '>') return true;
            if (quoteApos & ch == '\'') return true;
            if (ch == '\"') return true;
            if (ch >= '\uD800' && ch <= '\uDFFF') return true;
            if (ch >= '\uFFFE') return true;
        }
        return false;
    }
    
    public static final String XMLString(int[] cps) {
        return XMLBaseString(cps, cps.length, true);
    }
    
    public static final String XMLString(int[] cps, int len) {
        return XMLBaseString(cps, len, true);
    }

    public static final String XMLString(String source) {
        return XMLBaseString(source, true);
    }       
    
    public static final String HTMLString(int[] cps) {
        return XMLBaseString(cps, cps.length, false);
    }
    
    public static final String HTMLString(int[] cps, int len) {
        return XMLBaseString(cps, len, false);
    }

    public static final String HTMLString(String source) {
        return XMLBaseString(source, false);
    }       
    
    public static final String XMLBaseString(int[] cps, int len, boolean quoteApos) {
        StringBuffer temp = new StringBuffer();
        for (int i = 0; i < len; ++i) {
            temp.append((char)cps[i]);
        }
        return XMLBaseString(temp.toString(), quoteApos);
    }

    public static final String XMLBaseString(String source, boolean quoteApos) {
        if (!needsXMLQuote(source, quoteApos)) return source;
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < source.length(); ++i) {
            char ch = source.charAt(i);
            if (ch < ' '
              || ch >= '\u007F' && ch <= '\u009F'
              || ch >= '\uD800' && ch <= '\uDFFF'
              || ch >= '\uFFFE') {
                result.append('\uFFFD');
                /*result.append("#x");
                result.append(cpName(ch));
                result.append(";");
                */
            } else if (quoteApos && ch == '\'') {
                result.append("&apos;");
            } else if (ch == '\"') {
                result.append("&quot;");
            } else if (ch == '<') {
                result.append("&lt;");
            } else if (ch == '&') {
                result.append("&amp;");
            } else if (ch == '>') {
                result.append("&gt;");
             } else {
                result.append(ch);
            }
        }
        return result.toString();
    }
    
    static int mapToStartOfRange(int ch) {
        if (ch <= 0x3400) return ch;         // CJK Ideograph Extension A
        if (ch <= 0x4DB5) return 0x3400;
        if (ch <= 0x4E00) return ch;         // CJK Ideograph
        if (ch <= 0x9FA5) return 0x4E00;
        if (ch <= 0xAC00) return ch;         // Hangul Syllable
        if (ch <= 0xD7A3) return 0xAC00;
        if (ch <= 0xD800) return ch;         // Non Private Use High Surrogate
        if (ch <= 0xDB7F) return 0xD800;
        if (ch <= 0xDB80) return ch;         // Private Use High Surrogate
        if (ch <= 0xDBFF) return 0xDB80;
        if (ch <= 0xDC00) return ch;         // Low Surrogate
        if (ch <= 0xDFFF) return 0xDC00;
        if (ch <= 0xE000) return ch;         // Private Use
        if (ch <= 0xF8FF) return 0xE000;
        if (ch <= 0xF0000) return ch;       // Plane 15 Private Use
        if (ch <= 0xFFFFD) return 0xF0000;
        if (ch <= 0x100000) return ch;       // Plane 16 Private Use
        return 0x100000;
    }
    

}
-												Forgot copyrights

X-SVN-Rev: 5642
											
										
										
											2001-08-31 00:20:40 +00:00
+								/**
 								*******************************************************************************
 								* Copyright (C) 1996-2001, International Business Machines Corporation and    *
 								* others. All Rights Reserved.                                                *
 								*******************************************************************************
 								*
 								* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
-												Fixing Break Charts & Tests

X-SVN-Rev: 11428
											
										
										
											2003-04-01 02:53:07 +00:00
+								* $Date: 2003/04/01 02:51:57 $
 								* $Revision: 1.31 $
-												Forgot copyrights

X-SVN-Rev: 5642
											
										
										
											2001-08-31 00:20:40 +00:00
+								*
 								*******************************************************************************
 								*/
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								package com.ibm.text.UCA;
 								import java.util.*;
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								import com.ibm.icu.text.UTF16;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								import com.ibm.icu.text.UnicodeSet;
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								import com.ibm.icu.text.CanonicalIterator;
-												Now quoting rule whitespaces. Changed [last non-ignorable] to [last regular]

X-SVN-Rev: 11362
											
										
										
											2003-03-19 23:31:12 +00:00
+								import com.ibm.icu.impl.UCharacterProperty;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								import java.io.*;
 								//import java.text.*;
 								//import com.ibm.text.unicode.*;
 								import java.text.RuleBasedCollator;
 								import java.text.CollationElementIterator;
 								import java.text.Collator;
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								import java.text.DateFormat;
 								import java.text.SimpleDateFormat;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								import com.ibm.text.UCD.*;
 								import com.ibm.text.UCD.UCD_Types;
 								import com.ibm.text.utility.*;
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								import com.ibm.text.UCD.Normalizer;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								public class WriteCollationData implements UCD_Types, UCA_Types {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
 									static final boolean DEBUG = false;
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+									static final boolean DEBUG_SHOW_ITERATION = false;
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    public static final String copyright =
 								      "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
 								    static final boolean EXCLUDE_UNSUPPORTED = true;
 								    static final boolean GENERATED_NFC_MISMATCHES = true;
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								    static final boolean DO_CHARTS = false;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								    static final String UNICODE_VERSION = UCD.latestVersion;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static UCA collator;
 								    static char unique = '\u0000';
 								    static TreeMap sortedD = new TreeMap();
 								    static TreeMap sortedN = new TreeMap();
 								    static HashMap backD = new HashMap();
 								    static HashMap backN = new HashMap();
 								    static TreeMap duplicates = new TreeMap();
 								    static int duplicateCount = 0;
 								    static PrintWriter log;
 								    static UCD ucd;
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								    static public void javatest() throws Exception {
 								        checkJavaRules("& J , K / B & K , M", new String[] {"JA", "MA", "KA", "KC", "JC", "MC"});
 								        checkJavaRules("& J , K / B , M", new String[] {"JA", "MA", "KA", "KC", "JC", "MC"});
 								    }
 								    static public void checkJavaRules(String rules, String[] tests) throws Exception {
 								        System.out.println();
 								        System.out.println("Rules: " + rules);
 								        System.out.println();
 								        // duplicate the effect of ICU 1.8 by grabbing the default rules and appending
 								        RuleBasedCollator defaultCollator = (RuleBasedCollator) Collator.getInstance(Locale.US);
 								        RuleBasedCollator col = new RuleBasedCollator(defaultCollator.getRules() + rules);
 								        // check to make sure each pair is in order
 								        int i = 1;
 								        for (; i < tests.length; ++i) {
 								            System.out.println(tests[i-1] + "\t=> " + showJavaCollationKey(col, tests[i-1]));
 								            if (col.compare(tests[i-1], tests[i]) > 0) {
 								                System.out.println("Failure: " + tests[i-1] + " > " + tests[i]);
 								            }
 								        }
 								        System.out.println(tests[i-1] + "\t=> " + showJavaCollationKey(col, tests[i-1]));
 								    }
 								    static public String showJavaCollationKey(RuleBasedCollator col, String test) {
 								        CollationElementIterator it = col.getCollationElementIterator(test);
 								        String result = "[";
 								        for (int i = 0; ; ++i) {
 								            int ce = it.next();
 								            if (ce == CollationElementIterator.NULLORDER) break;
 								            if (i != 0) result += ", ";
 								            result += Utility.hex(ce,8);
 								        }
 								        return result + "]";
 								    }
 								    //private static final String DIR = "c:\\Documents and Settings\\Davis\\My Documents\\UnicodeData\\Update 3.0.1\\";
 								    //private static final String DIR31 = "c:\\Documents and Settings\\Davis\\My Documents\\UnicodeData\\Update 3.1\\";
 								    static public void writeCaseExceptions() {
 								        System.err.println("Writing Case Exceptions");
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								        Normalizer NFKC = new Normalizer(Normalizer.NFKC, UNICODE_VERSION);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        for (char a = 0; a < 0xFFFF; ++a) {
 								            if (!ucd.isRepresented(a)) continue;
 								            //if (0xA000 <= a && a <= 0xA48F) continue; // skip YI
 								            String b = Case.fold(a);
 								            String c = NFKC.normalize(b);
 								            String d = Case.fold(c);
 								            String e = NFKC.normalize(d);
 								            if (!e.equals(c)) {
 								                System.out.println(Utility.hex(a) + "; " + Utility.hex(d, " ") + " # " + ucd.getName(a));
 								                /*
 								                System.out.println(Utility.hex(a)
 								                + ", " + Utility.hex(b, " ")
 								                + ", " + Utility.hex(c, " ")
 								                + ", " + Utility.hex(d, " ")
 								                + ", " + Utility.hex(e, " "));
 								                System.out.println(ucd.getName(a)
 								                + ", " + ucd.getName(b)
 								                + ", " + ucd.getName(c)
 								                + ", " + ucd.getName(d)
 								                + ", " + ucd.getName(e));
 								                */
 								            }
 								            String f = Case.fold(e);
 								            String g = NFKC.normalize(f);
 								            if (!f.equals(d) || !g.equals(e)) System.out.println("!!!!!!SKY IS FALLING!!!!!!");
 								        }
 								    }
 								    static public void writeCaseFolding() throws IOException {
 								        System.err.println("Writing Javascript data");
-												bunch o' changes

X-SVN-Rev: 9983
											
										
										
											2002-10-05 02:16:17 +00:00
+								        BufferedReader in = Utility.openUnicodeFile("CaseFolding", UNICODE_VERSION, true, Utility.LATIN1);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        // new BufferedReader(new FileReader(DIR31 + "CaseFolding-3.d3.alpha.txt"), 64*1024);
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        // log = new PrintWriter(new FileOutputStream("CaseFolding_data.js"));
-												Added chart program, minor edits.

X-SVN-Rev: 9918
											
										
										
											2002-09-25 06:40:14 +00:00
+								        log = Utility.openPrintWriter("CaseFolding_data.js", Utility.UTF8_WINDOWS);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("var CF = new Object();");
 								        int count = 0;
 								        while (true) {
 								            String line = in.readLine();
 								            if (line == null) break;
 								            int comment = line.indexOf('#');                    // strip comments
 								            if (comment != -1) line = line.substring(0,comment);
 								            if (line.length() == 0) continue;
 								            int semi1 = line.indexOf(';');
 								            int semi2 = line.indexOf(';', semi1+1);
 								            int semi3 = line.indexOf(';', semi2+1);
 								            char type = line.substring(semi1+1,semi2).trim().charAt(0);
 								            if (type == 'C' || type == 'F' || type == 'T') {
 								                String code = line.substring(0,semi1).trim();
 								                String result = " " + line.substring(semi2+1,semi3).trim();
 								                result = replace(result, ' ', "\\u");
 								                log.println("\t CF[0x" + code + "]='" + result + "';");
 								                count++;
 								            }
 								        }
 								        log.println("// " + count + " case foldings total");
 								        in.close();
 								        log.close();
 								    }
 								    static public String replace(String source, char toBeReplaced, String toReplace) {
 								        StringBuffer result = new StringBuffer();
 								        for (int i = 0; i < source.length(); ++i) {
 								            char c = source.charAt(i);
 								            if (c == toBeReplaced) {
 								                result.append(toReplace);
 								            } else {
 								                result.append(c);
 								            }
 								        }
 								        return result.toString();
 								    }
 								    static public void writeJavascriptInfo() throws IOException {
 								        System.err.println("Writing Javascript data");
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								        Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
 								        Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        //log = new PrintWriter(new FileOutputStream("Normalization_data.js"));
-												Added chart program, minor edits.

X-SVN-Rev: 9918
											
										
										
											2002-09-25 06:40:14 +00:00
+								        log = Utility.openPrintWriter("Normalization_data.js", Utility.LATIN1_WINDOWS);
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        int count = 0;
 								        int datasize = 0;
 								        int max = 0;
 								        int over7 = 0;
 								        log.println("var KD = new Object(); // NFKD compatibility decomposition mappings");
 								        log.println("// NOTE: Hangul is done in code!");
 								        CompactShortArray csa = new CompactShortArray((short)0);
 								        for (char c = 0; c < 0xFFFF; ++c) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (0xAC00 <= c && c <= 0xD7A3) continue;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            if (!normKD.isNormalized(c)) {
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                ++count;
 								                String decomp = normKD.normalize(c);
 								                datasize += decomp.length();
 								                if (max < decomp.length()) max = decomp.length();
 								                if (decomp.length() > 7) ++over7;
 								                csa.setElementAt(c, (short)count);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                log.println("\t KD[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								        }
 								        csa.compact();
 								        log.println("// " + count + " NFKD mappings total");
 								        log.println("// " + datasize + " total characters of results");
 								        log.println("// " + max + " string length, maximum");
 								        log.println("// " + over7 + " result strings with length > 7");
 								        log.println("// " + csa.storage() + " trie length (doesn't count string size)");
 								        log.println();
 								        count = 0;
 								        datasize = 0;
 								        max = 0;
 								        log.println("var D = new Object();  // NFD canonical decomposition mappings");
 								        log.println("// NOTE: Hangul is done in code!");
 								        csa = new CompactShortArray((short)0);
 								        for (char c = 0; c < 0xFFFF; ++c) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (0xAC00 <= c && c <= 0xD7A3) continue;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            if (!normD.isNormalized(c)) {
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                ++count;
 								                String decomp = normD.normalize(c);
 								                datasize += decomp.length();
 								                if (max < decomp.length()) max = decomp.length();
 								                csa.setElementAt(c, (short)count);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                log.println("\t D[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								        }
 								        csa.compact();
 								        log.println("// " + count + " NFD mappings total");
 								        log.println("// " + datasize + " total characters of results");
 								        log.println("// " + max + " string length, maximum");
 								        log.println("// " + csa.storage() + " trie length (doesn't count string size)");
 								        log.println();
 								        count = 0;
 								        datasize = 0;
 								        log.println("var CC = new Object(); // canonical class mappings");
 								        CompactByteArray cba = new CompactByteArray();
 								        for (char c = 0; c < 0xFFFF; ++c) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            int canClass = normKD.getCanonicalClass(c);
 								            if (canClass != 0) {
 								                ++count;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                log.println("\t CC[0x" + Utility.hex(c) + "]=" + canClass + ";");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								        }
 								        cba.compact();
 								        log.println("// " + count + " canonical class mappings total");
 								        log.println("// " + cba.storage() + " trie length");
 								        log.println();
 								        count = 0;
 								        datasize = 0;
 								        log.println("var C = new Object();  // composition mappings");
 								        log.println("// NOTE: Hangul is done in code!");
 								        System.out.println("WARNING -- COMPOSITIONS UNFINISHED!!");
 								        /*
 								        IntHashtable.IntEnumeration enum = normKD.getComposition();
 								        while (enum.hasNext()) {
 								            int key = enum.next();
 								            char val = (char) enum.value();
 								            if (0xAC00 <= val && val <= 0xD7A3) continue;
 								            ++count;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            log.println("\tC[0x" + Utility.hex(key) + "]=0x" + Utility.hex(val) + ";");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
 								        log.println("// " + count + " composition mappings total");
 								        log.println();
 								        */
 								        log.close();
 								        System.err.println("Done writing Javascript data");
 								    }
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								    static void writeConformance(String filename, byte option, boolean shortPrint)  throws IOException {
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								        Default.setUCD();
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        //UCD ucd30 = UCD.make("3.0.0");
 								/*
 								U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
 								 => U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS, U+0304 COMBINING MACRON
 								*/
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        if (DEBUG) {
 								            String[] testList = {"\u3192", "\u3220", "\u0344", "\u0385", "\uF934", "U", "U\u0308", "\u00DC", "\u00DC\u0304", "U\u0308\u0304"};
 								            for (int jj = 0; jj < testList.length; ++jj) {
 								                String t = testList[jj];
 								                System.out.println(ucd.getCodeAndName(t));
 								                CEList ces = collator.getCEList(t, true);
 								                System.out.println("CEs:    " + ces);
 								                String test = collator.getSortKey(t, option);
 								                System.out.println("Decomp: " + collator.toString(test));
 								                test = collator.getSortKey(t, option, false);
 								                System.out.println("No Dec: " + collator.toString(test));
 								            }
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												Added chart program, minor edits.

X-SVN-Rev: 9918
											
										
										
											2002-09-25 06:40:14 +00:00
+								        PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        //if (!shortPrint) log.write('\uFEFF');
 								        log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
 								        log.println("# Generated:   " + getNormalDate());
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        System.out.println("Sorting");
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								        int counter = 0;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, null);
 								        cc.enableSamples();
 								        UnicodeSet found2 = new UnicodeSet();
 								        while (true) {
 								            String s = cc.next();
 								            if (s == null) break;
 								            found2.addAll(s);
 								            if (DEBUG_SHOW_ITERATION) {
 								                int cp = UTF16.charAt(s, 0);
 								                if (cp == 0x220 || !ucd.isAssigned(cp) || ucd.isCJK_BASE(cp)) {
 								                    System.out.println(ucd.getCodeAndName(s));
 								                }
 								            }
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								            Utility.dot(counter++);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								            addStringX(s, option);
 								        }
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        // Add special examples
 								        /*
 								        addStringX("\u2024\u2024", option);
 								        addStringX("\u2024\u2024\u2024", option);
 								        addStringX("\u2032\u2032", option);
 								        addStringX("\u2032\u2032\u2032", option);
 								        addStringX("\u2033\u2033\u2033", option);
 								        addStringX("\u2034\u2034", option);
 								        */
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        UnicodeSet found = collator.found;
 								        if (!found2.containsAll(found2)) {
 								            System.out.println("In both: " + new UnicodeSet(found).retainAll(found2).toPattern(true));
 								            System.out.println("In UCA but not iteration: " + new UnicodeSet(found).removeAll(found2).toPattern(true));
 								            System.out.println("In iteration but not UCA: " + new UnicodeSet(found2).removeAll(found).toPattern(true));
 								            throw new IllegalArgumentException("Inconsistent data");
 								        }
 								        /*
 								        for (int i = 0; i <= 0x10FFFF; ++i) {
 								            if (!ucd.isAssigned(i)) continue;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            addStringX(UTF32.valueOf32(i), option);
 								        }
 								        Hashtable multiTable = collator.getContracting();
 								        Enumeration enum = multiTable.keys();
 								        while (enum.hasMoreElements()) {
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								            Utility.dot(counter++);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            addStringX((String)enum.nextElement(), option);
 								        }
 								        for (int i = 0; i < extraConformanceTests.length; ++i) { // put in sample non-characters
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								            Utility.dot(counter++);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            String s = UTF32.valueOf32(extraConformanceTests[i]);
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								            Utility.fixDot();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            System.out.println("Adding: " + Utility.hex(s));
 								            addStringX(s, option);
 								        }
 								        for (int i = 0; i < extraConformanceRanges.length; ++i) {
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								            Utility.dot(counter++);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            int start = extraConformanceRanges[i][0];
 								            int end = extraConformanceRanges[i][1];
 								            int increment = ((end - start + 1) / 303) + 1;
 								            //System.out.println("Range: " + start + ", " + end + ", " + increment);
 								            addStringX(start, option);
 								            for (int j = start+1; j < end-1; j += increment) {
 								                addStringX(j, option);
 								                addStringX(j+1, option);
 								            }
 								            addStringX(end-1, option);
 								            addStringX(end, option);
 								        }
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        */
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								        Utility.fixDot();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        System.out.println("Total: " + sortedD.size());
 								        Iterator it;
 								        System.out.println("Writing");
 								        //String version = collator.getVersion();
 								        it = sortedD.keySet().iterator();
 								        String lastKey = "";
 								        while (it.hasNext()) {
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								            Utility.dot(counter);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            String key = (String) it.next();
 								            String source = (String) sortedD.get(key);
 								            int fluff = key.charAt(key.length() - 1);
 								            key = key.substring(0, key.length()- fluff - 2);
 								            //String status = key.equals(lastKey) ? "*" : "";
 								            //lastKey = key;
 								            //log.println(source);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								            char extra = source.charAt(source.length()-1);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            String clipped = source.substring(0, source.length()-1);
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								            if (clipped.charAt(0) == LOW_ACCENT && extra != LOW_ACCENT) {
 								                extra = LOW_ACCENT;
 								                clipped = source.substring(1);
 								            }
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								            if (!shortPrint) {
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                log.print(Utility.hex(source));
 								                log.print(
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    ";\t# " + (extra != LOW_ACCENT ? extra : '.') + " " + ucd.getName(clipped, SHORT) + "\t" + UCA.toString(key));
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								            } else {
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								                log.print(Utility.hex(source) + ";\t" + Utility.hex(clipped));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            log.println();
 								        }
 								        log.close();
 								        sortedD.clear();
 								        System.out.println("Done");
 								    }
 								    static void addStringX(int x, byte option) {
 								        addStringX(UTF32.valueOf32(x), option);
 								    }
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								    static final char LOW_ACCENT = '\u0334';
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								    static final String SUPPLEMENTARY_ACCENT = UTF16.valueOf(0x1D165);
 								    static final String COMPLETELY_IGNOREABLE = "\u0001";
 								    static final String COMPLETELY_IGNOREABLE_ACCENT = "\u0591";
 								    static final String[] CONTRACTION_TEST = {SUPPLEMENTARY_ACCENT, COMPLETELY_IGNOREABLE, COMPLETELY_IGNOREABLE_ACCENT};
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								    static int addCounter = 0;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								    static void addStringX(String s, byte option) {
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								        int firstChar = UTF16.charAt(s,0);
 								        // add characters with different strengths, to verify the order
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        addStringY(s + 'a', option);
 								        addStringY(s + 'b', option);
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								        addStringY(s + '<27>', option);
 								        addStringY(s + 'A', option);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        addStringY(s + '!', option);
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								        if (option == SHIFTED && collator.isVariable(firstChar)) addStringY(s + LOW_ACCENT, option);
 								        // NOW, if the character decomposes, or is a combining mark (non-zero), try combinations
 								        if (Default.ucd.getCombiningClass(firstChar) > 0
 								            || !Default.nfd.isNormalized(s) && !Default.ucd.isHangulSyllable(firstChar)) {
 								        // if it ends with a non-starter, try the decompositions.
 								            String decomp = Default.nfd.normalize(s);
 								            if (Default.ucd.getCombiningClass(UTF16.charAt(decomp, decomp.length()-1)) > 0) {
 								                if (canIt == null) canIt = new CanonicalIterator(".");
 								                canIt.setSource(s + LOW_ACCENT);
 								                int limit = 4;
 								                for (String can = canIt.next(); can != null; can = canIt.next()) {
 								                    if (s.equals(can)) continue;
 								                    if (--limit < 0) continue; // just include a sampling
 								                    addStringY(can, option);
 								                    // System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(can));
 								                }
 								            }
 								        }
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								        if (UTF16.countCodePoint(s) > 1) {
 								            for (int i = 1; i < s.length(); ++i) {
 								                if (UTF16.isLeadSurrogate(s.charAt(i-1))) continue; // skip if in middle of supplementary
 								                for (int j = 0; j < CONTRACTION_TEST.length; ++j) {
 								                    String extra = s.substring(0,i) + CONTRACTION_TEST[j] + s.substring(i);
 								                    addStringY(extra + 'a', option);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								                    if (DEBUG) System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(extra));
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								                }
 								            }
 								        }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    }
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								    static CanonicalIterator canIt = null;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static char counter;
 								    static void addStringY(String s, byte option) {
 								        String cpo = UCA.codePointOrder(s);
 								        String colDbase = collator.getSortKey(s, option, true) + "\u0000" + cpo + (char)cpo.length();
 								        sortedD.put(colDbase, s);
 								    }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								    static UCD ucd_uca_base = null;
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    /**
 								     * Check that the primaries are the same as the compatibility decomposition.
 								     */
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								    static void checkBadDecomps(int strength, boolean decomposition, UnicodeSet alreadySeen) {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        if (ucd_uca_base == null) {
 								            ucd_uca_base = UCD.make(UCA.UCA_BASE);
 								        }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        int oldStrength = collator.getStrength();
 								        collator.setStrength(strength);
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								        Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        Normalizer nfc = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
 								        switch (strength) {
 								            case 1: log.println("<h2>3. Primaries Incompatible with Decompositions</h2>"); break;
 								            case 2: log.println("<h2>4. Secondaries Incompatible with Decompositions</h2>"); break;
 								            case 3: log.println("<h2>5. Tertiaries Incompatible with Decompositions</h2>");
 								                log.println("<p>Note: Tertiary differences are not really errors; these are just warnings</p>");
 								            break;
 								            default: throw new IllegalArgumentException("bad strength: " + strength);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<p>Warning: only checking characters defined in base: " + ucd_uca_base.getVersion() + "</p>");
 								        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
 								        int errorCount = 0;
 								        UnicodeSet skipSet = new UnicodeSet();
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        for (int ch = 0; ch < 0x10FFFF; ++ch) {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        	if (!ucd_uca_base.isAllocated(ch)) continue;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            if (nfkd.isNormalized(ch)) continue;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								            if (alreadySeen.contains(ch)) continue;
 								            Utility.dot(ch);
 								            String decomp = nfkd.normalize(ch);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            if (ch != ' ' && decomp.charAt(0) == ' ') {
 								                skipSet.add(ch);
 								                continue; // skip wierd decomps
 								            }
 								            if (ch != '\u0640' && decomp.charAt(0) == '\u0640') {
 								                skipSet.add(ch);
 								                continue; // skip wierd decomps
 								            }
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
 								            String sortKey = collator.getSortKey(UTF16.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
 								            String decompSortKey = collator.getSortKey(decomp, UCA.NON_IGNORABLE, decomposition);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (false && strength == 2) {
 								                sortKey = remove(sortKey, '\u0020');
 								                decompSortKey = remove(decompSortKey, '\u0020');
 								            }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
 								            if (sortKey.equals(decompSortKey)) continue; // no problem!
 								            // fix key in the case of strength 3
 								            if (strength == 3) {
 								                String newSortKey = remapSortKey(ch, decomposition);
 								                if (!sortKey.equals(newSortKey)) {
 								                    System.out.println("Fixing: " + ucd.getCodeAndName(ch));
 								                    System.out.println("  Old:" + collator.toString(decompSortKey));
 								                    System.out.println("  New: " + collator.toString(newSortKey));
 								                    System.out.println("  Tgt: " + collator.toString(sortKey));
 								                }
 								                decompSortKey = newSortKey;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
 								            if (sortKey.equals(decompSortKey)) continue; // no problem!
 								            log.println("<tr><td>" + Utility.hex(ch)
 								                + "</td><td>" + UCA.toString(sortKey)
 								                + "</td><td>" + UCA.toString(decompSortKey)
 								                + "</td><td>" + ucd.getName(ch)
 								                + "</td></tr>"
 								                );
 								            alreadySeen.add(ch);
 								            errorCount++;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
 								        log.println("</table>");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<p>Errors: " + errorCount + "</p>");
 								        log.println("<p>Space/Tatweel exceptions: " + skipSet.toPattern(true) + "</p>");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	log.flush();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        collator.setStrength(oldStrength);
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        Utility.fixDot();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								    static String remapSortKey(int cp, boolean decomposition) {
 								        if (toD.isNormalized(cp)) return remapCanSortKey(cp, decomposition);
 								        // we know that it is not NFKD.
 								        String canDecomp = toD.normalize(cp);
 								        String result = "";
 								        int ch;
 								        for (int j = 0; j < canDecomp.length(); j += UTF16.getCharCount(ch)) {
 								            ch = UTF16.charAt(canDecomp, j);
 								            System.out.println("* " + Default.ucd.getCodeAndName(ch));
 								            String newSortKey = remapCanSortKey(ch, decomposition);
 								            System.out.println("* " + UCA.toString(newSortKey));
 								            result = mergeSortKeys(result, newSortKey);
 								            System.out.println("= " + UCA.toString(result));
 								        }
 								        return result;
 								    }
 								    static String remapCanSortKey(int ch, boolean decomposition) {
 								        String compatDecomp = Default.nfkd.normalize(ch);
 								        String decompSortKey = collator.getSortKey(compatDecomp, UCA.NON_IGNORABLE, decomposition);
 								        byte type = ucd.getDecompositionType(ch);
 								        int pos = decompSortKey.indexOf(UCA.LEVEL_SEPARATOR) + 1; // after first separator
 								        pos = decompSortKey.indexOf(UCA.LEVEL_SEPARATOR, pos) + 1; // after second separator
 								        String newSortKey = decompSortKey.substring(0, pos);
 								        for (int i = pos; i < decompSortKey.length(); ++i) {
 								            int weight = decompSortKey.charAt(i);
 								            int newWeight = CEList.remap(ch, type, weight);
 								            if (i > pos + 1) newWeight = 0x1F;
 								            newSortKey += (char)newWeight;
 								        }
 								        return newSortKey;
 								    }
 								    // keys must be of the same strength
 								    static String mergeSortKeys(String key1, String key2) {
 								        StringBuffer result = new StringBuffer();
 								        int end1 = 0, end2 = 0;
 								        while (true) {
 								            int pos1 = key1.indexOf(UCA.LEVEL_SEPARATOR, end1);
 								            int pos2 = key2.indexOf(UCA.LEVEL_SEPARATOR, end2);
 								            if (pos1 < 0) {
 								                result.append(key1.substring(end1)).append(key2.substring(end2));
 								                return result.toString();
 								            }
 								            if (pos2 < 0) {
 								                result.append(key1.substring(end1, pos1)).append(key2.substring(end2)).append(key1.substring(pos1));
 								                return result.toString();
 								            }
 								            result.append(key1.substring(end1, pos1)).append(key2.substring(end2, pos2)).append(UCA.LEVEL_SEPARATOR);
 								            end1 = pos1 + 1;
 								            end2 = pos2 + 1;
 								        }
 								    }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static final String remove (String s, char ch) {
 								        StringBuffer buf = new StringBuffer();
 								        for (int i = 0; i < s.length(); ++i) {
 								            char c = s.charAt(i);
 								            if (c == ch) continue;
 								            buf.append(c);
 								        }
 								        return buf.toString();
 								    }
 								        /*
 								        log = new PrintWriter(new FileOutputStream("Frequencies.html"));
 								        log.println("<html><body>");
 								        MessageFormat mf = new MessageFormat("<tr><td><tt>{0}</tt></td><td><tt>{1}</tt></td><td align='right'><tt>{2}</tt></td><td align='right'><tt>{3}</tt></td></tr>");
 								        MessageFormat mf2 = new MessageFormat("<tr><td><tt>{0}</tt></td><td align='right'><tt>{1}</tt></td></tr>");
 								        String header = mf.format(new String[] {"Start", "End", "Count", "Subtotal"});
 								        int count;
 								        log.println("<h2>Writing Used Weights</h2>");
 								        log.println("<p>Primaries</p><table border='1'>" + mf.format(new String[] {"Start", "End", "Count", "Subtotal"}));
 								        count = collator.writeUsedWeights(log, 1, mf);
 								        log.println(MessageFormat.format("<tr><td>Count:</td><td>{0}</td></tr>", new Object[] {new Integer(count)}));
 								        log.println("</table>");
 								        log.println("<p>Secondaries</p><table border='1'>" + mf2.format(new String[] {"Code", "Frequency"}));
 								        count = collator.writeUsedWeights(log, 2, mf2);
 								        log.println(MessageFormat.format("<tr><td>Count:</td><td>{0}</td></tr>", new Object[] {new Integer(count)}));
 								        log.println("</table>");
 								        log.println("<p>Tertiaries</p><table border='1'>" + mf2.format(new String[] {"Code", "Frequency"}));
 								        count = collator.writeUsedWeights(log, 3, mf2);
 								        log.println(MessageFormat.format("<tr><td>Count:</td><td>{0}</td></tr>", new Object[] {new Integer(count)}));
 								        log.println("</table>");
 								        log.println("</body></html>");
 								        log.close();
 								        */
 								    static int[] compactSecondary;
 								    /*static void checkEquivalents() {
 								        Normalizer nfkd = new Normalizer(Normalizer.NFKC);
 								        Normalizer nfd = new Normalizer(Normalizer.NFKD);
 								        for (char c = 0; c < 0xFFFF; ++c) {
 								    }*/
 								    static void testCompatibilityCharacters() throws IOException {
-												Added chart program, minor edits.

X-SVN-Rev: 9918
											
										
										
											2002-09-25 06:40:14 +00:00
+								        log = Utility.openPrintWriter("UCA_CompatComparison.txt", Utility.UTF8_WINDOWS);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        int[] kenCes = new int[50];
 								        int[] markCes = new int[50];
 								        int[] kenComp = new int[50];
 								        Map forLater = new TreeMap();
 								        int count = 0;
 								        int typeLimit = UCD_Types.CANONICAL;
 								        boolean decompType = false;
 								        if (false) {
 								            typeLimit = UCD_Types.COMPATIBILITY;
 								            decompType = true;
 								        }
 								        // first find all the characters that cannot be generated "correctly"
 								        for (int i = 0; i < 0xFFFF; ++i) {
 								            int type = ucd.getDecompositionType(i);
 								            if (type < typeLimit) continue;
 								            int ceType = collator.getCEType((char)i);
 								            if (ceType >= collator.FIXED_CE) continue;
 								            // fix type
 								            type = getDecompType(i);
 								            String s = String.valueOf((char)i);
 								            int kenLen = collator.getCEs(s, decompType, kenCes); // true
 								            int markLen = fixCompatibilityCE(s, true, markCes, false);
 								            if (!arraysMatch(kenCes, kenLen, markCes, markLen)) {
 								                int kenCLen = fixCompatibilityCE(s, true, kenComp, true);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                String comp = CEList.toString(kenComp, kenCLen);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								                if (arraysMatch(kenCes, kenLen, kenComp, kenCLen)) {
 								                    forLater.put((char)(COMPRESSED | type) + s, comp);
 								                    continue;
 								                }
 								                if (type == ucd.CANONICAL && multipleZeroPrimaries(markCes, markLen)) {
 								                    forLater.put((char)(MULTIPLES | type) + s, comp);
 								                    continue;
 								                }
 								                forLater.put((char)type + s, comp);
 								            }
 								        }
 								        Iterator it = forLater.keySet().iterator();
 								        byte oldType = (byte)0xFF; // anything unique
 								        int caseCount = 0;
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
 								        log.println("Generated: " + getNormalDate());
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        while (it.hasNext()) {
 								            String key = (String) it.next();
 								            byte type = (byte)key.charAt(0);
 								            if (type != oldType) {
 								                oldType = type;
 								                log.println("===============================================================");
 								                log.print("CASE " + (caseCount++) + ": ");
 								                byte rType = (byte)(type & OTHER_MASK);
 								                log.println("    Decomposition Type = " + ucd.getDecompositionTypeID_fromIndex(rType));
 								                if ((type & COMPRESSED) != 0) {
 								                    log.println("    Successfully Compressed a la Ken");
 								                    log.println("    [XXXX.0020.YYYY][0000.ZZZZ.0002] => [XXXX.ZZZZ.YYYY]");
 								                } else if ((type & MULTIPLES) != 0) {
 								                    log.println("    MULTIPLE ACCENTS");
 								                }
 								                log.println("===============================================================");
 								                log.println();
 								            }
 								            String s = key.substring(1);
 								            String comp = (String)forLater.get(key);
 								            int kenLen = collator.getCEs(s, decompType, kenCes);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								            String kenStr = CEList.toString(kenCes, kenLen);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								            int markLen = fixCompatibilityCE(s, true, markCes, false);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								            String markStr = CEList.toString(markCes, markLen);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								            if ((type & COMPRESSED) != 0) {
 								                log.println("COMPRESSED #" + (++count) + ": " + ucd.getCodeAndName(s));
 								                log.println("         : " + comp);
 								            } else {
 								                log.println("DIFFERENCE #" + (++count) + ": " + ucd.getCodeAndName(s));
 								                log.println("generated : " + markStr);
 								                if (!markStr.equals(comp)) {
 								                    log.println("compressed: " + comp);
 								                }
 								                log.println("Ken's     : " + kenStr);
 								                String nfkd = NFKD.normalize(s);
 								                log.println("NFKD      : " + ucd.getCodeAndName(nfkd));
 								                String nfd = NFD.normalize(s);
 								                if (!nfd.equals(nfkd)) {
 								                    log.println("NFD       : " + ucd.getCodeAndName(nfd));
 								                }
 								                //kenCLen = collator.getCEs(decomp, true, kenComp);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                //log.println("decomp ce: " + CEList.toString(kenComp, kenCLen));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            log.println();
 								        }
 								        log.println("===============================================================");
 								        log.println();
 								        log.println("Compressible Secondaries");
 								        for (int i = 0; i < compressSet.size(); ++i) {
 								            if ((i & 0xF) == 0) log.println();
 								            if (!compressSet.get(i)) log.print("-  ");
 								            else log.print(Utility.hex(i, 3) + ", ");
 								        }
 								        log.close();
 								    }
 								    static final byte getDecompType(int cp) {
 								        byte result = ucd.getDecompositionType(cp);
 								        if (result == ucd.CANONICAL) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            String d = NFD.normalize(cp); // TODO
 								            int cp1;
 								            for (int i = 0; i < d.length(); i += UTF16.getCharCount(cp1)) {
 								                cp1 = UTF16.charAt(d, i);
 								                byte t = ucd.getDecompositionType(cp1);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                if (t > ucd.CANONICAL) return t;
 								            }
 								        }
 								        return result;
 								    }
 								    static final boolean multipleZeroPrimaries(int[] a, int aLen) {
 								        int count = 0;
 								        for (int i = 0; i < aLen; ++i) {
 								            if (UCA.getPrimary(a[i]) == 0) {
 								                if (count == 1) return true;
 								                count++;
 								            } else {
 								                count = 0;
 								            }
 								        }
 								        return false;
 								    }
 								    static final byte MULTIPLES = 0x20, COMPRESSED = 0x40, OTHER_MASK = 0x1F;
 								    static final BitSet compressSet = new BitSet();
 								    static int kenCompress(int[] markCes, int markLen) {
 								        if (markLen == 0) return 0;
 								        int out = 1;
 								        for (int i = 1; i < markLen; ++i) {
 								            int next = markCes[i];
 								            int prev = markCes[out-1];
 								            if (UCA.getPrimary(next) == 0
 								              && UCA.getSecondary(prev) == 0x20
 								              && UCA.getTertiary(next) == 0x2) {
 								                markCes[out-1] = UCA.makeKey(
 								                  UCA.getPrimary(prev),
 								                  UCA.getSecondary(next),
 								                  UCA.getTertiary(prev));
 								                compressSet.set(UCA.getSecondary(next));
 								            } else {
 								                markCes[out++] = next;
 								            }
 								        }
 								        return out;
 								    }
 								    static boolean arraysMatch(int[] a, int aLen, int[] b, int bLen) {
 								        if (aLen != bLen) return false;
 								        for (int i = 0; i < aLen; ++i) {
 								            if (a[i] != b[i]) return false;
 								        }
 								        return true;
 								    }
 								    static int[] markCes = new int[50];
 								    static int fixCompatibilityCE(String s, boolean decompose, int[] output, boolean compress) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        byte type = getDecompType(UTF16.charAt(s, 0));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        char ch = s.charAt(0);
 								        String decomp = NFKD.normalize(s);
 								        int len = 0;
 								        int markLen = collator.getCEs(decomp, true, markCes);
 								        if (compress) markLen = kenCompress(markCes, markLen);
 								        //for (int j = 0; j < decomp.length(); ++j) {
 								            for (int k = 0; k < markLen; ++k) {
 								                int t = UCA.getTertiary(markCes[k]);
 								                t = CEList.remap(k, type, t);
 								                /*
 								                if (type != CANONICAL) {
 								                    if (0x3041 <= ch && ch <= 0x3094) t = 0xE; // hiragana
 								                    else if (0x30A1 <= ch && ch <= 0x30FA) t = 0x11; // katakana
 								                }
 								                switch (type) {
 								                    case COMPATIBILITY: t = (t == 8) ? 0xA : 4; break;
 								                    case COMPAT_FONT:  t = (t == 8) ? 0xB : 5; break;
 								                    case COMPAT_NOBREAK: t = 0x1B; break;
 								                    case COMPAT_INITIAL: t = 0x17; break;
 								                    case COMPAT_MEDIAL: t = 0x18; break;
 								                    case COMPAT_FINAL: t = 0x19; break;
 								                    case COMPAT_ISOLATED: t = 0x1A; break;
 								                    case COMPAT_CIRCLE: t = (t == 0x11) ? 0x13 : (t == 8) ? 0xC : 6; break;
 								                    case COMPAT_SUPER: t = 0x14; break;
 								                    case COMPAT_SUB: t = 0x15; break;
 								                    case COMPAT_VERTICAL: t = 0x16; break;
 								                    case COMPAT_WIDE: t= (t == 8) ? 9 : 3; break;
 								                    case COMPAT_NARROW: t = (0xFF67 <= ch && ch <= 0xFF6F) ? 0x10 : 0x12; break;
 								                    case COMPAT_SMALL: t = (t == 0xE) ? 0xE : 0xF; break;
 								                    case COMPAT_SQUARE: t = (t == 8) ? 0x1D : 0x1C; break;
 								                    case COMPAT_FRACTION: t = 0x1E; break;
 								                }
 								                */
 								                output[len++] = UCA.makeKey(
 								                    UCA.getPrimary(markCes[k]),
 								                    UCA.getSecondary(markCes[k]),
 								                    t);
 								            //}
 								        }
 								        return len;
 								    }
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    static int getStrengthDiff(CEList celist) {
 								        int result = QUARTERNARY_DIFF;
 								        for (int j = 0; j < celist.length(); ++j) {
 								            int ce = celist.at(j);
 								            if (collator.getPrimary(ce) != 0) {
 								                return PRIMARY_DIFF;
 								            } else if (collator.getSecondary(ce) != 0) {
 								                result = SECONDARY_DIFF;
 								            } else if (collator.getTertiary(ce) != 0) {
 								                result = TERTIARY_DIFF;
 								            }
 								        }
 								        return result;
 								    }
 								    static String[] strengthName = {"XYZ", "0YZ", "00Z", "000"};
 								    static void writeCategoryCheck() throws IOException {
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        /*PrintWriter diLog = new PrintWriter(
-												fixes for collation charts

X-SVN-Rev: 6436
											
										
										
											2001-10-25 20:35:42 +00:00
+								            new BufferedWriter(
 								                new OutputStreamWriter(
 								                    new FileOutputStream(GEN_DIR + "UCA_Nonspacing.txt"),
 								                    "UTF8"),
 *1024));
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								                */
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	log.println("<h2>8. Checking against categories</h2>");
 								    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
 								        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
-												fixes for collation charts

X-SVN-Rev: 6436
											
										
										
											2001-10-25 20:35:42 +00:00
 								        Set sorted = new TreeSet();
 								        for (int i = 0; i < 0x10FFFF; ++i) {
 								            Utility.dot(i);
 								            if (!ucd.isRepresented(i)) continue;
 								            CEList celist = collator.getCEList(UTF32.valueOf32(i), true);
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            int real = getStrengthDiff(celist);
 								            int desired = PRIMARY_DIFF;
 								            byte cat = ucd.getCategory(i);
 								            if (cat == Cc || cat == Cs || cat == Cf || ucd.isNoncharacter(i)) desired = QUARTERNARY_DIFF;
 								            else if (cat == Mn || cat == Me) desired = SECONDARY_DIFF;
 								            String listName = celist.toString();
 								            if (listName.length() == 0) listName = "<i>ignore</i>";
 								            if (real != desired) {
 								                sorted.add("<tr><td>" + strengthName[real]
 								                    + "</td><td>" + strengthName[desired]
 								                    + "</td><td>" + ucd.getCategoryID(i)
 								                    + "</td><td>" + listName
 								                    + "</td><td>" + ucd.getCodeAndName(i)
 								                    + "</td></tr>");
 								            }
 								        }
 								        Utility.print(log, sorted, "\r\n");
 								    	log.println("</table>");
 								    	log.flush();
 								    }
 								    static void writeDuplicates() {
 								    	log.println("<h2>9. Checking characters that are not canonical equivalents, but have same CE</h2>");
 								    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
 								        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
 								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, toD);
 								        Map map = new TreeMap();
 								        while (true) {
 								            String s = cc.next();
 								            if (s == null) break;
 								            if (!toD.isNormalized(s)) continue; // only unnormalized stuff
 								            if (UTF16.countCodePoint(s) == 1) {
 								                int cat = ucd.getCategory(UTF16.charAt(s,0));
 								                if (cat == Cn || cat == Cc || cat == Cs) continue;
-												fixes for collation charts

X-SVN-Rev: 6436
											
										
										
											2001-10-25 20:35:42 +00:00
+								            }
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            CEList celist = collator.getCEList(s, true);
 								            Utility.addToSet(map, celist, s);
 								        }
 								        Iterator it = map.keySet().iterator();
 								        while (it.hasNext()) {
 								            CEList celist = (CEList) it.next();
 								            Set s = (Set) map.get(celist);
 								            String name = celist.toString();
 								            if (name.length() == 0) name = "<i>ignore</i>";
 								            if (s.size() > 1) {
 								                    log.println("<tr><td>" + name
 								                        + "</td><td>" + getHTML_NameSet(s, null, true)
 								                        + "</td></tr>");
-												fixes for collation charts

X-SVN-Rev: 6436
											
										
										
											2001-10-25 20:35:42 +00:00
+								            }
 								        }
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	log.println("</table>");
 								    	log.flush();
 								    }
 								    static void writeOverlap() {
 								    	log.println("<h2>10. Checking overlaps</h2>");
 								    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
 								        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
-												fixes for collation charts

X-SVN-Rev: 6436
											
										
										
											2001-10-25 20:35:42 +00:00
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, toD);
-												fixes for collation charts

X-SVN-Rev: 6436
											
										
										
											2001-10-25 20:35:42 +00:00
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        Map map = new TreeMap();
 								        Map tails = new TreeMap();
 								        int counter = 0;
 								        System.out.println("Collecting items");
 								        while (true) {
 								            String s = cc.next();
 								            if (s == null) break;
 								            Utility.dot(counter++);
 								            if (!toD.isNormalized(s)) continue; // only normalized stuff
 								            CEList celist = collator.getCEList(s, true);
 								            map.put(celist, s);
 								        }
 								        Utility.fixDot();
 								        System.out.println("Collecting tails");
 								        Iterator it = map.keySet().iterator();
 								        while (it.hasNext()) {
 								            CEList celist = (CEList) it.next();
 								            Utility.dot(counter++);
 								            int len = celist.length();
 								            if (len < 2) continue;
 								            for (int i = 1; i < len; ++i) {
 								                CEList tail = celist.sub(i, len);
 								                Utility.dot(counter++);
 								                if (map.get(tail) == null) { // skip anything in main
 								                    Utility.addToSet(tails, tail, celist);
 								                }
 								            }
 								        }
 								        Utility.fixDot();
 								        System.out.println("Finding overlaps");
 								        // we now have a set of main maps, and a set of tails
 								        // the main maps to string, the tails map to set of CELists
 								        it = map.keySet().iterator();
 								        List first = new ArrayList();
 								        List second = new ArrayList();
 								        while (it.hasNext()) {
 								            CEList celist = (CEList) it.next();
 								            int len = celist.length();
 								            if (len < 2) continue;
 								            Utility.dot(counter++);
 								            first.clear();
 								            second.clear();
 								            if (overlaps(map, tails, celist, 0, first, second)) {
 								                reverse(first);
 								                reverse(second);
 								                log.println("<tr><td>" + getHTML_NameSet(first, null, false)
 								                    + "</td><td>" + getHTML_NameSet(first, map, true)
 								                    + "</td><td>" + getHTML_NameSet(second, null, false)
 								                    + "</td><td>" + getHTML_NameSet(second, map, true)
 								                    + "</td></tr>");
 								            }
 								        }
 								    	log.println("</table>");
 								    	log.flush();
-												fixes for collation charts

X-SVN-Rev: 6436
											
										
										
											2001-10-25 20:35:42 +00:00
+								    }
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    static void reverse(List ell) {
 								        int i = 0;
 								        int j = ell.size() - 1;
 								        while (i < j) {
 								            Object temp = ell.get(i);
 								            ell.set(i, ell.get(j));
 								            ell.set(j, temp);
 								            ++i;
 								            --j;
 								        }
 								    }
 								    static final boolean DEBUG_SHOW_OVERLAP = false;
 								    static boolean overlaps(Map map, Map tails, CEList celist, int depth, List me, List other) {
 								        if (depth == 5) return false;
 								        boolean gotOne = false;
 								        if (DEBUG_SHOW_OVERLAP && depth > 0) {
 								            Object foo = map.get(celist);
 								            System.out.println(Utility.repeat("**", depth) + "Trying:" + celist + ", "
 								                + (foo != null ? ucd.getCodeAndName(foo.toString()) : ""));
 								            gotOne = true;
 								        }
 								        int len = celist.length();
 								        // if the tail of the celist matches something, then ArrayList
 								        // A. subtract the match and retry
 								        // B. if that doesn't work, see if the result is the tail of something else.
 								        for (int i = 1; i < len; ++i) {
 								            CEList tail = celist.sub(i, len);
 								            if (map.get(tail) != null) {
 								                if (DEBUG_SHOW_OVERLAP && !gotOne) {
 								                    Object foo = map.get(celist);
 								                    System.out.println(Utility.repeat("**", depth) + "Trying:" + celist + ", "
 								                        + (foo != null ? ucd.getCodeAndName(foo.toString()) : ""));
 								                    gotOne = true;
 								                }
 								                if (DEBUG_SHOW_OVERLAP) System.out.println(Utility.repeat("**", depth) + "  Match tail at " + i + ": " + tail + ", " + ucd.getCodeAndName(map.get(tail).toString()));
 								                // temporarily add tail
 								                int oldListSize = me.size();
 								                me.add(tail);
 								                // the tail matched, try 3 options
 								                CEList head = celist.sub(0, i);
 								                // see if the head matches exactly!
 								                if (map.get(head) != null) {
 								                    if (DEBUG_SHOW_OVERLAP) System.out.println(Utility.repeat("**", depth) + "  Match head at "
 								                        + i + ": " + head + ", " + ucd.getCodeAndName(map.get(head).toString()));
 								                    me.add(head);
 								                    other.add(celist);
 								                    return true;
 								                }
 								                // see if the end of the head matches something (recursively)
 								               if (DEBUG_SHOW_OVERLAP) System.out.println(Utility.repeat("**", depth) + "  Checking rest");
 								               if (overlaps(map, tails, head, depth+1, me, other)) return true;
 								                // otherwise we see if the head is some tail
 								                Set possibleFulls = (Set) tails.get(head);
 								                if (possibleFulls != null) {
 								                    Iterator it = possibleFulls.iterator();
 								                    while(it.hasNext()) {
 								                        CEList full = (CEList)it.next();
 								                        CEList possibleHead = full.sub(0,full.length() - head.length());
 								                        if (DEBUG_SHOW_OVERLAP) System.out.println(Utility.repeat("**", depth) + "  Reversing " + full
 								                            + ", " + ucd.getCodeAndName(map.get(full).toString())
 								                            + ", " + possibleHead);
 								                        if (overlaps(map, tails, possibleHead, depth+1, other, me)) return true;
 								                    }
 								                }
 								                // didn't work, so retract!
 								                me.remove(oldListSize);
 								            }
 								        }
 								        return false;
 								    }
 								    // if m exists, then it is a mapping to strings. Use it.
 								    // otherwise just print what is in set
 								    static String getHTML_NameSet(Collection set, Map m, boolean useName) {
 								        StringBuffer result = new StringBuffer();
 								        Iterator it = set.iterator();
 								        while (it.hasNext()) {
 								            if (result.length() != 0) result.append(";<br>");
 								            Object item = it.next();
 								            if (m != null) item = m.get(item);
 								            if (useName) item = ucd.getCodeAndName(item.toString());
 								            result.append(item);
 								        }
 								        return result.toString();
 								    }
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static void writeContractions() throws IOException {
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        /*PrintWriter diLog = new PrintWriter(
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            new BufferedWriter(
 								                new OutputStreamWriter(
 								                    new FileOutputStream(GEN_DIR + "UCA_Contractions.txt"),
 								                    "UTF8"),
 *1024));
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								                */
-												Added chart program, minor edits.

X-SVN-Rev: 9918
											
										
										
											2002-09-25 06:40:14 +00:00
+								        PrintWriter diLog = Utility.openPrintWriter("UCA_Contractions.txt", Utility.UTF8_WINDOWS);
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        diLog.write('\uFEFF');
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        int[] ces = new int[50];
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        int[] lenArray = new int[1];
 								        diLog.println("# Contractions");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        diLog.println("# Generated " + getNormalDate());
 								        diLog.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        while (true) {
 								            String s = cc.next(ces, lenArray);
 								            if (s == null) break;
 								            int len = lenArray[0];
 								            if (s.length() > 1) {
 								                diLog.println(Utility.hex(s, " ")
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    + ";\t #" + CEList.toString(ces, len)
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    + " ( " + s + " )"
 								                    + " " + ucd.getName(s));
 								            }
 								        }
 								        diLog.close();
 								    }
 								    static void checkDisjointIgnorables() throws IOException {
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								    	/*
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        PrintWriter diLog = new PrintWriter(
 								            new BufferedWriter(
 								                new OutputStreamWriter(
 								                    new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
 								                    "UTF8"),
 *1024));
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								                */
-												Latest updates for UCD, default values. Fixed UTF-8 output for UCA, Logical_Order_Exceptions

X-SVN-Rev: 11358
											
										
										
											2003-03-19 17:30:58 +00:00
+								        PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables.js", Utility.UTF8_WINDOWS);
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        diLog.write('\uFEFF');
 								        /*
 								        PrintWriter diLog = new PrintWriter(
 								            // try new one
 								            new UTF8StreamWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
 *1024));
 								        diLog.write('\uFEFF');
 								        */
 								        //diLog = new PrintWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"));
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        int[] ces = new int[50];
 								        int[] secondariesZP = new int[400];
 								        Vector[] secondariesZPsample = new Vector[400];
 								        int[] remapZP = new int[400];
 								        int[] secondariesNZP = new int[400];
 								        Vector[] secondariesNZPsample = new Vector[400];
 								        int[] remapNZP = new int[400];
 								        for (int i = 0; i < secondariesZP.length; ++i) {
 								            secondariesZPsample[i] = new Vector();
 								            secondariesNZPsample[i] = new Vector();
 								        }
 								        int zpCount = 0;
 								        int nzpCount = 0;
 								        /* for (char ch = 0; ch < 0xFFFF; ++ch) {
 								            byte type = collator.getCEType(ch);
 								            if (type >= UCA.FIXED_CE) continue;
 								            if (SKIP_CANONICAL_DECOMPOSIBLES && nfd.hasDecomposition(ch)) continue;
 								            String s = String.valueOf(ch);
 								            int len = collator.getCEs(s, true, ces);
 								            */
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        int[] lenArray = new int[1];
 								        Set sortedCodes = new TreeSet();
 								        Set mixedCEs = new TreeSet();
 								        while (true) {
 								            String s = cc.next(ces, lenArray);
 								            if (s == null) break;
 								            // process all CEs. Look for controls, and for mixed ignorable/non-ignorables
 								            int ccc;
 								            for (int kk = 0; kk < s.length(); kk += UTF32.count16(ccc)) {
 								                ccc = UTF32.char32At(s,kk);
 								                byte cat = ucd.getCategory(ccc);
 								                if (cat == Cf || cat == Cc || cat == Zs || cat == Zl || cat == Zp) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    sortedCodes.add(CEList.toString(ces, lenArray[0]) + "\t" + ucd.getCodeAndName(s));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    break;
 								                }
 								            }
 								            int len = lenArray[0];
 								            int haveMixture = 0;
 								            for (int j = 0; j < len; ++j) {
 								                int ce = ces[j];
 								                int pri = collator.getPrimary(ce);
 								                int sec = collator.getSecondary(ce);
 								                if (pri == 0) {
 								                    secondariesZPsample[sec].add(secondariesZP[sec], s);
 								                    secondariesZP[sec]++;
 								                } else {
 								                    secondariesNZPsample[sec].add(secondariesNZP[sec], s);
 								                    secondariesNZP[sec]++;
 								                }
 								                if (haveMixture == 3) continue;
 								                if (collator.isVariable(ce)) haveMixture |= 1;
 								                else haveMixture |= 2;
 								                if (haveMixture == 3) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    mixedCEs.add(CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                }
 								            }
 								        }
 								        for (int i = 0; i < secondariesZP.length; ++i) {
 								            if (secondariesZP[i] != 0) {
 								                remapZP[i] = zpCount;
 								                zpCount++;
 								            }
 								            if (secondariesNZP[i] != 0) {
 								                remapNZP[i] = nzpCount;
 								                nzpCount++;
 								            }
 								        }
 								        diLog.println();
 								        diLog.println("# Proposed Remapping (see doc about Japanese characters)");
 								        diLog.println();
 								        int bothCount = 0;
 								        for (int i = 0; i < secondariesZP.length; ++i) {
 								            if ((secondariesZP[i] != 0) || (secondariesNZP[i] != 0)) {
 								                char sign = ' ';
 								                if (secondariesZP[i] != 0 && secondariesNZP[i] != 0) {
 								                    sign = '*';
 								                    bothCount++;
 								                }
 								                if (secondariesZP[i] != 0) {
 								                    showSampleOverlap(diLog, false, sign + "ZP ", secondariesZPsample[i]); // i, 0x20 + nzpCount + remapZP[i],
 								                }
 								                if (secondariesNZP[i] != 0) {
 								                    if (i == 0x20) {
 								                        diLog.println("(omitting " + secondariesNZP[i] + " NZP with values 0020 -- values don't change)");
 								                    } else {
 								                        showSampleOverlap(diLog, true, sign + "NZP", secondariesNZPsample[i]); // i, 0x20 + remapNZP[i],
 								                    }
 								                }
 								                diLog.println();
 								            }
 								        }
 								        diLog.println("ZP Count = " + zpCount + ", NZP Count = " + nzpCount + ", Collisions = " + bothCount);
 								        /*
 								        diLog.println();
 								        diLog.println("OVERLAPS");
 								        diLog.println();
 								        for (int i = 0; i < secondariesZP.length; ++i) {
 								            if (secondariesZP[i] != 0 && secondariesNZP[i] != 0) {
 								                diLog.println("Overlap at " + Utility.hex(i)
 								                    + ": " + secondariesZP[i] + " with zero primaries"
 								                    + ", " + secondariesNZP[i] + " with non-zero primaries"
 								                );
 								                showSampleOverlap(" ZP:  ", secondariesZPsample[i], ces);
 								                showSampleOverlap(" NZP: ", secondariesNZPsample[i], ces);
 								                diLog.println();
 								            }
 								        }
 								        */
 								        diLog.println();
 								        diLog.println("# BACKGROUND INFORMATION");
 								        diLog.println();
 								        diLog.println("# All characters with 'mixed' CEs: variable and non-variable");
 								        diLog.println("# Note: variables are in " + Utility.hex(collator.getVariableLow() >> 16) + " to "
 								            + Utility.hex(collator.getVariableHigh() >> 16));
 								        diLog.println();
 								        Iterator it;
 								        it = mixedCEs.iterator();
 								        while (it.hasNext()) {
 								            Object key = it.next();
 								            diLog.println(key);
 								        }
 								        diLog.println();
 								        diLog.println("# All 'controls': Cc, Cf, Zs, Zp, Zl");
 								        diLog.println();
 								        it = sortedCodes.iterator();
 								        while (it.hasNext()) {
 								            Object key = it.next();
 								            diLog.println(key);
 								        }
 								        diLog.close();
 								    }
 								    static void checkCE_overlap() throws IOException {
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        /*PrintWriter diLog = new PrintWriter(
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            new BufferedWriter(
 								                new OutputStreamWriter(
 								                    new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"),
 								                    "UTF8"),
 *1024));
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								                */
-												Latest updates for UCD, default values. Fixed UTF-8 output for UCA, Logical_Order_Exceptions

X-SVN-Rev: 11358
											
										
										
											2003-03-19 17:30:58 +00:00
+								        PrintWriter diLog = Utility.openPrintWriter("DisjointIgnorables2.js", Utility.UTF8_WINDOWS);
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        diLog.write('\uFEFF');
 								        //diLog = new PrintWriter(new FileOutputStream(GEN_DIR + "DisjointIgnorables.txt"));
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        int[] ces = new int[50];
 								        int[] secondariesZP = new int[400];
 								        Vector[] secondariesZPsample = new Vector[400];
 								        int[] remapZP = new int[400];
 								        int[] secondariesNZP = new int[400];
 								        Vector[] secondariesNZPsample = new Vector[400];
 								        int[] remapNZP = new int[400];
 								        for (int i = 0; i < secondariesZP.length; ++i) {
 								            secondariesZPsample[i] = new Vector();
 								            secondariesNZPsample[i] = new Vector();
 								        }
 								        int zpCount = 0;
 								        int nzpCount = 0;
 								        /* for (char ch = 0; ch < 0xFFFF; ++ch) {
 								            byte type = collator.getCEType(ch);
 								            if (type >= UCA.FIXED_CE) continue;
 								            if (SKIP_CANONICAL_DECOMPOSIBLES && nfd.hasDecomposition(ch)) continue;
 								            String s = String.valueOf(ch);
 								            int len = collator.getCEs(s, true, ces);
 								            */
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        int[] lenArray = new int[1];
 								        Set sortedCodes = new TreeSet();
 								        Set mixedCEs = new TreeSet();
 								        while (true) {
 								            String s = cc.next(ces, lenArray);
 								            if (s == null) break;
 								            // process all CEs. Look for controls, and for mixed ignorable/non-ignorables
 								            int ccc;
 								            for (int kk = 0; kk < s.length(); kk += UTF32.count16(ccc)) {
 								                ccc = UTF32.char32At(s,kk);
 								                byte cat = ucd.getCategory(ccc);
 								                if (cat == Cf || cat == Cc || cat == Zs || cat == Zl || cat == Zp) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    sortedCodes.add(CEList.toString(ces, lenArray[0]) + "\t" + ucd.getCodeAndName(s));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    break;
 								                }
 								            }
 								            int len = lenArray[0];
 								            int haveMixture = 0;
 								            for (int j = 0; j < len; ++j) {
 								                int ce = ces[j];
 								                int pri = collator.getPrimary(ce);
 								                int sec = collator.getSecondary(ce);
 								                if (pri == 0) {
 								                    secondariesZPsample[sec].add(secondariesZP[sec], s);
 								                    secondariesZP[sec]++;
 								                } else {
 								                    secondariesNZPsample[sec].add(secondariesNZP[sec], s);
 								                    secondariesNZP[sec]++;
 								                }
 								                if (haveMixture == 3) continue;
 								                if (collator.isVariable(ce)) haveMixture |= 1;
 								                else haveMixture |= 2;
 								                if (haveMixture == 3) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    mixedCEs.add(CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                }
 								            }
 								        }
 								        for (int i = 0; i < secondariesZP.length; ++i) {
 								            if (secondariesZP[i] != 0) {
 								                remapZP[i] = zpCount;
 								                zpCount++;
 								            }
 								            if (secondariesNZP[i] != 0) {
 								                remapNZP[i] = nzpCount;
 								                nzpCount++;
 								            }
 								        }
 								        diLog.println();
 								        diLog.println("# Proposed Remapping (see doc about Japanese characters)");
 								        diLog.println();
 								        int bothCount = 0;
 								        for (int i = 0; i < secondariesZP.length; ++i) {
 								            if ((secondariesZP[i] != 0) || (secondariesNZP[i] != 0)) {
 								                char sign = ' ';
 								                if (secondariesZP[i] != 0 && secondariesNZP[i] != 0) {
 								                    sign = '*';
 								                    bothCount++;
 								                }
 								                if (secondariesZP[i] != 0) {
 								                    showSampleOverlap(diLog, false, sign + "ZP ", secondariesZPsample[i]); // i, 0x20 + nzpCount + remapZP[i],
 								                }
 								                if (secondariesNZP[i] != 0) {
 								                    if (i == 0x20) {
 								                        diLog.println("(omitting " + secondariesNZP[i] + " NZP with values 0020 -- values don't change)");
 								                    } else {
 								                        showSampleOverlap(diLog, true, sign + "NZP", secondariesNZPsample[i]); // i, 0x20 + remapNZP[i],
 								                    }
 								                }
 								                diLog.println();
 								            }
 								        }
 								        diLog.println("ZP Count = " + zpCount + ", NZP Count = " + nzpCount + ", Collisions = " + bothCount);
 								        diLog.close();
 								    }
 								    static void showSampleOverlap(PrintWriter diLog, boolean doNew, String head, Vector v) {
 								        for (int i = 0; i < v.size(); ++i) {
 								            showSampleOverlap(diLog, doNew, head, (String)v.get(i));
 								        }
 								    }
 								    static void showSampleOverlap(PrintWriter diLog, boolean doNew, String head, String src) {
 								        int[] ces = new int[30];
 								        int len = collator.getCEs(src, true, ces);
 								        int[] newCes = null;
 								        int newLen = 0;
 								        if (doNew) {
 								            newCes = new int[30];
 								            for (int i = 0; i < len; ++i) {
 								                int ce = ces[i];
 								                int p = UCA.getPrimary(ce);
 								                int s = UCA.getSecondary(ce);
 								                int t = UCA.getTertiary(ce);
 								                if (p != 0 && s != 0x20) {
 								                    newCes[newLen++] = UCA.makeKey(p, 0x20, t);
 								                    newCes[newLen++] = UCA.makeKey(0, s, 0x1F);
 								                } else {
 								                    newCes[newLen++] = ce;
 								                }
 								            }
 								        }
 								        diLog.println(
 								            ucd.getCode(src)
 								            + "\t" + head
 								            //+ "\t" + Utility.hex(oldWeight)
 								            //+ " => " + Utility.hex(newWeight)
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								            + "\t" + CEList.toString(ces, len)
 								            + (doNew ? " => " + CEList.toString(newCes, newLen) : "")
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            + "\t( " + src + " )"
 								            + "\t" + ucd.getName(src)
 								            );
 								    }
 								    static final byte WITHOUT_NAMES = 0, WITH_NAMES = 1, IN_XML = 2;
 								    static final boolean SKIP_CANONICAL_DECOMPOSIBLES = true;
 								    static final int TRANSITIVITY_COUNT = 8000;
 								    static final int TRANSITIVITY_ITERATIONS = 1000000;
 								    static void testTransitivity() {
 								        char[] tests = new char[TRANSITIVITY_COUNT];
 								        String[] keys = new String[TRANSITIVITY_COUNT];
 								        int i = 0;
 								        System.out.println("Loading");
 								        for (char ch = 0; i < tests.length; ++ch) {
 								            byte type = collator.getCEType(ch);
 								            if (type >= UCA.FIXED_CE) continue;
 								            Utility.dot(ch);
 								            tests[i] = ch;
 								            keys[i] = collator.getSortKey(String.valueOf(ch));
 								            ++i;
 								        }
 								        java.util.Comparator cm = new RuleComparator();
 								        i = 0;
 								        Utility.fixDot();
 								        System.out.println("Comparing");
 								        while (i++ < TRANSITIVITY_ITERATIONS) {
 								            Utility.dot(i);
 								            int a = (int)Math.random()*TRANSITIVITY_COUNT;
 								            int b = (int)Math.random()*TRANSITIVITY_COUNT;
 								            int c = (int)Math.random()*TRANSITIVITY_COUNT;
 								            int ab = cm.compare(keys[a], keys[b]);
 								            int bc = cm.compare(keys[b], keys[c]);
 								            int ca = cm.compare(keys[c], keys[a]);
 								            if (ab < 0 && bc < 0 && ca < 0 || ab > 0 && bc > 0 && ca > 0) {
 								                System.out.println("Transitivity broken for "
 								                    + Utility.hex(a)
 								                    + ", " + Utility.hex(b)
 								                    + ", " + Utility.hex(c));
 								            }
 								        }
 								    }
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    static Normalizer nfdNew = new Normalizer(Normalizer.NFD, "");
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								    static Normalizer NFC = new Normalizer(Normalizer.NFC, "");
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    static Normalizer nfkdNew = new Normalizer(Normalizer.NFKD, "");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    static int getFirstCELen(int[] ces, int len) {
 								        if (len < 2) return len;
 								    	int expansionStart = 1;
 								    	if (UCA.isImplicitLeadCE(ces[0])) {
 								    		expansionStart = 2; // move up if first is double-ce
 								    	}
 								    	if (len > expansionStart && homelessSecondaries.contains(UCA.getSecondary(ces[expansionStart]))) {
 								    		++expansionStart; // move up if *second* is homeless ignoreable
 								    	}
 								    	return expansionStart;
 								    }
 								    static void writeRules (byte option, boolean shortPrint) throws IOException {
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        //testTransitivity();
 								        //if (true) return;
 								        int[] ces = new int[50];
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								        Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        if (false) {
 								        int len2 = collator.getCEs("\u2474", true, ces);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        System.out.println(CEList.toString(ces, len2));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        String a = collator.getSortKey("a");
 								        String b = collator.getSortKey("A");
 								        System.out.println(collator.strengthDifference(a, b));
 								        }
 								        System.out.println("Sorting");
 								        Map backMap = new HashMap();
 								        java.util.Comparator cm = new RuleComparator();
 								        Map ordered = new TreeMap(cm);
-												reorg

X-SVN-Rev: 5824
											
										
										
											2001-09-19 23:33:52 +00:00
+								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE,
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            SKIP_CANONICAL_DECOMPOSIBLES ? nfd : null);
 								        int[] lenArray = new int[1];
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
 								        Set alreadyDone = new HashSet();
-												Latest updates for UCD, default values. Fixed UTF-8 output for UCA, Logical_Order_Exceptions

X-SVN-Rev: 11358
											
										
										
											2003-03-19 17:30:58 +00:00
+								        PrintWriter log2 = Utility.openPrintWriter("UCARules-log.txt", Utility.UTF8_WINDOWS);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        while (true) {
 								            String s = cc.next(ces, lenArray);
 								            if (s == null) break;
 								            int len = lenArray[0];
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
 								            if (s.equals("\uD800")) {
 								            	System.out.println("Check: " + CEList.toString(ces, len));
 								            }
 								            log2.println(s + "\t" + CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
 								            addToBackMap(backMap, ces, len, s, false);
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
 								            int ce2 = 0;
 								            int ce3 = 0;
 								            int logicalFirstLen = getFirstCELen(ces, len);
 								            if (logicalFirstLen > 1) {
 								                ce2 = ces[1];
 								                if (logicalFirstLen > 2) {
 								                    ce3 = ces[2];
 								                }
 								            }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            String key = String.valueOf(UCA.getPrimary(ces[0])) + String.valueOf(UCA.getPrimary(ce2)) + String.valueOf(UCA.getPrimary(ce3))
 								                + String.valueOf(UCA.getSecondary(ces[0])) + String.valueOf(UCA.getSecondary(ce2)) + String.valueOf(UCA.getSecondary(ce3))
 								                + String.valueOf(UCA.getTertiary(ces[0])) + String.valueOf(UCA.getTertiary(ce2)) + String.valueOf(UCA.getTertiary(ce3))
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                + collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + UCA.codePointOrder(s);
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								                //String.valueOf((char)(ces[0]>>>16)) + String.valueOf((char)(ces[0] & 0xFFFF))
 								                //+ String.valueOf((char)(ce2>>>16)) + String.valueOf((char)(ce2 & 0xFFFF))
 								            if (s.equals("\u0660") || s.equals("\u2080")) {
 								                System.out.println(ucd.getCodeAndName(s) + "\t" + Utility.hex(key));
 								            }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            ordered.put(key, s);
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            alreadyDone.add(s);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            Object result = ordered.get(key);
 								            if (result == null) {
 								                System.out.println("BAD SORT: " + Utility.hex(key) + ", " + Utility.hex(s));
 								            }
 								        }
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        System.out.println("Checking CJK");
 								        // Check for characters that are ARE explicitly mapped in the CJK ranges
 								        UnicodeSet CJK = new UnicodeSet(0x2E80, 0x2EFF);
 								        CJK.add(0x2F00, 0x2EFF);
 								        CJK.add(0x2F00, 0x2FDF);
 								        CJK.add(0x3400, 0x9FFF);
 								        CJK.add(0xF900, 0xFAFF);
 								        CJK.add(0x20000, 0x2A6DF);
 								        CJK.add(0x2F800, 0x2FA1F);
 								        CJK.removeAll(new UnicodeSet("[:Cn:]")); // remove unassigned
 								        // make set with canonical decomposibles
 								        UnicodeSet composites = new UnicodeSet();
 								        for (int i = 0; i < 0x10FFFF; ++i) {
 								        	if (!ucd.isAllocated(i)) continue;
 								        	if (nfd.isNormalized(i)) continue;
 								        	composites.add(i);
 								        }
 								        UnicodeSet CJKcomposites = new UnicodeSet(CJK).retainAll(composites);
 								        System.out.println("CJK composites " + CJKcomposites.toPattern(true));
 								        System.out.println("CJK NONcomposites " + new UnicodeSet(CJK).removeAll(composites).toPattern(true));
 								        UnicodeSet mapped = new UnicodeSet();
 								        Iterator it = alreadyDone.iterator();
 								        while (it.hasNext()) {
 								        	String member = (String) it.next();
 								        	mapped.add(member);
 								        }
 								        UnicodeSet CJKmapped = new UnicodeSet(CJK).retainAll(mapped);
 								        System.out.println("Mapped CJK: " + CJKmapped.toPattern(true));
 								        System.out.println("UNMapped CJK: " + new UnicodeSet(CJK).removeAll(mapped).toPattern(true));
 								        System.out.println("Neither Mapped nor Composite CJK: "
 								        	+ new UnicodeSet(CJK).removeAll(CJKcomposites).removeAll(CJKmapped).toPattern(true));
 								/*
 E80..2EFF; CJK Radicals Supplement
 F00..2FDF; Kangxi Radicals
 ..4DBF; CJK Unified Ideographs Extension A
 E00..9FFF; CJK Unified Ideographs
 								F900..FAFF; CJK Compatibility Ideographs
 ..2A6DF; CJK Unified Ideographs Extension B
 F800..2FA1F; CJK Compatibility Ideographs Supplement
 								*/
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								        System.out.println("Adding Kanji");
 								        for (int i = 0; i < 0x10FFFF; ++i) {
 								        	if (!ucd.isAllocated(i)) continue;
 								        	if (nfkd.isNormalized(i)) continue;
 								        	Utility.dot(i);
 								        	String decomp = nfkd.normalize(i);
 								        	int cp;
 								        	for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
 								        		cp = UTF16.charAt(decomp, j);
 								        		String s = UTF16.valueOf(cp);
 								        		if (alreadyDone.contains(s)) continue;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								        		alreadyDone.add(s);
 								        		int len = collator.getCEs(s, true, ces);
 								            	log2.println(s+ "\t" + CEList.toString(ces, len)
 								            		+ "\t" + ucd.getCodeAndName(s) + " from " + ucd.getCodeAndName(i));
 								            	addToBackMap(backMap, ces, len, s, false);
 								        	}
 								        }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        System.out.println("Writing");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        String filename = "UCA_Rules";
 								        if (shortPrint) filename += "_SHORT";
 								        if (option == IN_XML) filename += ".xml"; else filename += ".txt";
-												Latest updates for UCD, default values. Fixed UTF-8 output for UCA, Logical_Order_Exceptions

X-SVN-Rev: 11358
											
										
										
											2003-03-19 17:30:58 +00:00
+								        log = Utility.openPrintWriter(filename, Utility.UTF8_WINDOWS);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        String[] commentText = {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        	"UCA Rules",
 								        	"This file contains the UCA tables for the given version, but transformed into rule syntax.",
 								            "Generated:   " + getNormalDate(),
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        	"NOTE: Since UCA handles canonical equivalents, no composites are necessary",
 								        	"(except in extensions).",
 								        	"For syntax description, see: http://oss.software.ibm.com/icu/userguide/Collate_Intro.html"
 								        };
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        if (option == IN_XML) {
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								        	log.println("<collation>");
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        	log.println("<!--");
 								        	for (int i = 0; i < commentText.length; ++i) {
 								        		log.println(commentText[i]);
 								        	}
 								        	log.println("-->");
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								        	log.println("<base uca='" + collator.getDataVersion() + "/" + collator.getUCDVersion() + "'/>");
 								        	log.println("<rules>");
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        } else {
 								        	log.write('\uFEFF'); // BOM
 								        	for (int i = 0; i < commentText.length; ++i) {
 								        		log.println("#\t" + commentText[i]);
 								        	}
 								        	log.println("# VERSION: UCA=" + collator.getDataVersion() + ", UCD=" + collator.getUCDVersion());
 								        }
 								        it = ordered.keySet().iterator();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        int oldFirstPrimary = UCA.getPrimary(UCA.TERMINATOR);
 								        boolean wasVariable = false;
 								        //String lastSortKey = collator.getSortKey("\u0000");;
 								        // 12161004
 								        int lastCE = 0;
 								        int ce = 0;
 								        int nextCE = 0;
 								        int lastCJKPrimary = 0;
 								        boolean firstTime = true;
 								        boolean done = false;
 								        String chr = "";
 								        int len = -1;
 								        String nextChr = "";
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        int nextLen = -1; // -1 signals that we need to skip!!
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        int[] nextCes = new int[50];
 								        String lastChr = "";
 								        int lastLen = -1;
 								        int[] lastCes = new int[50];
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        int lastExpansionStart = 0;
 								        int expansionStart = 0;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        long variableTop = collator.getVariableHigh() & INT_MASK;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        // for debugging ordering
 								        String lastSortKey = "";
 								        boolean showNext = false;
 								        for (int loopCounter = 0; !done; loopCounter++) {
 								            Utility.dot(loopCounter);
 								            lastCE = ce;
 								            lastLen = len;
 								            lastChr = chr;
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            lastExpansionStart = expansionStart;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (len > 0) {
 								                System.arraycopy(ces, 0, lastCes, 0, lastLen);
 								            }
 								            // copy the current from Next
 								            ce = nextCE;
 								            len = nextLen;
 								            chr = nextChr;
 								            if (nextLen > 0) {
 								                System.arraycopy(nextCes, 0, ces, 0, nextLen);
 								            }
 								            // We need to look ahead one, to be able to reset properly
 								            if (it.hasNext()) {
 								                String nextSortKey = (String) it.next();
 								                nextChr = (String)ordered.get(nextSortKey);
 								                int result = cm.compare(nextSortKey, lastSortKey);
 								                if (result < 0) {
 								                    System.out.println();
 								                    System.out.println("DANGER: Sort Key Unordered!");
 								                        System.out.println((loopCounter-1) + " " + Utility.hex(lastSortKey)
 								                            + ", " + ucd.getCodeAndName(lastSortKey.charAt(lastSortKey.length()-1)));
 								                    System.out.println(loopCounter + " " + Utility.hex(nextSortKey)
 								                         + ", " + ucd.getCodeAndName(nextSortKey.charAt(nextSortKey.length()-1)));
 								                }
 								                if (nextChr == null) {
 								                    Utility.fixDot();
 								                    if (!showNext) {
 								                        System.out.println();
 								                        System.out.println((loopCounter-1) + "   Last = " + Utility.hex(lastSortKey)
 								                            + ", " + ucd.getCodeAndName(lastSortKey.charAt(lastSortKey.length()-1)));
 								                    }
 								                    System.out.println(cm.compare(lastSortKey, nextSortKey)
 								                        + ", " + cm.compare(nextSortKey, lastSortKey));
 								                    System.out.println(loopCounter + " NULL AT  " + Utility.hex(nextSortKey)
 								                         + ", " + ucd.getCodeAndName(nextSortKey.charAt(nextSortKey.length()-1)));
 								                    nextChr = "??";
 								                    showNext = true;
 								                } else if (showNext) {
 								                    showNext = false;
 								                    System.out.println(cm.compare(lastSortKey, nextSortKey)
 								                        + ", " + cm.compare(nextSortKey, lastSortKey));
 								                    System.out.println(loopCounter + "   Next = " + Utility.hex(nextSortKey)
 								                         + ", " + ucd.getCodeAndName(nextChr));
 								                }
 								                lastSortKey = nextSortKey;
 								            } else {
 								                nextChr = "??";
 								                done = true; // make one more pass!!!
 								            }
 								            nextLen = collator.getCEs(nextChr, true, nextCes);
 								            nextCE = nextCes[0];
 								            // skip first (fake) element
 								            if (len == -1) continue;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            // for debugging
 								            if (loopCounter < 5) {
 								                System.out.println(loopCounter);
 								                System.out.println(CEList.toString(lastCes, lastLen) + ", " + ucd.getCodeAndName(lastChr));
 								                System.out.println(CEList.toString(ces, len) + ", " + ucd.getCodeAndName(chr));
 								                System.out.println(CEList.toString(nextCes, nextLen) + ", " + ucd.getCodeAndName(nextChr));
 								            }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            // get relation
 								            /*if (chr.charAt(0) == 0xFFFB) {
 								                System.out.println("DEBUG");
 								            }*/
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    		expansionStart = getFirstCELen(ces, len);
 								            // int relation = getStrengthDifference(ces, len, lastCes, lastLen);
 								            int relation = getStrengthDifference(ces, expansionStart, lastCes, lastExpansionStart);
 								            if (relation == QUARTERNARY_DIFF) {
 								                int relation2 = getStrengthDifference(ces, len, lastCes, lastLen);
 								                if (relation2 != QUARTERNARY_DIFF) relation = TERTIARY_DIFF;
 								            }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            // RESETs: do special case for relations to fixed items
 								            String reset = "";
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            String resetComment = "";
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            int xmlReset = 0;
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								            boolean insertVariableTop = false;
 								            boolean resetToParameter = false;
 								            int ceLayout = getCELayout(ce);
 								            if (ceLayout == IMPLICIT) {
 								                if (relation == PRIMARY_DIFF) {
 								                    int primary = UCA.getPrimary(ce);
 								                    int resetCp = UCA.ImplicitToCodePoint(primary, UCA.getPrimary(ces[1]));
 								                    int[] ces2 = new int[50];
 								                    int len2 = collator.getCEs(UTF16.valueOf(resetCp), true, ces2);
 								                    relation = getStrengthDifference(ces, len, ces2, len2);
 								                    reset = quoteOperand(UTF16.valueOf(resetCp));
 								                    resetComment = ucd.getCodeAndName(resetCp);
 								                    // lastCE = UCA.makeKey(primary, UCA.NEUTRAL_SECONDARY, UCA.NEUTRAL_TERTIARY);
 								                    xmlReset = 2;
 								                }
 								                // lastCJKPrimary = primary;
 								            } else if (ceLayout != getCELayout(lastCE) || firstTime) {
 								                resetToParameter = true;
 								                switch (ceLayout) {
 								                    case T_IGNORE: reset = "last tertiary ignorable"; break;
 								                    case S_IGNORE: reset = "last secondary ignorable"; break;
 								                    case P_IGNORE: reset = "last primary ignorable"; break;
-												Now quoting rule whitespaces. Changed [last non-ignorable] to [last regular]

X-SVN-Rev: 11362
											
										
										
											2003-03-19 23:31:12 +00:00
+								                    case VARIABLE: reset = "last regular"; break;
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								                    case NON_IGNORE: /*reset = "top"; */ insertVariableTop = true; break;
 								                    case TRAILING: reset = "last trailing"; break;
 								                }
 								            }
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            if (chr.equals("\u2F00")) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                System.out.println(CEList.toString(ces, len));
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            }
 								            // There are double-CEs, so we have to know what the length of the first bit is.
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								            // check expansions
 								            String expansion = "";
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            if (len > expansionStart) {
 								                //int tert0 = ces[0] & 0xFF;
 								                //boolean isCompat = tert0 != 2 && tert0 != 8;
 								                log2.println("Exp: " + ucd.getCodeAndName(chr) + ", " + CEList.toString(ces, len) + ", start: " + expansionStart);
 								                int[] rel = {relation};
 								                expansion = getFromBackMap(backMap, ces, expansionStart, len, chr, rel);
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								                // relation = rel[0];
 								                // The relation needs to be fixed differently. Since it is an expansion, it should be compared to
 								                // the first CE
 								                // ONLY reset if the sort keys are not equal
 								                if (false && (relation == PRIMARY_DIFF || relation == SECONDARY_DIFF)) {
 								                    int relation2 = getStrengthDifference(ces, expansionStart, lastCes, lastExpansionStart);
 								                    if (relation2 != relation) {
 								                        System.out.println ();
 								                        System.out.println ("Resetting: " + RELATION_NAMES[relation] + " to " + RELATION_NAMES[relation2]);
 								                        System.out.println ("LCes: " + CEList.toString(lastCes, lastLen) + ", " + lastExpansionStart
 								                            + ", " + ucd.getCodeAndName(lastChr));
 								                        System.out.println ("Ces:  " + CEList.toString(ces, len) + ", " + expansionStart
 								                            + ", " + ucd.getCodeAndName(chr));
 								                        relation = relation2;
 								                    }
 								                }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            // print results
 								            if (option == IN_XML) {
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								                if (insertVariableTop) log.println(XML_RELATION_NAMES[0] + "<variableTop/>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								                /*log.print("  <!--" + ucd.getCodeAndName(chr));
 								                if (len > 1) log.print(" / " + Utility.hex(expansion));
 								                log.println("-->");
 								                */
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								                if (reset.length() != 0) {
 								                    log.println("<reset/>"
 								                    + (resetToParameter ? "<position at=\"" + reset + "\"/>" : Utility.quoteXML(reset))
 								                	+ (resetComment.length() != 0 ? "<!-- " + resetComment + "-->": ""));
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								                }
 								                if (!firstTime) {
 								                    log.print("  <" + XML_RELATION_NAMES[relation] + "/>");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								                    log.print(Utility.quoteXML(chr));
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								                    //log.print("</" + XML_RELATION_NAMES[relation] + ">");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                if (expansion.length() > 0) {
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								                    log.print("<x/>" + Utility.quoteXML(expansion));
 								                }
 								                if (!shortPrint) {
 								                    log.print("\t<!--"
 								                        + CEList.toString(ces, len) + " "
 								                        + ucd.getCodeAndName(chr));
 								                    if (expansion.length() > 0) log.print(" / " + Utility.hex(expansion));
 								                    log.print("-->");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                }
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								                log.println();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            } else {
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								                if (insertVariableTop) log.println(RELATION_NAMES[0] + " [variable top]");
 								                if (reset.length() != 0) log.println("& "
 								                    + (resetToParameter ? "[" : "") + reset + (resetToParameter ? "]" : "")
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                	+ (resetComment.length() != 0 ? "\t\t# " + resetComment : ""));
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								                if (!firstTime) log.print(RELATION_NAMES[relation] + " " + quoteOperand(chr));
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								                if (expansion.length() > 0) log.print(" / " + quoteOperand(expansion));
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								                if (!shortPrint) {
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    log.print("\t# "
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                        + CEList.toString(ces, len) + " "
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                        + ucd.getCodeAndName(chr));
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								                    if (expansion.length() > 0) log.print(" / " + Utility.hex(expansion));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                }
 								                log.println();
 								            }
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								            firstTime = false;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
 								        // log.println("& [top]"); // RESET
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								        if (option == IN_XML) log.println("</rules></collation>");
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								        log2.close();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.close();
 								        Utility.fixDot();
 								    }
-												added new parameters to rules, fixed a bit of the xml

X-SVN-Rev: 8930
											
										
										
											2002-06-24 15:25:10 +00:00
+								    static final int NONE = 0, T_IGNORE = 1, S_IGNORE = 2, P_IGNORE = 3, VARIABLE = 4, NON_IGNORE = 5, IMPLICIT = 6, TRAILING = 7;
 								    static int getCELayout(int ce) {
 								        int primary = collator.getPrimary(ce);
 								        int secondary = collator.getSecondary(ce);
 								        int tertiary = collator.getSecondary(ce);
 								        if (primary == 0) {
 								            if (secondary == 0) {
 								                if (tertiary == 0) return T_IGNORE;
 								                return S_IGNORE;
 								            }
 								            return P_IGNORE;
 								        }
 								        if (collator.isVariable(ce)) return VARIABLE;
 								        if (primary < UNSUPPORTED_BASE) return NON_IGNORE;
 								        if (primary < UNSUPPORTED_LIMIT) return IMPLICIT;
 								        return TRAILING;
 								    }
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								    static long getPrimary(int[] ces, int len) {
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        for (int i = 0; i < len; ++i) {
 								            int result = UCA.getPrimary(ces[i]);
 								            if (result == 0) continue;
 								    	    if (UCA.isImplicitLeadPrimary(result)) {
 								    		    return (result << 16) + UCA.getPrimary(ces[i+1]);
 								    	    } else {
 								    		    return result;
 								    	    }
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    	}
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	return 0;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    }
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								    static long getSecondary(int[] ces, int len) {
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        for (int i = 0; i < len; ++i) {
 								            int result = UCA.getSecondary(ces[i]);
 								            if (result == 0) continue;
 								            return result;
 								        }
 								    	return 0;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    }
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								    static long getTertiary(int[] ces, int len) {
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        for (int i = 0; i < len; ++i) {
 								    		int result = UCA.getTertiary(ces[i]);
 								            if (result == 0) continue;
 								            return result;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    	}
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	return 0;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								    static final int
 								    	PRIMARY_DIFF = 0,
 								    	SECONDARY_DIFF = 1,
 								    	TERTIARY_DIFF = 2,
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	QUARTERNARY_DIFF = 3,
 								    	DONE = -1;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    static class CE_Iterator {
 								        int[] ces;
 								        int len;
 								        int current;
 								        int level;
 								        void reset(int[] ces, int len) {
 								            this.ces = ces;
 								            this.len = len;
 								            current = 0;
 								            level = PRIMARY_DIFF;
 								        }
 								        void setLevel(int level) {
 								            current = 0;
 								            this.level = level;
 								        }
 								        int next() {
 								            int val = DONE;
 								            while (current < len) {
 								                int ce = ces[current++];
 								                switch (level) {
 								                    case PRIMARY_DIFF: val = UCA.getPrimary(ce); break;
 								                    case SECONDARY_DIFF: val = UCA.getSecondary(ce); break;
 								                    case TERTIARY_DIFF: val = UCA.getTertiary(ce); break;
 								                }
 								                if (val != 0) return val;
 								            }
 								            return DONE;
 								        }
 								    }
 								    static CE_Iterator ceit1 = new CE_Iterator();
 								    static CE_Iterator ceit2 = new CE_Iterator();
 								    // WARNING, Never Recursive!
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+									static int getStrengthDifference(int[] ces, int len, int[] lastCes, int lastLen) {
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+									    if (false && lastLen > 0 && lastCes[0] > 0) {
 									        System.out.println("DeBug");
 									    }
 									    ceit1.reset(ces, len);
 									    ceit2.reset(lastCes, lastLen);
 									    for (int level = PRIMARY_DIFF; level <= TERTIARY_DIFF; ++level) {
 									        ceit1.setLevel(level);
 									        ceit2.setLevel(level);
 									        while (true) {
 									            int weight1 = ceit1.next();
 									            int weight2 = ceit2.next();
 									            if (weight1 != weight2) return level;
 									            if (weight1 == DONE) break;
 									        }
 									    }
 									    return QUARTERNARY_DIFF;
 									    /*
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        int relation = QUARTERNARY_DIFF;
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								        if (getPrimary(ces, len) != getPrimary(lastCes, lastLen)) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            relation = PRIMARY_DIFF;
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								        } else if (getSecondary(ces, len) != getSecondary(lastCes, lastLen)) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            relation = SECONDARY_DIFF;
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8897
											
										
										
											2002-06-15 03:15:55 +00:00
+								        } else if (getTertiary(ces, len) != getTertiary(lastCes, lastLen)) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            relation = TERTIARY_DIFF;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								        } else if (len > lastLen) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            relation = TERTIARY_DIFF; // HACK
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								        } else {
 								            int minLen = len < lastLen ? len : lastLen;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+											int start = UCA.isImplicitLeadCE(ces[0]) ? 2 : 1;
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            for (int kk = start; kk < minLen; ++kk) {
 								                int lc = lastCes[kk];
 								                int c = ces[kk];
 								                if (collator.getPrimary(c) != collator.getPrimary(lc)
 								                    || collator.getSecondary(c) != collator.getSecondary(lc)) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                    relation = QUARTERNARY_DIFF;   // reset relation on FIRST char, since differ anyway
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								                    break;
 								                    } else if (collator.getTertiary(c) > collator.getTertiary(lc)) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                    relation = TERTIARY_DIFF;   // reset to tertiary (but later ce's might override!)
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								                }
 								            }
 								        }
 								        return relation;
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        */
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    // static final String[] RELATION_NAMES = {" <", "   <<", "     <<<", "         ="};
 								    static final String[] RELATION_NAMES = {" <\t", "  <<\t", "   <<<\t", "    =\t"};
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								    static final String[] XML_RELATION_NAMES = {"p", "s", "t", "eq"};
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    static class ArrayWrapper {
 								    	int[] array;
 								    	int start;
 								    	int limit;
 								    	/*public ArrayWrapper(int[] contents) {
 								    		set(contents, 0, contents.length);
 								    	}
 								    	*/
 								    	public ArrayWrapper(int[] contents, int start, int limit) {
 								    		set(contents, start, limit);
 								    	}
 								    	private void set(int[] contents, int start, int limit) {
 								    		array = contents;
 								    		this.start = start;
 								    		this.limit = limit;
 										}
 								    	public boolean equals(Object other) {
 								    		ArrayWrapper that = (ArrayWrapper) other;
 								    		if (that.limit - that.start != limit - start) return false;
 								    		for (int i = start; i < limit; ++i) {
 								    			if (array[i] != that.array[i - start + that.start]) return false;
 								    		}
 								    		return true;
 								    	}
 								    	public int hashCode() {
 								    		int result = limit - start;
 								    		for (int i = start; i < limit; ++i) {
 								    			result = result * 37 + array[i];
 								    		}
 								    		return result;
 								    	}
 								    }
 								    static int testCase[] = {
 								    	//collator.makeKey(0xFF40, 0x0020, 0x0002),
 								    	collator.makeKey(0x0255, 0x0020, 0x000E),
 								    };
 								    static String testString = "\u33C2\u002E";
 								    static boolean contains(int[] array, int start, int limit, int key) {
 								    	for (int i = start; i < limit; ++i) {
 								    		if (array[i] == key) return true;
 								    	}
 								    	return false;
 								    }
 								    static final void addToBackMap(Map backMap, int[] ces, int len, String s, boolean show) {
 								    	if (show || contains(testCase, 0, testCase.length, ces[0]) || testString.indexOf(s) > 0) {
 								    		System.out.println("Test case: " + Utility.hex(s) + ", " + CEList.toString(ces, len));
 								    	}
 										backMap.put(new ArrayWrapper((int[])(ces.clone()), 0, len), s);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+										/*
 										// HACK until Ken fixes
 										for (int i = 0; i < len; ++i) {
 										    int ce = ces[i];
 										    if (collator.isImplicitLeadCE(ce)) {
 										        ++i;
 										        ce = ces[i];
 										        if (DEBUG
 										            && (UCA.getPrimary(ce) == 0 || UCA.getSecondary(ce) != 0 || UCA.getTertiary(ce) != 0)) {
 										            System.out.println("WEIRD 2nd IMPLICIT: "
 										                + CEList.toString(ces, len)
 										                + ", " + ucd.getCodeAndName(s));
 										        }
 										        ces[i] = UCA.makeKey(UCA.getPrimary(ce), NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
 										    }
 										}
 										backMap.put(new ArrayWrapper((int[])(ces.clone()), 0, len), s);
 										*/
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    }
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
 								    static UnicodeSet homelessSecondaries = new UnicodeSet(0x0153,0x0170);
 								    /*static int[] ignorableList = new int[homelessSecondaries.size()];
 								    static {
 								        UnicodeSetIterator ui = new UnicodeSetIterator(homelessSecondaries);
 								        int counter = 0;
 								        while (ui.next()) {
 								            ignorableList. UCA.makeKey(0x0000, 0x0153, 0x0002),
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    	UCA.makeKey(0x0000, 0x0154, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0155, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0156, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0157, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0158, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0159, 0x0002),
 								    	UCA.makeKey(0x0000, 0x015A, 0x0002),
 								    	UCA.makeKey(0x0000, 0x015B, 0x0002),
 								    	UCA.makeKey(0x0000, 0x015C, 0x0002),
 								    	UCA.makeKey(0x0000, 0x015D, 0x0002),
 								    	UCA.makeKey(0x0000, 0x015E, 0x0002),
 								    	UCA.makeKey(0x0000, 0x015F, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0160, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0161, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0162, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0163, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0164, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0165, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0166, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0167, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0168, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0169, 0x0002),
 								    	UCA.makeKey(0x0000, 0x016A, 0x0002),
 								    	UCA.makeKey(0x0000, 0x016B, 0x0002),
 								    	UCA.makeKey(0x0000, 0x016C, 0x0002),
 								    	UCA.makeKey(0x0000, 0x016D, 0x0002),
 								    	UCA.makeKey(0x0000, 0x016E, 0x0002),
 								    	UCA.makeKey(0x0000, 0x016F, 0x0002),
 								    	UCA.makeKey(0x0000, 0x0170, 0x0002),
 								    };
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    */
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
 								    static final String getFromBackMap(Map backMap, int[] originalces, int expansionStart, int len, String chr, int[] rel) {
 								    	int[] ces = (int[])(originalces.clone());
 								    	String expansion = "";
 								    	// process ces to neutralize tertiary
 								    	for (int i = expansionStart; i < len; ++i) {
 								    		int probe = ces[i];
 								        	char primary = collator.getPrimary(probe);
 								        	char secondary = collator.getSecondary(probe);
 								        	char tertiary = collator.getTertiary(probe);
 								            int tert = tertiary;
 								            switch (tert) {
 								            case 8: case 9: case 0xA: case 0xB: case 0xC: case 0x1D:
 								                tert = 8;
 								                break;
 								            case 0xD: case 0x10: case 0x11: case 0x12: case 0x13: case 0x1C:
 								                tert = 0xE;
 								                break;
 								            default:
 								                tert = 2;
 								                break;
 								            }
 								            ces[i] = collator.makeKey(primary, secondary, tert);
 								    	}
 								        for (int i = expansionStart; i < len;) {
 								        	int limit;
 								        	String s = null;
 								        	for (limit = len; limit > i; --limit) {
 								        		ArrayWrapper wrapper = new ArrayWrapper(ces, i, limit);
 								        		s = (String)backMap.get(wrapper);
 								            	if (s != null) break;
 								            }
 								            if (s == null) {
 								            	do {
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            		if (homelessSecondaries.contains(UCA.getSecondary(ces[i]))) {
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            			s = "";
 								            			if (rel[0] > 1) rel[0] = 1; // HACK
 								            			break;
 								            		}
 								            		// Try stomping the value to different tertiaries
 								    				int probe = ces[i];
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								    				if (UCA.isImplicitLeadCE(probe)) {
 								                        s = UTF16.valueOf(UCA.ImplicitToCodePoint(UCA.getPrimary(probe), UCA.getPrimary(ces[i+1])));
 								                        ++i; // skip over next item!!
 								                        break;
 								    				}
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								        			char primary = collator.getPrimary(probe);
 								        			char secondary = collator.getSecondary(probe);
 								            		ces[i] = collator.makeKey(primary, secondary, 2);
 								        			ArrayWrapper wrapper = new ArrayWrapper(ces, i, i+1);
 								        			s = (String)backMap.get(wrapper);
 								        			if (s != null) break;
 								            		ces[i] = collator.makeKey(primary, secondary,0xE);
 								        			wrapper = new ArrayWrapper(ces, i, i+1);
 								        			s = (String)backMap.get(wrapper);
 								        			if (s != null) break;
 													/*
 								                	int meHack = UCA.makeKey(0x1795,0x0020,0x0004);
 								                	if (ces[i] == meHack) {
 								                    	s = "\u3081";
 								                    	break;
 								                    }
 								                    */
 								                    // we failed completely. Print error message, and bail
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    System.out.println("No back map for " + CEList.toString(ces[i])
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								                        + " from " + CEList.toString(ces, len));
 								                    System.out.println("\t" + ucd.getCodeAndName(chr)
 								                        + " => " + ucd.getCodeAndName(nfkdNew.normalize(chr))
 								                    );
 								                    s = "[" + Utility.hex(ces[i]) + "]";
 								        	    } while (false); // exactly one time, just for breaking
 								            	limit = i + 1;
 								            }
 								            expansion += s;
 								            i = limit;
 								        }
 								        return expansion;
 								    }
 								    /*
 								    static final String getFromBackMap(Map backMap, int[] ces, int index, int limit) {
 								    	ArrayWrapper wrapper = new ArrayWrapper(ces, index, limit);
 								    	int probe = ces[index];
 								    	wrapperContents[0] = probe;
 								        String s = (String)backMap.get(wrapper);
 								        outputLen[0] = 1;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        if (s != null) return s;
 								        char primary = collator.getPrimary(probe);
 								        char secondary = collator.getSecondary(probe);
 								        char tertiary = collator.getTertiary(probe);
 								        if (isFixedIdeograph(remapUCA_CompatibilityIdeographToCp(primary))) {
 								            return String.valueOf(primary);
 								        } else {
 								            int tert = tertiary;
 								            switch (tert) {
 								            case 8: case 9: case 0xA: case 0xB: case 0xC: case 0x1D:
 								                tert = 8;
 								                break;
 								            case 0xD: case 0x10: case 0x11: case 0x12: case 0x13: case 0x1C:
 								                tert = 0xE;
 								                break;
 								            default:
 								                tert = 2;
 								                break;
 								            }
 								            probe = collator.makeKey(primary, secondary, tert);
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            wrapperContents[0] = probe;
 								            s = (String)backMap.get(wrapper);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (s != null) return s;
 								            probe = collator.makeKey(primary, secondary, collator.NEUTRAL_TERTIARY);
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								            wrapperContents[0] = probe;
 								            s = (String)backMap.get(wrapper);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
 								        if (s != null) return s;
 								        if (primary != 0 && secondary != collator.NEUTRAL_SECONDARY) {
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								        	int[] dummyArray = new int[1];
 								        	dummyArray[0] = collator.makeKey(primary, collator.NEUTRAL_SECONDARY, tertiary);
 								            String first = getFromBackMap(backMap, dummyArray, 0, outputLen);
 								            dummyArray[0] = collator.makeKey(0, secondary, collator.NEUTRAL_TERTIARY);
 								            String second = getFromBackMap(backMap, dummyArray, 0, outputLen);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (first != null && second != null) {
 								                s = first + second;
 								            }
 								        }
 								        return s;
 								    }
-												Completed most of the upgrade to UCA 3.1.1;
 involved double CEs for Han; fixing back-mappings, etc.
 did a bit of code cleanup too.
Remaining to do: backmap from UCA double CEs to original character codes,
 for constructing Fractional UCA.

X-SVN-Rev: 8754
											
										
										
											2002-05-31 01:41:04 +00:00
+								    */
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								    static final String[] RELATION = {
 								        "<", " << ", "  <<<  ", "    =    ", "    =    ", "    =    ", "  >>>  ", " >> ", ">"
 								    };
 								    static StringBuffer quoteOperandBuffer = new StringBuffer(); // faster
-												Fixing Break Charts & Tests

X-SVN-Rev: 11428
											
										
										
											2003-04-01 02:53:07 +00:00
+								    static UnicodeSet needsQuoting = null;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static final String quoteOperand(String s) {
-												Fixing Break Charts & Tests

X-SVN-Rev: 11428
											
										
										
											2003-04-01 02:53:07 +00:00
+								        if (needsQuoting == null) {
 								            /*
 								            c >= 'a' && c <= 'z'
 								              || c >= 'A' && c <= 'Z'
 								              || c >= '0' && c <= '9'
 								              || (c >= 0xA0 && !UCharacterProperty.isRuleWhiteSpace(c))
 								              */
 								            needsQuoting = new UnicodeSet("[a-zA-Z0-9\\u00A0-\\U00010FFF]");
 								            needsQuoting.remove();
 								        }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								    	s = NFC.normalize(s);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        quoteOperandBuffer.setLength(0);
 								        boolean noQuotes = true;
 								        boolean inQuote = false;
 								        for (int i = 0; i < s.length(); ++i) {
 								            char c = s.charAt(i);
-												Fixing Break Charts & Tests

X-SVN-Rev: 11428
											
										
										
											2003-04-01 02:53:07 +00:00
+								            if (!needsQuoting.contains(c)) {
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                if (inQuote) {
 								                    quoteOperandBuffer.append('\'');
 								                    inQuote = false;
 								                }
 								                quoteOperandBuffer.append(c);
 								            } else {
 								                noQuotes = false;
 								                if (c == '\'') {
 								                    quoteOperandBuffer.append("''");
 								                } else {
 								                    if (!inQuote) {
 								                        quoteOperandBuffer.append('\'');
 								                        inQuote = true;
 								                    }
 								                    if (c <= 0x20 || c > 0x7E) quoteOperandBuffer.append("\\u").append(Utility.hex(c));
 								                    else quoteOperandBuffer.append(c);
 								                }
 								            }
 								            /*
 								            switch (c) {
 								              case '<':  case '>':  case '#': case '=': case '&': case '/':
 								                quoteOperandBuffer.append('\'').append(c).append('\'');
 								                break;
 								              case '\'':
 								                quoteOperandBuffer.append("''");
 								                break;
 								              default:
 								                if (0 <= c && c < 0x20 || 0x7F <= c && c < 0xA0) {
 								                    quoteOperandBuffer.append("\\u").append(Utility.hex(c));
 								                    break;
 								                }
 								                quoteOperandBuffer.append(c);
 								                break;
 								            }
 								            */
 								        }
 								        if (inQuote) {
 								            quoteOperandBuffer.append('\'');
 								        }
 								        if (noQuotes) return s; // faster
 								        return quoteOperandBuffer.toString();
 								    }
 								/*
 ; H   # HANGUL CHOSEONG HIEUH
 ; A   # HANGUL JUNGSEONG A
 ; I   # HANGUL JUNGSEONG I
 A8; G   # HANGUL JONGSEONG KIYEOK
 C2; H   # HANGUL JONGSEONG HIEUH
 F9;HANGUL JONGSEONG YEORINHIEUH;Lo;0;L;;;;;N;;;;;
 								*/
 								    static boolean gotInfo = false;
 								    static int oldJamo1, oldJamo2, oldJamo3, oldJamo4, oldJamo5, oldJamo6;
 								    static boolean isOldJamo(int primary) {
 								        if (!gotInfo) {
 								            int[] temp = new int[20];
 								            collator.getCEs("\u1112", true, temp);
 								            oldJamo1 = temp[0] >> 16;
 								            collator.getCEs("\u1161", true, temp);
 								            oldJamo2 = temp[0] >> 16;
 								            collator.getCEs("\u1175", true, temp);
 								            oldJamo3 = temp[0] >> 16;
 								            collator.getCEs("\u11A8", true, temp);
 								            oldJamo4 = temp[0] >> 16;
 								            collator.getCEs("\u11C2", true, temp);
 								            oldJamo5 = temp[0] >> 16;
 								            collator.getCEs("\u11F9", true, temp);
 								            oldJamo6 = temp[0] >> 16;
 								            gotInfo = true;
 								        }
 								        return primary > oldJamo1 && primary < oldJamo2
 								            || primary > oldJamo3 && primary < oldJamo4
 								            || primary > oldJamo5 && primary <= oldJamo6;
 								    }
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								    static Normalizer NFKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
 								    static Normalizer NFD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								    static int variableHigh = 0;
 								    static final int COMMON = 5;
 								    static int gapForA = 0;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								    static int[] primaryDelta;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								    static void writeFractionalUCA(String filename) throws IOException {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        Default.setUCD();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        checkImplicit();
 								        checkFixes();
 								        variableHigh = collator.getVariableHigh() >> 16;
 								        BitSet secondarySet = collator.getWeightUsage(2);
 								        // HACK for CJK
 								        secondarySet.set(0x0040);
 								        int subtotal = 0;
 								        System.out.println("Fixing Secondaries");
 								        compactSecondary = new int[secondarySet.size()];
 								        for (int secondary = 0; secondary < compactSecondary.length; ++secondary) {
 								            if (secondarySet.get(secondary)) {
 								                compactSecondary[secondary] = subtotal++;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                /*System.out.println("compact[" + Utility.hex(secondary)
 								                    + "]=" + Utility.hex(compactSecondary[secondary])
 								                    + ", " + Utility.hex(fixSecondary(secondary)));*/
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								        }
 								        System.out.println();
 								        //TO DO: find secondaries that don't overlap, and reassign
 								        System.out.println("Finding Bumps");
 								        char[] representatives = new char[65536];
 								        findBumps(representatives);
 								        System.out.println("Fixing Primaries");
 								        BitSet primarySet = collator.getWeightUsage(1);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
 								        primaryDelta = new int[65536];
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        // start at 1 so zero stays zero.
 								        for (int primary = 1; primary < 0xFFFF; ++primary) {
 								            if (primarySet.get(primary)) primaryDelta[primary] = 2;
 								            else if (primary == 0x1299) {
 								                System.out.println("WHOOPS! Missing weight");
 								            }
 								        }
 								        int bumpNextToo = 0;
 								        subtotal = (COMMON << 8) + COMMON; // skip forbidden bytes, leave gap
 								        int lastValue = 0;
 								        // start at 1 so zero stays zero.
 								        for (int primary = 1; primary < 0xFFFF; ++primary) {
 								            if (primaryDelta[primary] != 0) {
 								                // special handling for Jamo 3-byte forms
 								                if (isOldJamo(primary)) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                    if (DEBUG) System.out.print("JAMO: " + Utility.hex(lastValue));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    if ((lastValue & 0xFF0000) == 0) { // lastValue was 2-byte form
 								                        subtotal += primaryDelta[primary];  // we convert from relative to absolute
 								                        lastValue = primaryDelta[primary] = (subtotal << 8) + 0x10; // make 3 byte, leave gap
 								                    } else { // lastValue was 3-byte form
 								                        lastValue = primaryDelta[primary] = lastValue + 3;
 								                    }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                    if (DEBUG) System.out.println(" => " + Utility.hex(lastValue));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    continue;
 								                }
 								                subtotal += primaryDelta[primary];  // we convert from relative to absolute
 								                if (singles.get(primary)) {
 								                    subtotal = (subtotal & 0xFF00) + 0x100;
 								                    if (primary == gapForA) subtotal += 0x200;
 								                    if (bumpNextToo == 0x40) subtotal += 0x100; // make sure of gap between singles!!!
 								                    bumpNextToo = 0x40;
 								                } else if (primary > variableHigh) {
 								                    variableHigh = 0xFFFF; // never do again!
 								                    subtotal = (subtotal & 0xFF00) + 0x320 + bumpNextToo;
 								                    bumpNextToo = 0;
 								                } else if (bumpNextToo > 0 || bumps.get(primary)) {
 								                    subtotal = ((subtotal + 0x20) & 0xFF00) + 0x120 + bumpNextToo;
 								                    bumpNextToo = 0;
 								                } else {
 								                    int lastByte = subtotal & 0xFF;
 								                    // skip all values of FF, 00, 01, 02,
 								                    if (0 <= lastByte && lastByte < COMMON || lastByte == 0xFF) {
 								                        subtotal = ((subtotal + 1) & 0xFFFFFF00) + COMMON; // skip
 								                    }
 								                }
 								                lastValue = primaryDelta[primary] = subtotal;
 								            }
 								            // fixup for Kanji
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            /*
 								            // WE DROP THIS: we are skipping all CJK values above, and will fix them separately
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            int fixedCompat = remapUCA_CompatibilityIdeographToCp(primary);
 								            if (isFixedIdeograph(fixedCompat)) {
 								                int CE = getImplicitPrimary(fixedCompat);
 								                lastValue = primaryDelta[primary] = CE >>> 8;
 								            }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            */
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            //if ((primary & 0xFF) == 0) System.out.println(Utility.hex(primary) + " => " + hexBytes(primaryDelta[primary]));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
 								        // now translate!!
-												Fixed bug where "fixed" (not explicit in the main UCA table) items
were not being included in canonical closure for the fractional uca

X-SVN-Rev: 9151
											
										
										
											2002-07-15 15:23:03 +00:00
+								        String highCompat = UTF16.valueOf(0x2F805);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        System.out.println("Sorting");
 								        Map ordered = new TreeMap();
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								        Set contentsForCanonicalIteration = new TreeSet();
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        UCA.UCAContents ucac = collator.getContents(UCA.FIXED_CE, null);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        int ccounter = 0;
 								        while (true) {
 								            Utility.dot(ccounter++);
 								            String s = ucac.next();
 								            if (s == null) break;
-												Fixed bug where "fixed" (not explicit in the main UCA table) items
were not being included in canonical closure for the fractional uca

X-SVN-Rev: 9151
											
										
										
											2002-07-15 15:23:03 +00:00
+								            if (s.equals("\uFA36") || s.equals("\uF900") || s.equals("\u2ADC") || s.equals(highCompat)) {
 								                System.out.println(" * " + ucd.getCodeAndName(s));
 								            }
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								            contentsForCanonicalIteration.add(s);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
 								        }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								        // Add canonically equivalent characters!!
 								        System.out.println("Start Adding canonical Equivalents2");
 								        int canCount = 0;
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        System.out.println("Add missing decomposibles and non-characters");
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								        for (int i = 0; i < 0x10FFFF; ++i) {
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            if (!ucd.isNoncharacter(i)) {
 								                if (!ucd.isAllocated(i)) continue;
 								                if (NFD.isNormalized(i)) continue;
-												Fixed bug where "fixed" (not explicit in the main UCA table) items
were not being included in canonical closure for the fractional uca

X-SVN-Rev: 9151
											
										
										
											2002-07-15 15:23:03 +00:00
+								                if (ucd.isHangulSyllable(i)) continue;
 								                //if (collator.getCEType(i) >= UCA.FIXED_CE) continue;
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            }
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								            String s = UTF16.valueOf(i);
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            if (contentsForCanonicalIteration.contains(s)) continue; // skip if already present
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								            contentsForCanonicalIteration.add(s);
 								            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
 								            System.out.println(" + " + ucd.getCodeAndName(s));
 								            canCount++;
 								        }
 								        Set additionalSet = new HashSet();
 								        System.out.println("Loading canonical iterator");
-												added more conformance tests

X-SVN-Rev: 8928
											
										
										
											2002-06-22 21:05:34 +00:00
+								        if (canIt == null) canIt = new CanonicalIterator(".");
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								        Iterator it2 = contentsForCanonicalIteration.iterator();
 								        System.out.println("Adding any FCD equivalents that have different sort keys");
 								        while (it2.hasNext()) {
 								            String key = (String)it2.next();
 								            if (key == null) {
 								                System.out.println("Null Key");
 								                continue;
 								            }
 								            canIt.setSource(key);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
-												updated for collation bugs, added isFCD.

X-SVN-Rev: 8886
											
										
										
											2002-06-13 21:14:05 +00:00
+								            boolean first = true;
 								            while (true) {
 								                String s = canIt.next();
 								                if (s == null) break;
 								                if (s.equals(key)) continue;
 								                if (contentsForCanonicalIteration.contains(s)) continue;
 								                if (additionalSet.contains(s)) continue;
 								                // Skip anything that is not FCD.
 								                if (!NFD.isFCD(s)) continue;
 								                // We ONLY add if the sort key would be different
 								                // Than what we would get if we didn't decompose!!
 								                String sortKey = collator.getSortKey(s, UCA.NON_IGNORABLE);
 								                String nonDecompSortKey = collator.getSortKey(s, UCA.NON_IGNORABLE, false);
 								                if (sortKey.equals(nonDecompSortKey)) continue;
 								                if (first) {
 								                    System.out.println(" " + ucd.getCodeAndName(key));
 								                    first = false;
 								                }
 								                System.out.println(" => " + ucd.getCodeAndName(s));
 								                System.out.println("    old: " + collator.toString(nonDecompSortKey));
 								                System.out.println("    new: " + collator.toString(sortKey));
 								                canCount++;
 								                additionalSet.add(s);
 								                ordered.put(sortKey + '\u0000' + s, s);
 								            }
 								        }
 								        System.out.println("Done Adding canonical Equivalents -- added " + canCount);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        /*
 								        for (int ch = 0; ch < 0x10FFFF; ++ch) {
 								            Utility.dot(ch);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            byte type = collator.getCEType(ch);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            if (type >= UCA.FIXED_CE && !nfd.hasDecomposition(ch))
 								                continue;
 								            }
 								            String s = com.ibm.text.UTF16.valueOf(ch);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
 								        }
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        Hashtable multiTable = collator.getContracting();
 								        Enumeration enum = multiTable.keys();
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        int ecount = 0;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        while (enum.hasMoreElements()) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            Utility.dot(ecount++);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            String s = (String)enum.nextElement();
 								            ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
 								        }
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        */
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        // JUST FOR TESTING
 								        if (false) {
 								            String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
 								            for (int i = 0; i < sample.length(); ++i) {
 								                String s = sample.substring(i, i+1);
 								                ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s);
 								            }
 								        }
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        Utility.fixDot();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        System.out.println("Writing");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        PrintWriter shortLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_SHORT.txt"), 32*1024));
 								        PrintWriter longLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + ".txt"), 32*1024));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log = new PrintWriter(new DualWriter(shortLog, longLog));
 								        PrintWriter summary = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_summary.txt"), 32*1024));
 								        //log.println("[Variable Low = " + UCA.toString(collator.getVariableLow()) + "]");
 								        //log.println("[Variable High = " + UCA.toString(collator.getVariableHigh()) + "]");
 								        int[] ces = new int[100];
 								        StringBuffer newPrimary = new StringBuffer();
 								        StringBuffer newSecondary = new StringBuffer();
 								        StringBuffer newTertiary = new StringBuffer();
 								        StringBuffer oldStr = new StringBuffer();
 								        EquivalenceClass secEq = new EquivalenceClass("\r\n#", 2, true);
 								        EquivalenceClass terEq = new EquivalenceClass("\r\n#", 2, true);
 								        String[] sampleEq = new String[500];
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        int[] sampleLen = new int[500];
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        Iterator it = ordered.keySet().iterator();
 								        int oldFirstPrimary = UCA.getPrimary(UCA.TERMINATOR);
 								        boolean wasVariable = false;
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
+								        log.println("# Fractional UCA Table, generated from standard UCA");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("# " + getNormalDate());
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
+								        log.println("# VERSION: UCA=" + collator.getDataVersion() + ", UCD=" + collator.getUCDVersion());
 								        log.println();
 								        log.println("# Generated processed version, as described in ICU design document.");
 								        log.println("# NOTES");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("#  - Bugs in UCA data are NOT FIXED, except for the following problems:");
 								        log.println("#    - canonical equivalents are decomposed directly (some beta UCA are wrong).");
 								        log.println("#    - overlapping variable ranges are fixed.");
 								        log.println("#  - Format is as follows:");
 								        log.println("#      <codepoint> (' ' <codepoint>)* ';' ('L' | 'S') ';' <fractionalCE>+ ' # ' <UCA_CE> '# ' <name> ");
 								        log.println("#    - zero weights are not printed");
 								        log.println("#    - S: contains at least one lowercase or SMALL kana");
 								        log.println("#    - L: otherwise");
 								        log.println("#    - Different primaries are separated by a blank line.");
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
+								        log.println("# WARNING");
 								        log.println("#  - Differs from previous version in that MAX value was introduced at 1F.");
 								        log.println("#    All tertiary values are shifted down by 1, filling the gap at 7!");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        String lastChr = "";
 								        int lastNp = 0;
 								        boolean doVariable = false;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        char[] codeUnits = new char[100];
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        FCE firstSecondaryIgnorable = new FCE(false);
 								        FCE lastSecondaryIgnorable = new FCE(true);
 								        FCE firstPrimaryIgnorable = new FCE(false);
 								        FCE lastPrimaryIgnorable = new FCE(true);
 								        FCE firstVariable = new FCE(false);
 								        FCE lastVariable = new FCE(true);
 								        FCE firstNonIgnorable = new FCE(false);
 								        FCE lastNonIgnorable = new FCE(true);
 								        FCE firstTrailing = new FCE(false);
 								        FCE lastTrailing = new FCE(true);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
 								        Map backMap = new TreeMap();
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        while (it.hasNext()) {
 								            Object sortKey = it.next();
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								            String chr = (String)ordered.get(sortKey);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								            // get CEs and fix
 								            int len = collator.getCEs(chr, true, ces);
 								            int firstPrimary = UCA.getPrimary(ces[0]);
 								            if (firstPrimary != oldFirstPrimary) {
 								                log.println();
 								                boolean isVariable = collator.isVariable(ces[0]);
 								                if (isVariable != wasVariable) {
 								                    if (isVariable) {
 								                        log.println("# START OF VARIABLE SECTION!!!");
 								                        summary.println("# START OF VARIABLE SECTION!!!");
 								                    } else {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								                        log.println("[variable top = " + Utility.hex(primaryDelta[oldFirstPrimary]) + "] # END OF VARIABLE SECTION!!!");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                        doVariable = true;
 								                    }
 								                    log.println();
 								                }
 								                wasVariable = isVariable;
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								                oldFirstPrimary = firstPrimary;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            oldStr.setLength(0);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            chr.getChars(0, chr.length(), codeUnits, 0);
 								            log.print(Utility.hex(codeUnits, 0, chr.length(), " ") + "; ");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            boolean nonePrinted = true;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            boolean isFirst = true;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            for (int q = 0; q < len; ++q) {
 								                nonePrinted = false;
 								                newPrimary.setLength(0);
 								                newSecondary.setLength(0);
 								                newTertiary.setLength(0);
 								                int pri = UCA.getPrimary(ces[q]);
 								                int sec = UCA.getSecondary(ces[q]);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                int ter = UCA.getTertiary(ces[q]);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                oldStr.append(CEList.toString(ces[q]));// + "," + Integer.toString(ces[q],16);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                // special treatment for unsupported!
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                if (UCA.isImplicitLeadPrimary(pri)) {
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								                    if (DEBUG) System.out.println("DEBUG: " + CEList.toString(ces, len)
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                        + ", Current: " + q + ", " + ucd.getCodeAndName(chr));
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    ++q;
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    oldStr.append(CEList.toString(ces[q]));// + "," + Integer.toString(ces[q],16);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
 								                    int pri2 = UCA.getPrimary(ces[q]);
 								                    // get old code point
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
 								                    int cp = UCA.ImplicitToCodePoint(pri, pri2);
 								                    // double check results!
 								                    int[] testImplicit = new int[2];
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                    collator.CodepointToImplicit(cp, testImplicit);
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                    boolean gotError = pri != testImplicit[0] || pri2 != testImplicit[1];
 								                    if (gotError) {
 								                    	System.out.println("ERROR");
 								                    }
 								                    if (DEBUG || gotError) {
 								                    	System.out.println("Computing Unsupported CP as: "
 								                        	+ Utility.hex(pri)
 								                        	+ ", " + Utility.hex(pri2)
 								                        	+ " => " + Utility.hex(cp)
 								                        	+ " => " + Utility.hex(testImplicit[0])
 								                        	+ ", " + Utility.hex(testImplicit[1])
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								                        	// + ", " + Utility.hex(fixPrimary(pri) & INT_MASK)
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                        );
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                    }
 								                    pri = cp | MARK_CODE_POINT;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								                if (sec != 0x20) {
 								                    boolean changed = secEq.add(new Integer(sec), new Integer(pri));
 								                }
 								                if (ter != 0x2) {
 								                    boolean changed = terEq.add(new Integer(ter), new Integer((pri << 16) | sec));
 								                }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
 								                if (sampleEq[sec] == null || sampleLen[sec] > len) {
 								                    sampleEq[sec] = chr;
 								                    sampleLen[sec] = len;
 								                }
 								                if (sampleEq[ter] == null || sampleLen[sec] > len) {
 								                    sampleEq[ter] = chr;
 								                    sampleLen[sec] = len;
 								                }
 								                if ((pri & MARK_CODE_POINT) == 0 && pri == 0) {
 								                    Integer key = new Integer(ces[q]);
 								                    Pair value = (Pair) backMap.get(key);
 								                    if (value == null
 								                    || (len < ((Integer)(value.first)).intValue())) {
 								                        backMap.put(key, new Pair(new Integer(len), chr));
 								                    }
 								                }
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
 								                // int oldPrimaryValue = UCA.getPrimary(ces[q]);
 								                int np = fixPrimary(pri);
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								                int ns = fixSecondary(sec);
 								                int nt = fixTertiary(ter);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
+								                try {
 								                	hexBytes(np, newPrimary);
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								                	hexBytes(ns, newSecondary);
 								                	hexBytes(nt, newTertiary);
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
+								                } catch (Exception e) {
 								                	throw new ChainException("Character is {0}", new String[] {Utility.hex(chr)}, e);
 								                }
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                if (isFirst) {
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    if (!sameTopByte(np, lastNp)) {
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								                        summary.println("Last:  " + Utility.hex(lastNp & INT_MASK) + " " + ucd.getName(UTF16.charAt(lastChr,0)));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                        summary.println();
 								                        if (doVariable) {
 								                            doVariable = false;
 								                            summary.println("[variable top = " + Utility.hex(primaryDelta[firstPrimary]) + "] # END OF VARIABLE SECTION!!!");
 								                            summary.println();
 								                        }
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								                        summary.println("First: " + Utility.hex(np & INT_MASK) + ", " + ucd.getCodeAndName(UTF16.charAt(chr,0)));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    }
 								                    lastNp = np;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    isFirst = false;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                }
 								                log.print("[" + newPrimary
 								                    + ", " + newSecondary
 								                    + ", " + newTertiary
 								                    + "]");
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
 								                // RECORD STATS
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								                // but ONLY if we are not part of an implicit
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								                if ((pri & MARK_CODE_POINT) == 0) {
 								                    if (np == 0 && ns == 0) {
 								                        firstSecondaryIgnorable.setValue(np, ns, nt);
 								                        lastSecondaryIgnorable.setValue(np, ns, nt);
 								                    } else if (np == 0) {
 								                        firstPrimaryIgnorable.setValue(np, ns, nt);
 								                        lastPrimaryIgnorable.setValue(np, ns, nt);
 								                    } else if (collator.isVariable(ces[q])) {
 								                        firstVariable.setValue(np, ns, nt);
 								                        lastVariable.setValue(np, ns, nt);
 								                    } else if (UCA.getPrimary(ces[q]) > UNSUPPORTED_LIMIT) {        // Trailing (none currently)
 								                        System.out.println("Trailing: "
 								                            + ucd.getCodeAndName(chr) + ", "
 								                            + CEList.toString(ces[q]) + ", "
 								                            + Utility.hex(pri) + ", "
 								                            + Utility.hex(UNSUPPORTED_LIMIT));
 								                        firstTrailing.setValue(np, ns, nt);
 								                        lastTrailing.setValue(np, ns, nt);
 								                    } else {
 								                        firstNonIgnorable.setValue(np, ns, nt);
 								                        lastNonIgnorable.setValue(np, ns, nt);
 								                    }
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								                }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            if (nonePrinted) {
 								                log.print("[,,]");
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                oldStr.append(CEList.toString(0));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            longLog.print("\t# " + oldStr + "\t* " + ucd.getName(UTF16.charAt(chr, 0)));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            log.println();
 								            lastChr = chr;
 								        }
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        // ADD HOMELESS COLLATION ELEMENTS
 								        log.println();
 								        log.println("# HOMELESS COLLATION ELEMENTS");
 								        char fakeTrail = 'a';
 								        Iterator it3 = backMap.keySet().iterator();
 								        while (it3.hasNext()) {
 								            Integer key = (Integer) it3.next();
 								            Pair pair = (Pair) backMap.get(key);
 								            if (((Integer)pair.first).intValue() < 2) continue;
 								            String sample = (String)pair.second;
 								            int ce = key.intValue();
 								            int np = fixPrimary(UCA.getPrimary(ce));
 								            int ns = fixSecondary(UCA.getSecondary(ce));
 								            int nt = fixTertiary(UCA.getTertiary(ce));
 								            newPrimary.setLength(0);
 								            newSecondary.setLength(0);
 								            newTertiary.setLength(0);
 								            hexBytes(np, newPrimary);
 								            hexBytes(ns, newSecondary);
 								            hexBytes(nt, newTertiary);
 								            log.print(Utility.hex('\uFDD0' + "" + (char)(fakeTrail++)) + "; "
 								                + "[, " + newSecondary + ", " + newTertiary + "]");
 								            longLog.print("\t# " + collator.getCEList(sample, true) + "\t* " + ucd.getCodeAndName(sample));
 								            log.println();
 								        }
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        int firstImplicit = getImplicitPrimary(CJK_BASE);
 								        int lastImplicit = getImplicitPrimary(0x10FFFF);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println();
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        log.println("# VALUES BASED ON UCA");
 								        log.println("[first tertiary ignorable " + new FCE(false,0,0, 0).formatFCE() + "]");
 								        log.println("[last tertiary ignorable " + new FCE(true,0,0, 0).formatFCE() + "]");
 								        // Since the UCA doesn't have secondary ignorables, fake them.
 								        if (firstSecondaryIgnorable.isUnset()) {
 								            System.out.println("No first/last secondary ignorable: resetting");
 								            firstSecondaryIgnorable = new FCE(false, 0, 0, COMMON<<24);
 								            lastSecondaryIgnorable = new FCE(true, 0, 0, COMMON<<24);
 								            System.out.println(firstSecondaryIgnorable.formatFCE());
 								        }
 								        log.println("[first secondary ignorable " + firstSecondaryIgnorable.formatFCE() + "]");
 								        log.println("[last secondary ignorable " + lastSecondaryIgnorable.formatFCE() + "]");
 								        log.println("[first primary ignorable " + firstPrimaryIgnorable.formatFCE() + "]");
 								        log.println("[last primary ignorable " + lastPrimaryIgnorable.formatFCE() + "]");
 								        log.println("[first variable " + firstVariable.formatFCE() + "]");
 								        log.println("[last variable " + lastVariable.formatFCE() + "]");
-												Latest updates for UCD, default values. Fixed UTF-8 output for UCA, Logical_Order_Exceptions

X-SVN-Rev: 11358
											
										
										
											2003-03-19 17:30:58 +00:00
+								        log.println("[first regular " + firstNonIgnorable.formatFCE() + "]");
 								        log.println("[last regular " + lastNonIgnorable.formatFCE() + "]");
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
 								        log.println("[first implicit " + (new FCE(false,firstImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]");
 								        log.println("[last implicit " + (new FCE(false,lastImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]");
 								        if (firstTrailing.isUnset()) {
 								            System.out.println("No first/last trailing: resetting");
 								            firstTrailing = new FCE(false, (IMPLICIT_LIMIT_BYTE+1)<<24, COMMON<<24, COMMON<<24);
 								            lastTrailing = new FCE(true, (IMPLICIT_LIMIT_BYTE+1)<<24, COMMON<<24, COMMON<<24);
 								            System.out.println(firstTrailing.formatFCE());
 								        }
 								        log.println("[first trailing " + firstTrailing.formatFCE() + "]");
 								        log.println("[last trailing " + lastTrailing.formatFCE() + "]");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println();
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        log.println("# FIXED VALUES");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("# superceded! [top "  + lastNonIgnorable.formatFCE() + "]");
 								        log.println("[fixed first implicit byte " + Utility.hex(IMPLICIT_BASE_BYTE,2) + "]");
 								        log.println("[fixed last implicit byte " + Utility.hex(IMPLICIT_LIMIT_BYTE,2) + "]");
 								        log.println("[fixed first trail byte " + Utility.hex(IMPLICIT_LIMIT_BYTE+1,2) + "]");
 								        log.println("[fixed last trail byte " + Utility.hex(SPECIAL_BASE-1,2) + "]");
 								        log.println("[fixed first special byte " + Utility.hex(SPECIAL_BASE,2) + "]");
 								        log.println("[fixed last special byte " + Utility.hex(0xFF,2) + "]");
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								        summary.println("Last:  " + Utility.hex(lastNp) + ", " + ucd.getCodeAndName(UTF16.charAt(lastChr, 0)));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        /*
 								        String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3";
 								        for (int i = 0; i < sample.length(); ++i) {
 								            char ch = sample.charAt(i);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch))
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    + "          " + ucd.getName(ch));
 								        }
 								        */
 								        summary.println();
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        summary.println("# First Implicit: " + Utility.hex(INT_MASK & getImplicitPrimary(0)));
 								        summary.println("# Last Implicit: " + Utility.hex(INT_MASK & getImplicitPrimary(0x10FFFF)));
 								        summary.println("# First CJK: " + Utility.hex(INT_MASK & getImplicitPrimary(0x4E00)));
 								        summary.println("# Last CJK: " + Utility.hex(INT_MASK & getImplicitPrimary(0xFA2F)));
 								        summary.println("# First CJK_A: " + Utility.hex(INT_MASK & getImplicitPrimary(0x3400)));
 								        summary.println("# Last CJK_A: " + Utility.hex(INT_MASK & getImplicitPrimary(0x4DBF)));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        boolean lastOne = false;
 								        for (int i = 0; i < 0x10FFFF; ++i) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								            boolean thisOne = ucd.isCJK_BASE(i) || ucd.isCJK_AB(i);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (thisOne != lastOne) {
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								                summary.println("# Implicit Cusp: CJK=" + lastOne + ": " + Utility.hex(i-1) + " => " + Utility.hex(INT_MASK & getImplicitPrimary(i-1)));
 								                summary.println("# Implicit Cusp: CJK=" + thisOne + ": " + Utility.hex(i) + " => " + Utility.hex(INT_MASK & getImplicitPrimary(i)));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                lastOne = thisOne;
 								            }
 								        }
 								        summary.println("Compact Secondary 153: " + compactSecondary[0x153]);
 								        summary.println("Compact Secondary 157: " + compactSecondary[0x157]);
 								        summary.println();
 								        summary.println("# Disjoint classes for Secondaries");
 								        summary.println("#" + secEq.toString());
 								        summary.println();
 								        summary.println("# Disjoint classes for Tertiaries");
 								        summary.println("#" + terEq.toString());
 								        summary.println();
 								        summary.println("# Example characters for each TERTIARY value");
 								        summary.println();
 								        summary.println("# UCA : (FRAC) CODE [    UCA CE    ] Name");
 								        summary.println();
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        for (int i = 0; i < sampleEq.length; ++i) {
 								            if (sampleEq[i] == null) continue;
 								            if (i == 0x20) {
 								                summary.println();
 								                summary.println("# Example characters for each SECONDARY value");
 								                summary.println();
 								                summary.println("# UCA : (FRAC) CODE [    UCA CE    ] Name");
 								                summary.println();
 								            }
 								            int len = collator.getCEs(sampleEq[i], true, ces);
 								            int newval = i < 0x20 ? fixTertiary(i) : fixSecondary(i);
 								            summary.print("# " + Utility.hex(i) + ": (" + Utility.hex(newval) + ") "
 								                + Utility.hex(sampleEq[i]) + " ");
 								            for (int q = 0; q < len; ++q) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								                summary.print(CEList.toString(ces[q]));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            summary.println(" " + ucd.getName(sampleEq[i]));
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
 								        log.close();
 								        summary.close();
 								    }
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								    static final long INT_MASK = 0xFFFFFFFFL;
 								    static class FCE {
 								        static final long UNDEFINED_MAX = Long.MAX_VALUE;
 								        static final long UNDEFINED_MIN = Long.MIN_VALUE;
 								        long[] key;
 								        boolean max;
 								        boolean debugShow = false;
 								        FCE (boolean max) {
 								            this.max = max;
 								            if (max) key = new long[] {UNDEFINED_MIN, UNDEFINED_MIN, UNDEFINED_MIN};    // make small!
 								            else key = new long[] {UNDEFINED_MAX, UNDEFINED_MAX, UNDEFINED_MAX};
 								        }
 								        FCE (boolean max, int primary, int secondary, int tertiary) {
 								            this(max);
 								            key[0] = primary & INT_MASK;
 								            key[1] = secondary & INT_MASK;
 								            key[2] = tertiary & INT_MASK;
 								        }
 								        FCE (boolean max, int primary) {
 								            this(max);
 								            key[0] = primary & INT_MASK;
 								        }
 								        boolean isUnset() {
 								            return key[0] == UNDEFINED_MIN || key[0] == UNDEFINED_MAX;
 								        }
 								        String formatFCE() {
 								            String b0 = getBuffer(key[0], false);
 								            boolean key0Defined = key[0] != UNDEFINED_MIN && key[0] != UNDEFINED_MAX;
 								            String b1 = getBuffer(key[1], key0Defined);
 								            boolean key1Defined = key[1] != UNDEFINED_MIN && key[1] != UNDEFINED_MAX;
 								            if (b1.length() != 0) b1 = " " + b1;
 								            String b2 = getBuffer(key[2], key0Defined || key1Defined);
 								            if (b2.length() != 0) b2 = " " + b2;
 								            return "[" + b0 + "," + b1  + "," + b2 + "]";
 								        }
 								        String getBuffer(long val, boolean haveHigher) {
 								            if (val == UNDEFINED_MIN) return "?";
 								            if (val == UNDEFINED_MAX) if (haveHigher) val = COMMON << 24; else return "?";
 								            StringBuffer result = new StringBuffer();
 								            hexBytes(val, result);
 								            return result.toString();
 								        }
 								        void setValue(int npInt, int nsInt, int ntInt) {
 								            if (debugShow) System.out.println("Setting FCE: "
 								                + Utility.hex(npInt) + ", "  + Utility.hex(nsInt) + ", "  + Utility.hex(ntInt));
 								            // to get the sign right!
 								            long np = npInt & INT_MASK;
 								            long ns = nsInt & INT_MASK;
 								            long nt = ntInt & INT_MASK;
 								            if (max) {
 								                if (np < key[0]) return;
 								                if (np > key[0]) {
 								                    key[0] = np;
 								                    key[1] = ns;
 								                    key[2] = nt;
 								                    return;
 								                }
 								                if (ns < key[1]) return;
 								                if (ns > key[1]) {
 								                    key[1] = ns;
 								                    key[2] = nt;
 								                    return;
 								                }
 								                if (nt > key[2]) {
 								                    key[2] = nt;
 								                }
 								            } else {
 								                if (np > key[0]) return;
 								                if (np < key[0]) {
 								                    key[0] = np;
 								                    key[1] = ns;
 								                    key[2] = nt;
 								                    return;
 								                }
 								                if (ns > key[1]) return;
 								                if (ns < key[1]) {
 								                    key[1] = ns;
 								                    key[2] = nt;
 								                    return;
 								                }
 								                if (nt > key[2]) {
 								                    key[2] = nt;
 								                }
 								            }
 								        }
 								    }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								    /*
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static boolean isFixedIdeograph(int cp) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        return (0x3400 <= cp && cp <= 0x4DB5
 								            || 0x4E00 <= cp && cp <= 0x9FA5
 								            || 0xF900 <= cp && cp <= 0xFA2D // compat: most of these decompose anyway
 								            || 0x20000 <= cp && cp <= 0x2A6D6
 								            || 0x2F800 <= cp && cp <= 0x2FA1D // compat: most of these decompose anyway
 								            );
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								    */
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								/*
 ;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
 DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
 E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
 FA5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
 ;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
 A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
 F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
 								...
 FA1D;CJK COMPATIBILITY IDEOGRAPH-2FA1D;Lo;0;L;2A600;;;;N;;;;;
 								*/
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								    /*
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static int remapUCA_CompatibilityIdeographToCp(int cp) {
 								        switch (cp) {
 								            case 0x9FA6: return 0xFA0E; // FA0E ; [.9FA6.0020.0002.FA0E] # CJK COMPATIBILITY IDEOGRAPH-FA0E
 								            case 0x9FA7: return 0xFA0F; // FA0F ; [.9FA7.0020.0002.FA0F] # CJK COMPATIBILITY IDEOGRAPH-FA0F
 								            case 0x9FA8: return 0xFA11; // FA11 ; [.9FA8.0020.0002.FA11] # CJK COMPATIBILITY IDEOGRAPH-FA11
 								            case 0x9FA9: return 0xFA13; // FA13 ; [.9FA9.0020.0002.FA13] # CJK COMPATIBILITY IDEOGRAPH-FA13
 								            case 0x9FAA: return 0xFA14; // FA14 ; [.9FAA.0020.0002.FA14] # CJK COMPATIBILITY IDEOGRAPH-FA14
 								            case 0x9FAB: return 0xFA1F; // FA1F ; [.9FAB.0020.0002.FA1F] # CJK COMPATIBILITY IDEOGRAPH-FA1F
 								            case 0x9FAC: return 0xFA21; // FA21 ; [.9FAC.0020.0002.FA21] # CJK COMPATIBILITY IDEOGRAPH-FA21
 								            case 0x9FAD: return 0xFA23; // FA23 ; [.9FAD.0020.0002.FA23] # CJK COMPATIBILITY IDEOGRAPH-FA23
 								            case 0x9FAE: return 0xFA24; // FA24 ; [.9FAE.0020.0002.FA24] # CJK COMPATIBILITY IDEOGRAPH-FA24
 								            case 0x9FAF: return 0xFA27; // FA27 ; [.9FAF.0020.0002.FA27] # CJK COMPATIBILITY IDEOGRAPH-FA27
 								            case 0x9FB0: return 0xFA28; // FA28 ; [.9FB0.0020.0002.FA28] # CJK COMPATIBILITY IDEOGRAPH-FA28
 								            case 0x9FB1: return 0xFA29; // FA29 ; [.9FB1.0020.0002.FA29] # CJK COMPATIBILITY IDEOGRAPH-FA29
 								        }
 								        return cp;
 								    }
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								    */
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								/**
 								    * Function used to:
 								    * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
 								    * b) bump any non-CJK characters by 10FFFF.
 								    * The relevant blocks are:
 								    * A:	4E00..9FFF; CJK Unified Ideographs
 									*		F900..FAFF; CJK Compatibility Ideographs
 									* B:	3400..4DBF; CJK Unified Ideographs Extension A
 									*		20000..XX;	CJK Unified Ideographs Extension B (and others later on)
 									* As long as
 									*	no new B characters are allocated between 4E00 and FAFF, and
 									*	no new A characters are outside of this range,
 									* (very high probability) this simple code will work.
 									* The reordered blocks are:
 									* Block1 is CJK
 									* Block2 is CJK_COMPAT_USED
 									* Block3 is CJK_A
 									* Any other CJK gets its normal code point
 									* Any non-CJK gets +10FFFF
 									* When we reorder Block1, we make sure that it is at the very start,
 									* so that it will use a 3-byte form.
 									*/
 								static int swapCJK(int i) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+									if (i >= CJK_BASE) {
 										if (i < CJK_LIMIT)				return i - CJK_BASE;
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+										if (i < CJK_COMPAT_USED_BASE)	return i + NON_CJK_OFFSET;
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+										if (i < CJK_COMPAT_USED_LIMIT)	return i - CJK_COMPAT_USED_BASE
 																				+ (CJK_LIMIT - CJK_BASE);
 										if (i < CJK_B_BASE)				return i + NON_CJK_OFFSET;
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+										if (i < CJK_B_LIMIT)			return i; // non-BMP-CJK
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
 										return i + NON_CJK_OFFSET;	// non-CJK
 									}
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+									if (i < CJK_A_BASE)					return i + NON_CJK_OFFSET;
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+									if (i < CJK_A_LIMIT)				return i - CJK_A_BASE
 																				+ (CJK_LIMIT - CJK_BASE)
 																				+ (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								    return i + NON_CJK_OFFSET; // non-CJK
 								}
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								    // Fractional UCA Generation Constants
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								    static final int
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        TOP = 0xA0,
 								        SPECIAL_BASE = 0xF0,
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								    	NON_CJK_OFFSET = 0x110000,
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        BYTES_TO_AVOID = 3,
 								        OTHER_COUNT = 256 - BYTES_TO_AVOID,
 								        LAST_COUNT = OTHER_COUNT / 2,
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								        LAST_COUNT2 = OTHER_COUNT / 21, // room for intervening, without expanding to 5 bytes
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        IMPLICIT_3BYTE_COUNT = 1,
 								        IMPLICIT_BASE_BYTE = 0xE0,
 								        IMPLICIT_LIMIT_BYTE = IMPLICIT_BASE_BYTE + 4, // leave room for 1 3-byte and 2 4-byte forms
 								        IMPLICIT_4BYTE_BOUNDARY = IMPLICIT_3BYTE_COUNT * OTHER_COUNT * LAST_COUNT,
 								        LAST_MULTIPLIER = OTHER_COUNT / LAST_COUNT,
 								        LAST2_MULTIPLIER = OTHER_COUNT / LAST_COUNT2,
 								        IMPLICIT_BASE_3BYTE = (IMPLICIT_BASE_BYTE << 24) + 0x030300,
 								        IMPLICIT_BASE_4BYTE = ((IMPLICIT_BASE_BYTE + IMPLICIT_3BYTE_COUNT) << 24) + 0x030303
 								        ;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								    // GET IMPLICIT PRIMARY WEIGHTS
 								    // Return value is left justified primary key
 								    static int getImplicitPrimary(int cp) {
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								        cp = swapCJK(cp);
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
 								        if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
-												fixed minor error in getImplicitPrimary

X-SVN-Rev: 8780
											
										
										
											2002-06-04 19:01:51 +00:00
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        // we now have a range of numbers from 0 to 21FFFF.
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								        return getImplicitPrimaryFromSwapped(cp);
 								    }
 									static int getImplicitPrimaryFromSwapped(int cp) {
 								        // we must skip all 00, 01, 02 bytes, so most bytes have 253 values
 								        // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
 								        // we shift so that HAN all has the same first primary, for compression.
 								        // for the 4 byte case, we make the gap as large as we can fit.
 								        // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
 								        // Four byte forms (most supplementaries) are EF xx xx xx (with a gap of LAST2_MULTIPLIER == 14)
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        int last0 = cp - IMPLICIT_4BYTE_BOUNDARY;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        if (last0 < 0) {
 								            int last1 = cp / LAST_COUNT;
 								            last0 = cp % LAST_COUNT;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            int last2 = last1 / OTHER_COUNT;
 								            last1 %= OTHER_COUNT;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
 								            if (DEBUG || last2 > 0xFF-BYTES_TO_AVOID) System.out.println("3B: " + Utility.hex(cp) + " => "
 								            	+ Utility.hex(last2) + ", "
 								            	+ Utility.hex(last1) + ", "
 								            	+ Utility.hex(last0) + ", "
 								        	);
 								            return IMPLICIT_BASE_3BYTE + (last2 << 24) + (last1 << 16) + ((last0*LAST_MULTIPLIER) << 8);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        } else {
 								            int last1 = last0 / LAST_COUNT2;
 								            last0 %= LAST_COUNT2;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            int last2 = last1 / OTHER_COUNT;
 								            last1 %= OTHER_COUNT;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
 								            int last3 = last2 / OTHER_COUNT;
 								            last2 %= OTHER_COUNT;
 								            if (DEBUG || last3 > 0xFF-BYTES_TO_AVOID) System.out.println("4B: " + Utility.hex(cp) + " => "
 								            	+ Utility.hex(last3) + ", "
 								            	+ Utility.hex(last2) + ", "
 								            	+ Utility.hex(last1) + ", "
 								            	+ Utility.hex(last0 * LAST2_MULTIPLIER) + ", "
 								        	);
 								           return IMPLICIT_BASE_4BYTE + (last3 << 24) + (last2 << 16) + (last1 << 8) + (last0 * LAST2_MULTIPLIER);
 								        }
 								    }
 								    static void showImplicit(String title, int cp) {
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								    	if (DEBUG) showImplicit2(title + "-1", cp-1);
 								    	showImplicit2(title + "00", cp);
 								    	if (DEBUG) showImplicit2(title + "+1", cp+1);
 								    }
 								    static void showImplicit2(String title, int cp) {
 								        System.out.println(title + ":\t" + Utility.hex(cp)
 								        	+ " => " + Utility.hex(swapCJK(cp))
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        	+ " => " + Utility.hex(INT_MASK & getImplicitPrimary(cp)));
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								    }
 								    static void showImplicit3(String title, int cp) {
 								        System.out.println("*" + title + ":\t" + Utility.hex(cp)
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								        	+ " => " + Utility.hex(INT_MASK & getImplicitPrimaryFromSwapped(cp)));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    }
 								    // TEST PROGRAM
 								    static void checkImplicit() {
 								        System.out.println("Starting Implicit Check");
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								        long oldPrimary = 0;
 								        int oldChar = -1;
 								        int oldSwap = -1;
 								    	// test monotonically increasing
 								    	for (int i = 0; i < 0x21FFFF; ++i) {
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								    		long newPrimary = INT_MASK & getImplicitPrimaryFromSwapped(i);
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								            if (newPrimary < oldPrimary) {
 								                throw new IllegalArgumentException(Utility.hex(i) + ": overlap: "
 								                	+ Utility.hex(oldChar) + " (" + Utility.hex(oldPrimary) + ")"
 								                	+ " > " + Utility.hex(i) + "(" + Utility.hex(newPrimary) + ")");
 								            }
 								            oldPrimary = newPrimary;
 								    	}
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        showImplicit("# First CJK", CJK_BASE);
 								        showImplicit("# Last CJK", CJK_LIMIT-1);
 								        showImplicit("# First CJK-compat", CJK_COMPAT_USED_BASE);
 								        showImplicit("# Last CJK-compat", CJK_COMPAT_USED_LIMIT-1);
 								        showImplicit("# First CJK_A", CJK_A_BASE);
 								        showImplicit("# Last CJK_A", CJK_A_LIMIT-1);
 								        showImplicit("# First CJK_B", CJK_B_BASE);
 								        showImplicit("# Last CJK_B", CJK_B_LIMIT-1);
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        showImplicit("# First Other Implicit", 0);
 								        showImplicit("# Last Other Implicit", 0x10FFFF);
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								        showImplicit3("# FIRST", 0);
 								        showImplicit3("# Boundary-1", IMPLICIT_4BYTE_BOUNDARY-1);
 								        showImplicit3("# Boundary00", IMPLICIT_4BYTE_BOUNDARY);
 								        showImplicit3("# Boundary+1", IMPLICIT_4BYTE_BOUNDARY+1);
 								        showImplicit3("# LAST", 0x21FFFF);
 								    	oldPrimary = 0;
 								        oldChar = -1;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
 								        for (int batch = 0; batch < 3; ++batch) {
 								        	for (int i = 0; i <= 0x10FFFF; ++i) {
 								        		// separate the three groups
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        		if (ucd.isCJK_BASE(i) || CJK_COMPAT_USED_BASE <= i && i < CJK_COMPAT_USED_LIMIT) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        			if (batch != 0) continue;
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        		} else if (ucd.isCJK_AB(i)) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        			if (batch != 1) continue;
 								        		} else if (batch != 2) continue;
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
 								        		// test swapping
 								        		int currSwap = swapCJK(i);
 								        		if (currSwap < oldSwap) {
 								                	throw new IllegalArgumentException(Utility.hex(i) + ": overlap: "
 								                		+ Utility.hex(oldChar) + " (" + Utility.hex(oldSwap) + ")"
 								                		+ " > " + Utility.hex(i) + "(" + Utility.hex(currSwap) + ")");
 								        		}
-												Added table of information to fractional UCA, changed XML, regenerated collation test

X-SVN-Rev: 8921
											
										
										
											2002-06-22 01:21:11 +00:00
+								            	long newPrimary = INT_MASK & getImplicitPrimary(i);
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
 								            	// test correct values
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								            	if (newPrimary < oldPrimary) {
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								                	throw new IllegalArgumentException(Utility.hex(i) + ": overlap: "
-												Use modified cascade, added more tests of implicit weights

X-SVN-Rev: 8787
											
										
										
											2002-06-04 23:56:29 +00:00
+								                		+ Utility.hex(oldChar) + " (" + Utility.hex(oldPrimary) + ")"
 								                		+ " > " + Utility.hex(i) + "(" + Utility.hex(newPrimary) + ")");
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            	}
 								            	long b0 = (newPrimary >> 24) & 0xFF;
 								            	long b1 = (newPrimary >> 16) & 0xFF;
 								            	long b2 = (newPrimary >> 8) & 0xFF;
 								            	long b3 = newPrimary & 0xFF;
 								            	if (b0 < IMPLICIT_BASE_BYTE || b0 >= IMPLICIT_LIMIT_BYTE  || b1 < 3 || b2 < 3 || b3 == 1 || b3 == 2) {
 								                	throw new IllegalArgumentException(Utility.hex(i) + ": illegal byte value: " + Utility.hex(newPrimary)
 								                    	+ ", " + Utility.hex(b1) + ", " + Utility.hex(b2) + ", " + Utility.hex(b3));
 								            	}
 								            	// print range to look at
 								            	if (false) {
 								                	int b = i & 0xFF;
 								                	if (b == 255 || b == 0 || b == 1) {
 								                    	System.out.println(Utility.hex(i) + " => " + Utility.hex(newPrimary));
 								                	}
 								            	}
 								            	oldPrimary = newPrimary;
 								            	oldChar = i;
 								        	}
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
 								        System.out.println("Successful Implicit Check!!");
 								    }
 								    static boolean sameTopByte(int x, int y) {
 								        int x1 = x & 0xFF0000;
 								        int y1 = y & 0xFF0000;
 								        if (x1 != 0 || y1 != 0) return x1 == y1;
 								        x1 = x & 0xFF00;
 								        y1 = y & 0xFF00;
 								        return x1 == y1;
 								    }
 								        // return true if either:
 								        // a. toLower(NFKD(x)) != x (using FULL case mappings), OR
 								        // b. toSmallKana(NFKD(x)) != x.
 								    static final boolean needsCaseBit(String x) {
 								        String s = NFKD.normalize(x);
 								        if (!ucd.getCase(s, FULL, LOWER).equals(s)) return true;
 								        if (!toSmallKana(s).equals(s)) return true;
 								        return false;
 								    }
 								    static final StringBuffer toSmallKanaBuffer = new StringBuffer();
 								    static final String toSmallKana(String s) {
 								        // note: don't need to do surrogates; none exist
 								        boolean gotOne = false;
 								        toSmallKanaBuffer.setLength(0);
 								        for (int i = 0; i < s.length(); ++i) {
 								            char c = s.charAt(i);
 								            if ('\u3042' <= c && c <= '\u30EF') {
 								                switch(c - 0x3000) {
 								                  case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: case 0x84: case 0x86: case 0x8F:
 								                  case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: case 0xE4: case 0xE6: case 0xEF:
 								                    --c; // maps to previous char
 								                    gotOne = true;
 								                    break;
 								                  case 0xAB:
 								                    c = '\u30F5';
 								                    gotOne = true;
 								                    break;
 								                  case 0xB1:
 								                    c = '\u30F6';
 								                    gotOne = true;
 								                    break;
 								                }
 								            }
 								            toSmallKanaBuffer.append(c);
 								        }
 								        if (gotOne) return toSmallKanaBuffer.toString();
 								        return s;
 								    }
 								    /*
 F5;KATAKANA LETTER SMALL KA;Lo;0;L;;;;;N;;;;;
 AB;KATAKANA LETTER KA;Lo;0;L;;;;;N;;;;;
 F6;KATAKANA LETTER SMALL KE;Lo;0;L;;;;;N;;;;;
 B1;KATAKANA LETTER KE;Lo;0;L;;;;;N;;;;;
 A1;KATAKANA LETTER SMALL A;Lo;0;L;;;;;N;;;;;
 A2;KATAKANA LETTER A;Lo;0;L;;;;;N;;;;;
 A3;KATAKANA LETTER SMALL I;Lo;0;L;;;;;N;;;;;
 A4;KATAKANA LETTER I;Lo;0;L;;;;;N;;;;;
 A5;KATAKANA LETTER SMALL U;Lo;0;L;;;;;N;;;;;
 A6;KATAKANA LETTER U;Lo;0;L;;;;;N;;;;;
 A7;KATAKANA LETTER SMALL E;Lo;0;L;;;;;N;;;;;
 A8;KATAKANA LETTER E;Lo;0;L;;;;;N;;;;;
 A9;KATAKANA LETTER SMALL O;Lo;0;L;;;;;N;;;;;
 AA;KATAKANA LETTER O;Lo;0;L;;;;;N;;;;;
 C3;KATAKANA LETTER SMALL TU;Lo;0;L;;;;;N;;;;;
 C4;KATAKANA LETTER TU;Lo;0;L;;;;;N;;;;;
 E3;KATAKANA LETTER SMALL YA;Lo;0;L;;;;;N;;;;;
 E4;KATAKANA LETTER YA;Lo;0;L;;;;;N;;;;;
 E5;KATAKANA LETTER SMALL YU;Lo;0;L;;;;;N;;;;;
 E6;KATAKANA LETTER YU;Lo;0;L;;;;;N;;;;;
 E7;KATAKANA LETTER SMALL YO;Lo;0;L;;;;;N;;;;;
 E8;KATAKANA LETTER YO;Lo;0;L;;;;;N;;;;;
 EE;KATAKANA LETTER SMALL WA;Lo;0;L;;;;;N;;;;;
 EF;KATAKANA LETTER WA;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL A;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER A;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL I;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER I;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL U;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER U;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL E;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER E;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL O;Lo;0;L;;;;;N;;;;;
 A;HIRAGANA LETTER O;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL TU;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER TU;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL YA;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER YA;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL YU;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER YU;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER SMALL YO;Lo;0;L;;;;;N;;;;;
 ;HIRAGANA LETTER YO;Lo;0;L;;;;;N;;;;;
 E;HIRAGANA LETTER SMALL WA;Lo;0;L;;;;;N;;;;;
 F;HIRAGANA LETTER WA;Lo;0;L;;;;;N;;;;;
 								*/
 								    static final int secondaryDoubleStart = 0xD0;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								    static final int MARK_CODE_POINT = 0x40000000;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								    static int fixPrimary(int x) {
 								        int result = 0;
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								        if ((x & MARK_CODE_POINT) != 0) result = getImplicitPrimary(x & ~MARK_CODE_POINT);
 								        else result = primaryDelta[x];
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        return result;
 								    }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static int fixSecondary(int x) {
 								        x = compactSecondary[x];
 								        return fixSecondary2(x, compactSecondary[0x153], compactSecondary[0x157]);
 								    }
 								    static int fixSecondary2(int x, int gap1, int gap2) {
 								        int top = x;
 								        int bottom = 0;
 								        if (top == 0) {
 								            // ok, zero
 								        } else if (top == 1) {
 								            top = COMMON;
 								        } else {
 								            top *= 2; // create gap between elements. top is now 4 or more
 								            top += 0x80 + COMMON - 2; // insert gap to make top at least 87
 								            // lowest values are singletons. Others are 2 bytes
 								            if (top > secondaryDoubleStart) {
 								                top -= secondaryDoubleStart;
 								                top *= 4; // leave bigger gap just in case
 								                if (x > gap1) {
 								                    top += 256; // leave gap after COMBINING ENCLOSING KEYCAP (see below)
 								                }
 								                if (x > gap2) {
 								                    top += 64; // leave gap after RUNIC LETTER SHORT-TWIG-AR A (see below)
 								                }
 								                bottom = (top % LAST_COUNT) * 2 + COMMON;
 								                top = (top / LAST_COUNT) + secondaryDoubleStart;
 								            }
 								        }
 								        return (top << 8) | bottom;
 								    }
 								/*
 								# 0153: (EE3D) 20E3 [0000.0153.0002] COMBINING ENCLOSING KEYCAP
 								# 0154: (EE41) 0153 [0997.0154.0004][08B1.0020.0004] LATIN SMALL LIGATURE OE
 								# 0155: (EE45) 017F [09F3.0155.0004] LATIN SMALL LETTER LONG S
 								# 0157: (EE49) 16C6 [1656.0157.0004] RUNIC LETTER SHORT-TWIG-AR A
 								# 0158: (EE4D) 2776 [0858.0158.0006] DINGBAT NEGATIVE CIRCLED DIGIT ONE
 								*/
 								    static int fixTertiary(int x) {
 								        if (x == 0) return x;
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
+								        if (x == 1 || x == 7) throw new IllegalArgumentException("Tertiary illegal: " + x);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        // 2 => COMMON, 1 is unused
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
+								        int y = x < 7 ? x : x - 1; // we now use 1F = MAX. Causes a problem so we shift everything to fill a gap at 7 (unused).
 								        int result = 2 * (y - 2) + COMMON;
 								        if (result >= 0x3E) throw new IllegalArgumentException("Tertiary too large: "
 								        	+ Utility.hex(x) + " => " + Utility.hex(result));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        // get case bits. 00 is low, 01 is mixed (never happens), 10 is high
 								        if (isUpperTertiary[x]) result |= 0x80;
 								        return result;
 								    }
 								    static final boolean[] isUpperTertiary = new boolean[32];
 								    static {
 								        isUpperTertiary[0x8] = true;
 								        isUpperTertiary[0x9] = true;
 								        isUpperTertiary[0xa] = true;
 								        isUpperTertiary[0xb] = true;
 								        isUpperTertiary[0xc] = true;
 								        isUpperTertiary[0xe] = true;
 								        isUpperTertiary[0x11] = true;
 								        isUpperTertiary[0x12] = true;
 								        isUpperTertiary[0x1D] = true;
 								    }
 								    static void checkFixes() {
 								        System.out.println("Checking Secondary/Tertiary Fixes");
 								        int lastVal = -1;
 								        for (int i = 0; i <= 0x16E; ++i) {
 								            if (i == 0x153) {
 								                System.out.println("debug");
 								            }
 								            int val = fixSecondary2(i, 999, 999); // HACK for UCA
 								            if (val <= lastVal) throw new IllegalArgumentException(
 								                "Unordered: " + Utility.hex(val) + " => " + Utility.hex(lastVal));
 								            int top = val >>> 8;
 								            int bottom = val & 0xFF;
 								            if (top != 0 && (top < COMMON || top > 0xEF)
 								                || (top > COMMON && top < 0x87)
 								                || (bottom != 0 && (isEven(bottom) || bottom < COMMON || bottom > 0xFD))
 								                || (bottom == 0 && top != 0 && isEven(top))) {
 								                throw new IllegalArgumentException("Secondary out of range: " + Utility.hex(i) + " => "
 								                    + Utility.hex(top) + ", " + Utility.hex(bottom));
 								            }
 								        }
 								        lastVal = -1;
 								        for (int i = 0; i <= 0x1E; ++i) {
-												Fixes for MAX value, moved commandline stuff to Main.

X-SVN-Rev: 8733
											
										
										
											2002-05-29 23:18:15 +00:00
+								            if (i == 1 || i == 7) continue; // never occurs
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            int val = fixTertiary(i);
 								            val &= 0x7F; // mask off case bits
 								            if (val <= lastVal) throw new IllegalArgumentException(
 								                "Unordered: " + Utility.hex(val) + " => " + Utility.hex(lastVal));
 								            if (val != 0 && (isEven(val) || val < COMMON || val > 0x3D)) {
 								                throw new IllegalArgumentException("Tertiary out of range: " + Utility.hex(i) + " => "
 								                    + Utility.hex(val));
 								            }
 								        }
 								        System.out.println("END Checking Secondary/Tertiary Fixes");
 								    }
 								    static boolean isEven(int x) {
 								        return (x & 1) == 0;
 								    }
 								   /* static String ceToString(int primary, int secondary, int tertiary) {
 								        return "[" + hexBytes(primary) + ", "
 								            + hexBytes(secondary) + ", "
 								            + hexBytes(tertiary) + "]";
 								    }
 								    */
 								    static String hexBytes(long x) {
 								        StringBuffer temp = new StringBuffer();
 								        hexBytes(x, temp);
 								        return temp.toString();
 								    }
 								    static void hexBytes(long x, StringBuffer result) {
 								        byte lastb = 1;
 								        for (int shift = 24; shift >= 0; shift -= 8) {
 								            byte b = (byte)(x >>> shift);
 								            if (b != 0) {
 								                if (result.length() != 0) result.append(" ");
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                result.append(Utility.hex(b));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                //if (lastb == 0) System.err.println(" bad zero byte: " + result);
 								            }
 								            lastb = b;
 								        }
 								    }
 								    static int fixHan(char ch) { // BUMP HANGUL, HAN
 								        if (ch < 0x3400 || ch > 0xD7A3) return -1;
 								        char ch2 = ch;
 								        if (ch >= 0xAC00) ch2 -= (0xAC00 - 0x9FA5 - 1);
 								        if (ch >= 0x4E00) ch2 -= (0x4E00 - 0x4DB5 - 1);
 								        return 0x6000 + (ch2-0x3400); // room to interleave
 								    }
 								    static BitSet bumps = new BitSet();
 								    static BitSet singles = new BitSet();
 								    static void findBumps(char[] representatives) {
 								        int[] ces = new int[100];
 								        int[] scripts = new int[100];
 								        char[] scriptChar = new char[100];
 								        // find representatives
 								        for (char ch = 0; ch < 0xFFFF; ++ch) {
 								            byte type = collator.getCEType(ch);
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								            if (type < FIXED_CE) {
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                int len = collator.getCEs(String.valueOf(ch), true, ces);
 								                int primary = UCA.getPrimary(ces[0]);
 								                if (primary < variableHigh) continue;
 								                /*
 								                if (ch == 0x1160 || ch == 0x11A8) { // set bumps within Hangul L, V, T
 								                    bumps.set(primary);
 								                    continue;
 								                }
 								                */
 								                byte script = ucd.getScript(ch);
 								                // HACK
 								                if (ch == 0x0F7E || ch == 0x0F7F) script = TIBETAN_SCRIPT;
 								                //if (script == ucd.GREEK_SCRIPT) System.out.println(ucd.getName(ch));
 								                // get least primary for script
 								                if (scripts[script] == 0 || scripts[script] > primary) {
 								                    byte cat = ucd.getCategory(ch);
 								                    // HACK
 								                    if (ch == 0x0F7E || ch == 0x0F7F) cat = ucd.OTHER_LETTER;
 								                    if (cat <= ucd.OTHER_LETTER && cat != ucd.Lm) {
 								                        scripts[script] = primary;
 								                        scriptChar[script] = ch;
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                        if (script == ucd.GREEK_SCRIPT) System.out.println("*" + Utility.hex(primary) + ucd.getName(ch));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    }
 								                }
 								                // get representative char for primary
 								                if (representatives[primary] == 0 || representatives[primary] > ch) {
 								                    representatives[primary] = ch;
 								                }
 								            }
 								        }
 								        // set bumps
 								        for (int i = 0; i < scripts.length; ++i) {
 								            if (scripts[i] > 0) {
 								                bumps.set(scripts[i]);
 								                System.out.println(Utility.hex(scripts[i]) + " " + UCD.getScriptID_fromIndex((byte)i)
 								                 + " " + Utility.hex(scriptChar[i]) + " " + ucd.getName(scriptChar[i]));
 								            }
 								        }
 								        char[][] singlePairs = {{'a','z'}, {' ', ' '}}; // , {'\u3041', '\u30F3'}
 								        for (int j = 0; j < singlePairs.length; ++j) {
 								            for (char k = singlePairs[j][0]; k <= singlePairs[j][1]; ++k) {
 								                setSingle(k, ces);
 								            }
 								        }
 								        /*setSingle('\u0300', ces);
 								        setSingle('\u0301', ces);
 								        setSingle('\u0302', ces);
 								        setSingle('\u0303', ces);
 								        setSingle('\u0308', ces);
 								        setSingle('\u030C', ces);
 								        */
 								        bumps.set(0x089A); // lowest non-variable
 								        bumps.set(0x4E00); // lowest Kangxi
 								    }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								    static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd','HH:mm:ss' GMT'");
 								    static String getNormalDate() {
 								        return myDateFormat.format(new Date()) + " [MD]";
 								    }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static void setSingle(char ch, int[] ces) {
 								        collator.getCEs(String.valueOf(ch), true, ces);
 								        singles.set(UCA.getPrimary(ces[0]));
 								        if (ch == 'a') gapForA = UCA.getPrimary(ces[0]);
 								    }
 								    static void copyFile(PrintWriter log, String fileName) throws IOException {
 								        BufferedReader input = new BufferedReader(new FileReader(fileName));
 								        while (true) {
 								           String line = input.readLine();
 								           if (line == null) break;
 								           log.println(line);
 								        }
 								        input.close();
 								    }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								    static UnicodeSet compatibilityExceptions = new UnicodeSet("[\u0CCB\u0DDD\u017F\u1E9B\uFB05]");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static void writeCollationValidityLog() throws IOException {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        Default.setUCD();
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
 								        //log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html"));
-												Added chart program, minor edits.

X-SVN-Rev: 9918
											
										
										
											2002-09-25 06:40:14 +00:00
+								        log = Utility.openPrintWriter("CheckCollationValidity.html", Utility.UTF8_WINDOWS);
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
 								        log.println("<title>UCA Validity Log</title>");
 								        log.println("<style>.bottom { border-bottom-style: solid; border-bottom-color: #0000FF }</style>");
 								        log.println("</head><body bgcolor='#FFFFFF'>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        //collator = new UCA(null);
 								        if (false){
 								            String key = collator.getSortKey("\u0308\u0301", UCA.SHIFTED, false);
 								            String look = printableKey(key);
 								            System.out.println(look);
 								        }
 								        System.out.println("Sorting");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        /*
 								        for (int i = 0; i <= 0x10FFFF; ++i) {
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								            if (EXCLUDE_UNSUPPORTED && !collator.found.contains(i)) continue;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            if (0xD800 <= i && i <= 0xF8FF) continue; // skip surrogates and private use
 								            //if (0xA000 <= c && c <= 0xA48F) continue; // skip YI
 								            addString(UTF32.valueOf32(i), option);
 								        }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        */
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
 								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, null);
 								        cc.enableSamples();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
+								        while (true) {
 								            String s = cc.next();
 								            if (s == null) break;
 								            addString(s, option);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        }
-												finally did some significant code cleanup on collation. not enough, but it's a start

X-SVN-Rev: 8896
											
										
										
											2002-06-15 02:47:14 +00:00
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        System.out.println("Total: " + sortedD.size());
 								        Iterator it;
 								        //ucd.init();
 								        if (false) {
 								            System.out.println("Listing Mismatches");
 								            it = duplicates.keySet().iterator();
 								            //String lastSortKey = "";
 								            //String lastSource = "";
 								            while (it.hasNext()) {
 								                String source = (String)it.next();
 								                String sortKey = (String)duplicates.get(source);
 								                char endMark = source.charAt(source.length()-1);
 								                source = source.substring(0,source.length()-1);
 								                if (endMark == MARK1) {
 								                    log.println("<br>");
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    log.println("Mismatch: " + Utility.hex(source, " ")
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                        + ", " + ucd.getName(source) + "<br>");
 								                    log.print("  NFD:");
 								                } else {
 								                    log.print("  NFC:");
 								                }
 								                log.println(UCA.toString(sortKey) + "<br>");
 								                /*if (source.equals(lastSource)) {
 								                    it.remove();
 								                    --duplicateCount;
 								                }
 								                //lastSortKey = sortKey;
 								                lastSource = lastSource;
 								                */
 								            }
 								            System.out.println("Total: " + sortedD.size());
 								        }
 								        System.out.println("Writing");
 								        String version = collator.getDataVersion();
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<h1>Collation Validity Checks</h1>");
 								        log.println("<table><tr><td>Generated: </td><td>" + getNormalDate() + "</td></tr>");
 								        log.println("<tr><td>File Version: </td><td>" + collator.getDataVersion() + "/" + collator.getUCDVersion() + "</td></tr></table>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        if (GENERATED_NFC_MISMATCHES) showMismatches();
 								        removeAdjacentDuplicates2();
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        UnicodeSet alreadySeen = new UnicodeSet(compatibilityExceptions);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        checkBadDecomps(1, false, alreadySeen); // if decomposition is off, all primaries should be identical
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        checkBadDecomps(2, false, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
 								        checkBadDecomps(3, false, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
 								        //checkBadDecomps(2, true, alreadySeen); // if decomposition is ON, all primaries and secondaries should be identical
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
 								        log.println("<p>Note: characters with decompositions to space + X, and tatweel + X are excluded,"
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        	+ " as are a few special characters: " + compatibilityExceptions.toPattern(true) + "</p>");
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
 								        checkWellformedTable();
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        addClosure();
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								        writeDuplicates();
 								        writeOverlap();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("</body></html>");
 								        log.close();
 								        sortedD.clear();
 								        System.out.println("Done");
 								    }
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								    static void addClosure() {
 								        int canCount = 0;
 								        System.out.println("Add missing decomposibles");
 								    	log.println("<h2>7. Comparing Other Equivalents</h2>");
 								    	log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
 								    	log.println("<p>Each of the three strings is canonically equivalent, but has different sort keys</p>");
 								        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
 								        log.println("<tr><th>Count</th><th>Name</th><th>Code</th><th>Sort Keys</th></tr>");
 								        Set contentsForCanonicalIteration = new TreeSet();
 								        UCA.UCAContents ucac = collator.getContents(UCA.FIXED_CE, null); // NFD
 								        int ccounter = 0;
 								        while (true) {
 								            Utility.dot(ccounter++);
 								            String s = ucac.next();
 								            if (s == null) break;
 								            contentsForCanonicalIteration.add(s);
 								        }
 								        Set additionalSet = new HashSet();
 								        System.out.println("Loading canonical iterator");
 								        if (canIt == null) canIt = new CanonicalIterator(".");
 								        Iterator it2 = contentsForCanonicalIteration.iterator();
 								        System.out.println("Adding any FCD equivalents that have different sort keys");
 								        while (it2.hasNext()) {
 								            String key = (String)it2.next();
 								            if (key == null) {
 								                System.out.println("Null Key");
 								                continue;
 								            }
 								            canIt.setSource(key);
 								            String nfdKey = toD.normalize(key);
 								            boolean first = true;
 								            while (true) {
 								                String s = canIt.next();
 								                if (s == null) break;
 								                if (s.equals(key)) continue;
 								                if (contentsForCanonicalIteration.contains(s)) continue;
 								                if (additionalSet.contains(s)) continue;
 								                // Skip anything that is not FCD.
 								                if (!NFD.isFCD(s)) continue;
 								                // We ONLY add if the sort key would be different
 								                // Than what we would get if we didn't decompose!!
 								                String sortKey = collator.getSortKey(s, UCA.NON_IGNORABLE);
 								                String nonDecompSortKey = collator.getSortKey(s, UCA.NON_IGNORABLE, false);
 								                if (sortKey.equals(nonDecompSortKey)) continue;
 								                if (DEBUG && first) {
 								                    System.out.println(" " + ucd.getCodeAndName(key));
 								                    first = false;
 								                }
 								                log.println("<tr><td rowspan='3'>" + (++canCount) + "</td><td>" + Utility.replace(ucd.getName(key), ", ", ",<br>") + "</td>");
 								                log.println("<td>" + Utility.hex(key) + "</td>");
 								                log.println("<td>" + collator.toString(sortKey) + "</td></tr>");
 								                log.println("<tr><td>" + Utility.replace(ucd.getName(nfdKey), ", ", ",<br>") + "</td>");
 								                log.println("<td>" + Utility.hex(nfdKey) + "</td>");
 								                log.println("<td>" + collator.toString(sortKey) + "</td></tr>");
 								                log.println("<tr><td class='bottom'>" + Utility.replace(ucd.getName(s), ", ", ",<br>") + "</td>");
 								                log.println("<td class='bottom'>" + Utility.hex(s) + "</td>");
 								                log.println("<td class='bottom'>" + collator.toString(nonDecompSortKey) + "</td></tr>");
 								                additionalSet.add(s);
 								            }
 								        }
 								    	log.println("</table>");
 								    	log.println("<p>Items: " + canCount + "</p>");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	log.flush();
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								    }
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+									static void checkWellformedTable() throws IOException {
 								    	System.out.println("Checking for well-formedness");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								    	log.println("<h2>6. Checking for well-formedness</h2>");
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
 								        Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
 								        int[] ces = new int[50];
 								        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
 								        int[] lenArray = new int[1];
 								        int minps = Integer.MAX_VALUE;
 								        int minpst = Integer.MAX_VALUE;
 								        String minpsSample = "", minpstSample = "";
 								        int errorCount = 0;
 								        while (true) {
 								            String str = cc.next(ces, lenArray);
 								            if (str == null) break;
 								            int len = lenArray[0];
 								            for (int i = 0; i < len; ++i) {
 								            	int ce = ces[i];
 								            	int p = UCA.getPrimary(ce);
 								            	int s = UCA.getSecondary(ce);
 								            	int t = UCA.getTertiary(ce);
 								            	// Gather data for WF#2 check
 								            	if (p == 0) {
 								            		if (s > 0) {
 								            			if (s < minps) {
 								            				minps = s;
 								            				minpsSample = str;
 								            			}
 								            		} else {
 								            			if (t > 0 && t < minpst) {
 								            				minpst = t;
 								            				minpstSample = str;
 								            			}
 								                    }
 								            	}
 								            }
 								        }
 								        cc = collator.getContents(UCA.FIXED_CE, nfd);
 								        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        int lastPrimary = 0;
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
 								        while (true) {
 								            String str = cc.next(ces, lenArray);
 								            if (str == null) break;
 								            int len = lenArray[0];
 								            for (int i = 0; i < len; ++i) {
 								            	int ce = ces[i];
 								            	int p = UCA.getPrimary(ce);
 								            	int s = UCA.getSecondary(ce);
 								            	int t = UCA.getTertiary(ce);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            	// IF we are at the start of an implicit, then just check that the implicit is in range
 								            	// CHECK implicit
 								            	if (collator.isImplicitLeadPrimary(lastPrimary)) {
 								            	    try {
 								            	        if (s != 0 || t != 0) throw new Exception("Second implicit must be [X,0,0]");
 								            	        collator.ImplicitToCodePoint(lastPrimary, p);   // throws exception if bad
 								            	    } catch (Exception e) {
 								            		    log.println("<tr><td>" + (++errorCount) + ". BAD IMPLICIT: " + e.getMessage()
 								            			    + "</td><td>" + CEList.toString(ces, len)
 								            			    + "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
 								            	    }
 								            	    // zap the primary, since we worry about the last REAL primary:
 								            	    lastPrimary = 0;
 								            	    continue;
 								            	}
 								            	// IF we are in the trailing range, something is wrong.
 								            	if (p >= UCA_Types.UNSUPPORTED_LIMIT) {
 								                    log.println("<tr><td>" + (++errorCount) + ". > " + Utility.hex(UCA_Types.UNSUPPORTED_LIMIT,4)
 								            			+ "</td><td>" + CEList.toString(ces, len)
 								            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
 								            	    lastPrimary = p;
 								            	    continue;
 								            	}
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								            	// Check WF#1
 								            	if (p != 0 && s == 0) {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            		log.println("<tr><td>" + (++errorCount) + ". WF1.1"
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								            			+ "</td><td>" + CEList.toString(ces, len)
 								            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
 								            	}
 								            	if (s != 0 && t == 0) {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            		log.println("<tr><td>" + (++errorCount) + ". WF1.2"
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								            			+ "</td><td>" + CEList.toString(ces, len)
 								            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
 								            	}
 								            	// Check WF#2
 								            	if (p != 0) {
 								            		if (s > minps) {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            		log.println("<tr><td>" + (++errorCount) + ". WF2.2"
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								            			+ "</td><td>" + CEList.toString(ces, len)
 								            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
 								            		}
 								            	}
 								            	if (s != 0) {
 								            		if (t > minpst) {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            		log.println("<tr><td>" + (++errorCount) + ". WF2.3"
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								            			+ "</td><td>" + CEList.toString(ces, len)
 								            			+ "</td><td>" + ucd.getCodeAndName(str) + "</td></tr>");
 								            		}
 								            	} else {
 								            	}
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
 								            	lastPrimary = p;
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								            }
 								        }
 								        log.println("</table>");
 								        log.println("<p>Minimum Secondary in Primary Ignorable = " + Utility.hex(minps)
 								        	+ " from \t" + collator.getCEList(minpsSample, true)
 								        	+ "\t" + ucd.getCodeAndName(minpsSample) + "</p>");
 								        if (minpst < Integer.MAX_VALUE) {
 								        	log.println("<p>Minimum Tertiary in Secondary Ignorable =" + Utility.hex(minpst)
 								        		+ " from \t" + collator.getCEList(minpstSample, true)
 								        		+ "\t" + ucd.getCodeAndName(minpstSample) + "</p>");
 								        }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<p>Errors: " + errorCount + "</p>");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	log.flush();
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								    }
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								/*
 ;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
 DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
 E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
 FA5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
 								AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
 								D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
 								A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;;
 								A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;;
 								A4C4;YI RADICAL ZZIET;So;0;ON;;;;;N;;;;;
 								A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
 								*/
 								    static final int[][] extraConformanceRanges = {
 								        {0x3400, 0x4DB5}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0xA000, 0xA48C}, {0xE000, 0xF8FF},
 								        {0xFDD0, 0xFDEF},
 								        {0x20000, 0x2A6D6},
 								        {0x2F800, 0x2FA1D},
 								        };
 								    static final int[] extraConformanceTests = {
 								            //0xD800, 0xDBFF, 0xDC00, 0xDFFF,
 xFDD0, 0xFDEF, 0xFFF8,
 xFFFE, 0xFFFF,
 x10000, 0x1FFFD, 0x1FFFE, 0x1FFFF,
 x20000, 0x2FFFD, 0x2FFFE, 0x2FFFF,
 xE0000, 0xEFFFD, 0xEFFFE, 0xEFFFF,
 xF0000, 0xFFFFD, 0xFFFFE, 0xFFFFF,
 x100000, 0x10FFFD, 0x10FFFE, 0x10FFFF,
-												shifted the implicit range.

X-SVN-Rev: 8767
											
										
										
											2002-06-02 05:07:08 +00:00
+								            IMPLICIT_4BYTE_BOUNDARY, IMPLICIT_4BYTE_BOUNDARY-1, IMPLICIT_4BYTE_BOUNDARY+1,
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        };
 								    static final int MARK = 1;
 								    static final char MARK1 = '\u0001';
 								    static final char MARK2 = '\u0002';
 								    //Normalizer normalizer = new Normalizer(Normalizer.NFC, true);
-												ICU-0 updated to reorganized java directories

X-SVN-Rev: 8039
											
										
										
											2002-03-15 01:57:01 +00:00
+								    static Normalizer toC = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
 								    static Normalizer toD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    static TreeMap MismatchedC = new TreeMap();
 								    static TreeMap MismatchedN = new TreeMap();
 								    static TreeMap MismatchedD = new TreeMap();
 								    static final byte option = UCA.NON_IGNORABLE; // SHIFTED
 								    static void addString(int ch, byte option) {
 								        addString(UTF32.valueOf32(ch), option);
 								    }
 								    static void addString(String ch, byte option) {
 								        String colDbase = collator.getSortKey(ch, option, true);
 								        String colNbase = collator.getSortKey(ch, option, false);
 								        String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        if (!colNbase.equals(colCbase) || !colNbase.equals(colDbase) ) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            /*System.out.println(Utility.hex(ch));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            System.out.println(printableKey(colNbase));
 								            System.out.println(printableKey(colNbase));
 								            System.out.println(printableKey(colNbase));*/
 								            MismatchedN.put(ch,colNbase);
 								            MismatchedC.put(ch,colCbase);
 								            MismatchedD.put(ch,colDbase);
 								        }
 								        String colD = colDbase + "\u0000" + ch; // UCA.NON_IGNORABLE
 								        String colN = colNbase + "\u0000" + ch;
 								        String colC = colCbase + "\u0000" + ch;
 								        sortedD.put(colD, ch);
 								        backD.put(ch, colD);
 								        sortedN.put(colN, ch);
 								        backN.put(ch, colN);
 								        /*
 								        if (strength > 4) {
 								            duplicateCount++;
 								            duplicates.put(ch+MARK1, col);
 								            duplicates.put(ch+MARK2, col2);
 								        } else if (strength != 0) {
 								            sorted.put(col2 + MARK2, ch);
 								        }
 								        unique += 2;
 								        */
 								    }
 								   static void removeAdjacentDuplicates() {
 								        String lastChar = "";
 								        int countRem = 0;
 								        int countDups = 0;
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        int errorCount = 0;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        Iterator it1 = sortedD.keySet().iterator();
 								        Iterator it2 = sortedN.keySet().iterator();
 								        Differ differ = new Differ(250,3);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<h2>2. Differences in Ordering</h2>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("<p>Codes and names are in the white rows: bold means that the NO-NFD sort key differs from UCA key.</p>");
 								        log.println("<p>Keys are in the light blue rows: green is the bad key, blue is UCA, black is where they equal.</p>");
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("<tr><th>File Order</th><th>Code and Decomp</th><th>Key and Decomp-Key</th></tr>");
 								        while (true) {
 								            boolean gotOne = false;
 								            if (it1.hasNext()) {
 								                String col1 = (String)it1.next();
 								                String ch1 = (String)sortedD.get(col1);
 								                differ.addA(ch1);
 								                gotOne = true;
 								            }
 								            if (it2.hasNext()) {
 								                String col2 = (String)it2.next();
 								                String ch2 = (String)sortedN.get(col2);
 								                differ.addB(ch2);
 								                gotOne = true;
 								            }
 								            differ.checkMatch(!gotOne);
 								            if (differ.getACount() != 0 || differ.getBCount() != 0) {
 								                for (int q = 0; q < 2; ++q) {
 								                    String cell = "<td valign='top'" + (q!=0 ? "bgcolor='#C0C0C0'" : "") + ">" + (q!=0 ? "<tt>" : "");
 								                    log.print("<tr>" + cell);
 								                    for (int i = -1; i < differ.getACount()+1; ++i) {
 								                        showDiff(q==0, true, differ.getALine(i), differ.getA(i));
 								                        log.println("<br>");
 								                        ++countDups;
 								                    }
 								                    countDups -= 2; // to make up for extra line above and below
 								                    if (false) {
 								                        log.print("</td>" + cell);
 								                        for (int i = -1; i < differ.getBCount()+1; ++i) {
 								                            showDiff(q==0, false, differ.getBLine(i), differ.getB(i));
 								                            log.println("<br>");
 								                        }
 								                    }
 								                    log.println("</td></tr>");
 								                }
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								                errorCount++;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            //differ.flush();
 								            if (!gotOne) break;
 								        }
 								        log.println("</table>");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<p>Errors: " + errorCount + "</p>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        //log.println("Removed " + countRem + " adjacent duplicates.<br>");
 								        System.out.println("Left " + countDups + " conflicts.<br>");
 								        log.println("Left " + countDups + " conflicts.<br>");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	log.flush();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								   }
 								   static void removeAdjacentDuplicates2() {
 								        String lastChar = "";
 								        int countRem = 0;
 								        int countDups = 0;
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        int errorCount = 0;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        Iterator it = sortedD.keySet().iterator();
 								        log.println("<h1>2. Differences in Ordering</h1>");
 								        log.println("<p>Codes and names are in the white rows: bold means that the NO-NFD sort key differs from UCA key.</p>");
 								        log.println("<p>Keys are in the light blue rows: green is the bad key, blue is UCA, black is where they equal.</p>");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<p>Note: so black lines are generally ok.</p>");
-												added additional well-formedness test for UCA, tested UCD name lengths

X-SVN-Rev: 8773
											
										
										
											2002-06-04 01:59:02 +00:00
+								        log.println("<table border='1' cellspacing='0' cellpadding='2'>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("<tr><th>File Order</th><th>Code and Decomp</th><th>Key and Decomp-Key</th></tr>");
 								        String lastCol = "a";
 								        String lastColN = "a";
 								        String lastCh = "";
 								        boolean showedLast = true;
 								        int count = 0;
 								        while (it.hasNext()) {
 								            count++;
 								            String col = (String)it.next();
 								            String ch = (String)sortedD.get(col);
 								            String colN = (String)backN.get(ch);
 								            if (colN == null || colN.length() < 1) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                System.out.println("Missing colN value for " + Utility.hex(ch, " ") + ": " + printableKey(colN));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            if (col == null || col.length() < 1) {
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                System.out.println("Missing col value for " + Utility.hex(ch, " ") + ": " + printableKey(col));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            if (compareMinusLast(col, lastCol) == compareMinusLast(colN, lastColN)) {
 								                showedLast = false;
 								            } else {
 								                if (true && count < 200) {
 								                    System.out.println();
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    System.out.println(Utility.hex(ch, " ") + ", " + Utility.hex(lastCh, " "));
 								                    System.out.println("      col: " + Utility.hex(col, " "));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    System.out.println(compareMinusLast(col, lastCol));
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    System.out.println("  lastCol: " + Utility.hex(lastCol, " "));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    System.out.println();
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    System.out.println("     colN: " + Utility.hex(colN, " "));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    System.out.println(compareMinusLast(colN, lastColN));
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    System.out.println(" lastColN: " + Utility.hex(lastColN, " "));
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                }
 								                if (!showedLast) {
 								                    log.println("<tr><td colspan='3'></td><tr>");
 								                    showLine(count-1, lastCh, lastCol, lastColN);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								                    errorCount++;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                }
 								                showedLast = true;
 								                showLine(count,ch, col, colN);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								                errorCount++;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            }
 								            lastCol = col;
 								            lastColN = colN;
 								            lastCh = ch;
 								        }
 								        log.println("</table>");
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<p>Errors: " + errorCount + "</p>");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	log.flush();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								   }
 								    static int compareMinusLast(String a, String b) {
 								        String am = a.substring(0,a.length()-1);
 								        String bm = b.substring(0,b.length()-1);
 								        int result = am.compareTo(b);
 								        return (result < 0 ? -1 : result > 0 ? 1 : 0);
 								    }
 								    static void showLine(int count, String ch, String keyD, String keyN) {
 								        String decomp = toD.normalize(ch);
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								        if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("<tr><td>" + count + "</td><td>"
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								            + Utility.hex(ch, " ")
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            + " " + ucd.getName(ch)
 								            + decomp
 								            + "</td><td>");
 								        if (keyD.equals(keyN)) {
 								            log.println(printableKey(keyN));
 								        } else {
 								            log.println("<font color='#009900'>" + printableKey(keyN)
 								                + "</font><br><font color='#000099'>" + printableKey(keyD) + "</font>"
 								            );
 								        }
 								        log.println("</td></tr>");
 								    }
 								    TreeSet foo;
 								    static final String[] alternateName = {"SHIFTED", "ZEROED", "NON_IGNORABLE", "SHIFTED_TRIMMED"};
 								    static void showMismatches() {
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        log.println("<h2>1. Mismatches when NFD is OFF</h2>");
 								        log.println("<p>Alternate Handling = " + alternateName[option] + "</p>");
 								        log.println("<p>NOTE: NFD form is used by UCA,"
 								            + "so if other forms are different there are <i>ignored</i>. This may indicate a problem, e.g. missing contraction.</p>");
 								        log.println("<table border='1'>");
 								        log.println("<tr><th>Name</th><th>Type</th><th>Unicode</th><th>Key</th></tr>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        Iterator it = MismatchedC.keySet().iterator();
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								        int errorCount = 0;
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        while (it.hasNext()) {
 								            String ch = (String)it.next();
 								            String MN = (String)MismatchedN.get(ch);
 								            String MC = (String)MismatchedC.get(ch);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            String MD = (String)MismatchedD.get(ch);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            String chInC = toC.normalize(ch);
-												New d5 after discussions with Ken; added HOMELESS in Fractional UCA

X-SVN-Rev: 8969
											
										
										
											2002-06-28 01:59:58 +00:00
+								            String chInD = toD.normalize(ch);
 								            log.println("<tr><td rowSpan='3' class='bottom'>" + Utility.replace(ucd.getName(ch), ", ", ",<br>")
 								                + "</td><td>NFD</td><td>" + Utility.hex(chInD)
 								                + "</td><td>" + printableKey(MD) + "</td></tr>");
 								            log.println("<tr><td>NFC</td><td>" + Utility.hex(chInC)
 								                + "</td><td>" + printableKey(MC) + "</td></tr>");
 								            log.println("<tr><td class='bottom'>Plain</td><td class='bottom'>" + Utility.hex(ch)
 								                + "</td><td class='bottom'>" + printableKey(MN) + "</td></tr>");
 								            errorCount++;
 								        }
 								        log.println("</table>");
 								        log.println("<p>Errors: " + errorCount + "</p>");
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								        log.println("<br>");
-												Updated with changes agreed with Ken for d6.
Added line for rearrange (missing earlier); this changes conformance!

X-SVN-Rev: 9022
											
										
										
											2002-07-03 02:15:47 +00:00
+								    	log.flush();
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								    }
 								    static boolean containsCombining(String s) {
 								        for (int i = 0; i < s.length(); ++i) {
 								            if ((ucd.getCategoryMask(s.charAt(i)) & ucd.MARK_MASK) != 0) return true;
 								        }
 								        return false;
 								    }
 								    static void showDiff(boolean showName, boolean firstColumn, int line, Object chobj) {
 								        String ch = chobj.toString();
 								        String decomp = toD.normalize(ch);
 								        if (showName) {
 								            if (ch.equals(decomp)) {
 								                log.println(//title + counter + " "
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    Utility.hex(ch, " ")
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    + " " + ucd.getName(ch)
 								                );
 								            } else {
 								                log.println(//title + counter + " "
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    "<b>" + Utility.hex(ch, " ")
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                    + " " + ucd.getName(ch) + "</b>"
 								                );
 								            }
 								        } else {
 								            String keyD = printableKey(backD.get(chobj));
 								            String keyN = printableKey(backN.get(chobj));
 								            if (keyD.equals(keyN)) {
 								                log.println(//title + counter + " "
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    Utility.hex(ch, " ") + " " + keyN);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								            } else {
 								                log.println(//title + counter + " "
-												Fixed Fractional UCA for surrogates

X-SVN-Rev: 6504
											
										
										
											2001-10-31 00:02:54 +00:00
+								                    "<font color='#009900'>" + Utility.hex(ch, " ") + " " + keyN
 								                    + "</font><br><font color='#000099'>" + Utility.hex(decomp, " ") + " " + keyD + "</font>"
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
+								                );
 								            }
 								        }
 								    }
 								    static String printableKey(Object keyobj) {
 								        String sortKey;
 								        if (keyobj == null) {
 								            sortKey = "NULL!!";
 								        } else {
 								            sortKey = keyobj.toString();
 								            sortKey = sortKey.substring(0,sortKey.length()-1);
 								            sortKey = UCA.toString(sortKey);
 								        }
 								        return sortKey;
 								    }
 								    /*
 								      LINKS</td></tr><tr><td><blockquote>
 								     CONTENTS
 								    */
 								    static void writeTail(PrintWriter out, int counter, String title, String other, boolean show) throws IOException {
 								        copyFile(out, "HTML-Part2.txt");
 								        /*
 								        out.println("</tr></table></center></div>");
 								        out.println("</body></html>");
 								        */
 								        out.close();
 								    }
 								    static String pad (int number) {
 								        String num = Integer.toString(number);
 								        if (num.length() < 2) num = "0" + number;
 								        return num;
 								    }
 								    static PrintWriter writeHead(int counter, int end, String title, String other, String version, boolean show) throws IOException {
-												Added chart program, minor edits.

X-SVN-Rev: 9918
											
										
										
											2002-09-25 06:40:14 +00:00
+								        PrintWriter out = Utility.openPrintWriter(title + pad(counter) + ".html", Utility.UTF8_WINDOWS);
-												First check-in

X-SVN-Rev: 5636
											
										
										
											2001-08-30 20:50:18 +00:00
 								        copyFile(out, "HTML-Part1.txt");
 								        /*
 								        out.println("<html><head>");
 								        out.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
 								        out.println("<title>" + HTMLString(title) + "</title>");
 								        out.println("<style>");
 								        out.println("<!--");
 								        //out.println("td           { font-size: 18pt; font-family: Bitstream Cyberbit, Arial Unicode MS; text-align: Center}");
 								        out.println("td           { font-size: 18pt; text-align: Center}");
 								        out.println("td.right           { font-size: 12pt; text-align: Right}");
 								        out.println("td.title           { font-size: 18pt; text-align: Center}");
 								        out.println("td.left           { font-size: 12pt; text-align: Left}");
 								        //out.println("th         { background-color: #C0C0C0; font-size: 18pt; font-family: Arial Unicode MS, Bitstream Cyberbit; text-align: Center }");
 								        out.println("tt         { font-size: 8pt; }");
 								        //out.println("code           { font-size: 8pt; }");
 								        out.println("-->");
 								        out.println("</style></head><body bgcolor='#FFFFFF'>");
 								        // header
 								        out.print("<table width='100%'><tr>");
 								        out.println("<td><p align='left'><font size='3'><a href='index.html'>Instructions</a></font></td>");
 								        out.println("<td>" + HTMLString(title) + " Version" + version + "</td>");
 								        out.println("<td><p align='right'><font size='3'><a href='" + other + pad(counter) + ".html'>"
 								            + (show ? "Hide" : "Show") + " Key</a></td>");
 								        out.println("</tr></table>");
 								        /*
 								        <table width="100%">
 								  <tr>
 								    <td.left><a href="Collation.html">
 								      <font size="3">Instructions</font></a>
 								    <td>
 								      <td.title>Collation Version-2.1.9d7
 								    <td>
 								      <p align="right"><a href="CollationKey24.html"><font size="3">Show Key</font></a>
 								  </tr>
 								*/
 								        // index
 								        out.print("<table width='100%'><tr>");
 								        out.println("<td><p align='left'><font size='3'><a href='index.html'>Instructions</a></font></td>");
 								        out.println("<td>" + HTMLString(title) + " Version" + version + "</td>");
 								        out.println("<td><p align='right'><font size='3'><a href='" + other + pad(counter) + ".html'>"
 								            + (show ? "Hide" : "Show") + " Key</a></td>");
 								        out.println("</tr></table>");
 								        out.print("<table width='100%'><tr>");
 								        out.print("<td width='1%'><p align='left'>");
 								        if (counter > 0) {
 								            out.print("<a href='" + title + pad(counter-1) + ".html'>&lt;&lt;</a>");
 								        } else {
 								            out.print("<font color='#999999'>&lt;&lt;</font>");
 								        }
 								        out.println("</td>");
 								        out.println("<td><p align='center'>");
 								        boolean lastFar = false;
 								        for (int i = 0; i <= end; ++i) {
 								            boolean far = (i < counter-2 || i > counter+2);
 								            if (far && ((i % 5) != 0) && (i != end)) continue;
 								            if (i != 0 && lastFar != far) out.print(" - ");
 								            lastFar = far;
 								            if (i != counter) {
 								                out.print("<a href='" + title + pad(i) + ".html'>" + i + "</a>");
 								            } else {
 								                out.print("<font color='#FF0000'>" + i + "</font>");
 								            }
 								            out.println();
 								        }
 								        out.println("</td>");
 								        out.println("<td width='1%'><p align='right'>");
 								        if (counter < end) {
 								            out.print("<a href='" + title + pad(counter+1) + ".html'>&gt;&gt;</a>");
 								        } else {
 								            out.print("<font color='#999999'>&gt;&gt;</font>");
 								        }
 								        out.println("</td></tr></table>");
 								        // standard template!!!
 								        out.println("</td></tr><tr><td><blockquote>");
 								        //out.println("<p><div align='center'><center><table border='1'><tr>");
 								        return out;
 								    }
 								    static int getStrengthDifference(String old, String newStr) {
 								        int result = 5;
 								        int min = old.length();
 								        if (newStr.length() < min) min = newStr.length();
 								        for (int i = 0; i < min; ++i) {
 								            char ch1 = old.charAt(i);
 								            char ch2 = newStr.charAt(i);
 								            if (ch1 != ch2) return result;
 								            // see if we get difference before we get 0000.
 								            if (ch1 == 0) --result;
 								        }
 								        if (newStr.length() != old.length()) return 1;
 								        return 0;
 								    }
 								    static final boolean needsXMLQuote(String source, boolean quoteApos) {
 								        for (int i = 0; i < source.length(); ++i) {
 								            char ch = source.charAt(i);
 								            if (ch < ' ' || ch == '<' || ch == '&' || ch == '>') return true;
 								            if (quoteApos & ch == '\'') return true;
 								            if (ch == '\"') return true;
 								            if (ch >= '\uD800' && ch <= '\uDFFF') return true;
 								            if (ch >= '\uFFFE') return true;
 								        }
 								        return false;
 								    }
 								    public static final String XMLString(int[] cps) {
 								        return XMLBaseString(cps, cps.length, true);
 								    }
 								    public static final String XMLString(int[] cps, int len) {
 								        return XMLBaseString(cps, len, true);
 								    }
 								    public static final String XMLString(String source) {
 								        return XMLBaseString(source, true);
 								    }
 								    public static final String HTMLString(int[] cps) {
 								        return XMLBaseString(cps, cps.length, false);
 								    }
 								    public static final String HTMLString(int[] cps, int len) {
 								        return XMLBaseString(cps, len, false);
 								    }
 								    public static final String HTMLString(String source) {
 								        return XMLBaseString(source, false);
 								    }
 								    public static final String XMLBaseString(int[] cps, int len, boolean quoteApos) {
 								        StringBuffer temp = new StringBuffer();
 								        for (int i = 0; i < len; ++i) {
 								            temp.append((char)cps[i]);
 								        }
 								        return XMLBaseString(temp.toString(), quoteApos);
 								    }
 								    public static final String XMLBaseString(String source, boolean quoteApos) {
 								        if (!needsXMLQuote(source, quoteApos)) return source;
 								        StringBuffer result = new StringBuffer();
 								        for (int i = 0; i < source.length(); ++i) {
 								            char ch = source.charAt(i);
 								            if (ch < ' '
 								              || ch >= '\u007F' && ch <= '\u009F'
 								              || ch >= '\uD800' && ch <= '\uDFFF'
 								              || ch >= '\uFFFE') {
 								                result.append('\uFFFD');
 								                /*result.append("#x");
 								                result.append(cpName(ch));
 								                result.append(";");
 								                */
 								            } else if (quoteApos && ch == '\'') {
 								                result.append("&apos;");
 								            } else if (ch == '\"') {
 								                result.append("&quot;");
 								            } else if (ch == '<') {
 								                result.append("&lt;");
 								            } else if (ch == '&') {
 								                result.append("&amp;");
 								            } else if (ch == '>') {
 								                result.append("&gt;");
 								             } else {
 								                result.append(ch);
 								            }
 								        }
 								        return result.toString();
 								    }
 								    static int mapToStartOfRange(int ch) {
 								        if (ch <= 0x3400) return ch;         // CJK Ideograph Extension A
 								        if (ch <= 0x4DB5) return 0x3400;
 								        if (ch <= 0x4E00) return ch;         // CJK Ideograph
 								        if (ch <= 0x9FA5) return 0x4E00;
 								        if (ch <= 0xAC00) return ch;         // Hangul Syllable
 								        if (ch <= 0xD7A3) return 0xAC00;
 								        if (ch <= 0xD800) return ch;         // Non Private Use High Surrogate
 								        if (ch <= 0xDB7F) return 0xD800;
 								        if (ch <= 0xDB80) return ch;         // Private Use High Surrogate
 								        if (ch <= 0xDBFF) return 0xDB80;
 								        if (ch <= 0xDC00) return ch;         // Low Surrogate
 								        if (ch <= 0xDFFF) return 0xDC00;
 								        if (ch <= 0xE000) return ch;         // Private Use
 								        if (ch <= 0xF8FF) return 0xE000;
 								        if (ch <= 0xF0000) return ch;       // Plane 15 Private Use
 								        if (ch <= 0xFFFFD) return 0xF0000;
 								        if (ch <= 0x100000) return ch;       // Plane 16 Private Use
 								        return 0x100000;
 								    }
 								}